diff options
author | David Rowley <drowley@postgresql.org> | 2021-08-03 12:25:52 +1200 |
---|---|---|
committer | David Rowley <drowley@postgresql.org> | 2021-08-03 12:25:52 +1200 |
commit | db632fbca392389807ffb9d9b2207157e8e9b3e8 (patch) | |
tree | 620f60d919288f919f8af66abcfb2598591d2678 /src/backend/partitioning/partbounds.c | |
parent | 475dbd0b718de8ac44da144f934651b959e3b705 (diff) | |
download | postgresql-db632fbca392389807ffb9d9b2207157e8e9b3e8.tar.gz postgresql-db632fbca392389807ffb9d9b2207157e8e9b3e8.zip |
Allow ordered partition scans in more cases
959d00e9d added the ability to make use of an Append node instead of a
MergeAppend when we wanted to perform a scan of a partitioned table and
the required sort order was the same as the partitioned keys and the
partitioned table was defined in such a way that earlier partitions were
guaranteed to only contain lower-order values than later partitions.
However, previously we didn't allow these ordered partition scans for
LIST partitioned table when there were any partitions that allowed
multiple Datums. This was a very cheap check to make and we could likely
have done a little better by checking if there were interleaved
partitions, but at the time we didn't have visibility about which
partitions were pruned, so we still may have disallowed cases where all
interleaved partitions were pruned.
Since 475dbd0b7, we now have knowledge of pruned partitions, we can do a
much better job inside partitions_are_ordered().
Here we pass which partitions survived partition pruning into
partitions_are_ordered() and, for LIST partitioning, have it check to see
if any live partitions exist that are also in the new "interleaved_parts"
field defined in PartitionBoundInfo.
For RANGE partitioning we can relax the code which caused the partitions
to be unordered if a DEFAULT partition existed. Since we now know which
partitions were pruned, partitions_are_ordered() now returns true when the
DEFAULT partition was pruned.
Reviewed-by: Amit Langote, Zhihong Yu
Discussion: https://postgr.es/m/CAApHDvrdoN_sXU52i=QDXe2k3WAo=EVry29r2+Tq2WYcn2xhEA@mail.gmail.com
Diffstat (limited to 'src/backend/partitioning/partbounds.c')
-rw-r--r-- | src/backend/partitioning/partbounds.c | 105 |
1 files changed, 81 insertions, 24 deletions
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 25018b1a8b7..fdfe712f917 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -395,6 +395,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = nparts; boundinfo->datums = (Datum **) palloc0(nparts * sizeof(Datum *)); boundinfo->kind = NULL; + boundinfo->interleaved_parts = NULL; boundinfo->nindexes = greatest_modulus; boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int)); for (i = 0; i < greatest_modulus; i++) @@ -543,6 +544,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->ndatums = ndatums; boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); boundinfo->kind = NULL; + boundinfo->interleaved_parts = NULL; boundinfo->nindexes = ndatums; boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); @@ -607,6 +609,69 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->default_index = (*mapping)[default_index]; } + /* + * Calculate interleaved partitions. Here we look for partitions which + * might be interleaved with other partitions and set a bit in + * interleaved_parts for any partitions which may be interleaved with + * another partition. + */ + + /* + * There must be multiple partitions to have any interleaved partitions, + * otherwise there's nothing to interleave with. + */ + if (nparts > 1) + { + /* + * Short-circuit check to see if only 1 Datum is allowed per + * partition. When this is true there's no need to do the more + * expensive checks to look for interleaved values. + */ + if (boundinfo->ndatums + + partition_bound_accepts_nulls(boundinfo) + + partition_bound_has_default(boundinfo) != nparts) + { + int last_index = -1; + + /* + * Since the indexes array is sorted in Datum order, if any + * partitions are interleaved then it will show up by the + * partition indexes not being in ascending order. Here we check + * for that and record all partitions that are out of order. + */ + for (i = 0; i < boundinfo->nindexes; i++) + { + int index = boundinfo->indexes[i]; + + if (index < last_index) + boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts, + index); + + /* + * Mark the NULL partition as interleaved if we find that it + * allows some other non-NULL Datum. + */ + if (partition_bound_accepts_nulls(boundinfo) && + index == boundinfo->null_index) + boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts, + boundinfo->null_index); + + last_index = index; + } + } + + /* + * The DEFAULT partition is the "catch-all" partition that can contain + * anything that does not belong to any other partition. If there are + * any other partitions then the DEFAULT partition must be marked as + * interleaved. + */ + if (partition_bound_has_default(boundinfo)) + boundinfo->interleaved_parts = bms_add_member(boundinfo->interleaved_parts, + boundinfo->default_index); + } + + /* All partitions must now have been assigned canonical indexes. */ Assert(next_index == nparts); return boundinfo; @@ -750,6 +815,7 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts, boundinfo->kind = (PartitionRangeDatumKind **) palloc(ndatums * sizeof(PartitionRangeDatumKind *)); + boundinfo->interleaved_parts = NULL; /* * For range partitioning, an additional value of -1 is stored as the last @@ -993,6 +1059,9 @@ partition_bounds_copy(PartitionBoundInfo src, else dest->kind = NULL; + /* copy interleaved partitions for LIST partitioned tables */ + dest->interleaved_parts = bms_copy(src->interleaved_parts); + /* * For hash partitioning, datums array will have two elements - modulus * and remainder. @@ -2780,13 +2849,15 @@ add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs, * that is partitions appearing earlier in the PartitionDesc sequence * contain partition keys strictly less than those appearing later. * Also, if NULL values are possible, they must come in the last - * partition defined in the PartitionDesc. + * partition defined in the PartitionDesc. 'live_parts' marks which + * partitions we should include when checking the ordering. Partitions + * that do not appear in 'live_parts' are ignored. * * If out of order, or there is insufficient info to know the order, * then we return false. */ bool -partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) +partitions_are_ordered(PartitionBoundInfo boundinfo, Bitmapset *live_parts) { Assert(boundinfo != NULL); @@ -2798,38 +2869,24 @@ partitions_are_ordered(PartitionBoundInfo boundinfo, int nparts) * RANGE-type partitioning guarantees that the partitions can be * scanned in the order that they're defined in the PartitionDesc * to provide sequential, non-overlapping ranges of tuples. - * However, if a DEFAULT partition exists then it doesn't work, as - * that could contain tuples from either below or above the - * defined range, or tuples belonging to gaps between partitions. + * However, if a DEFAULT partition exists and it's contained + * within live_parts, then the partitions are not ordered. */ - if (!partition_bound_has_default(boundinfo)) + if (!partition_bound_has_default(boundinfo) || + !bms_is_member(boundinfo->default_index, live_parts)) return true; break; case PARTITION_STRATEGY_LIST: /* - * LIST partitioning can also guarantee ordering, but only if the - * partitions don't accept interleaved values. We could likely - * check for this by looping over the PartitionBound's indexes - * array to check that the indexes are in order. For now, let's - * just keep it simple and just accept LIST partitioning when - * there's no DEFAULT partition, exactly one value per partition, - * and optionally a NULL partition that does not accept any other - * values. Such a NULL partition will come last in the - * PartitionDesc, and the other partitions will be properly - * ordered. This is a cheap test to make as it does not require - * any per-partition processing. Maybe we'd like to handle more - * complex cases in the future. + * LIST partitioned are ordered providing none of live_parts + * overlap with the partitioned table's interleaved partitions. */ - if (partition_bound_has_default(boundinfo)) - return false; - - if (boundinfo->ndatums + partition_bound_accepts_nulls(boundinfo) - == nparts) + if (!bms_overlap(live_parts, boundinfo->interleaved_parts)) return true; - break; + break; default: /* HASH, or some other strategy */ break; |