aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/executor/execPartition.c4
-rw-r--r--src/backend/partitioning/partbounds.c115
-rw-r--r--src/backend/partitioning/partprune.c30
-rw-r--r--src/include/partitioning/partbounds.h29
-rw-r--r--src/test/regress/expected/partition_prune.out60
-rw-r--r--src/test/regress/sql/partition_prune.sql27
6 files changed, 124 insertions, 141 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 1746cb87936..746cd1e9d7a 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1323,16 +1323,14 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
{
case PARTITION_STRATEGY_HASH:
{
- int greatest_modulus;
uint64 rowHash;
- greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
rowHash = compute_partition_hash_value(key->partnatts,
key->partsupfunc,
key->partcollation,
values, isnull);
- part_index = boundinfo->indexes[rowHash % greatest_modulus];
+ part_index = boundinfo->indexes[rowHash % boundinfo->nindexes];
}
break;
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index b9aeb77bc2b..0c3f212ff21 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -224,7 +224,6 @@ static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, int32 *cmpval);
-static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2);
static Oid get_partition_operator(PartitionKey key, int col,
@@ -398,6 +397,7 @@ create_hash_bounds(PartitionBoundSpec **boundspecs, int nparts,
boundinfo->ndatums = ndatums;
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+ boundinfo->nindexes = greatest_modulus;
boundinfo->indexes = (int *) palloc(greatest_modulus * sizeof(int));
for (i = 0; i < greatest_modulus; i++)
boundinfo->indexes[i] = -1;
@@ -530,6 +530,7 @@ create_list_bounds(PartitionBoundSpec **boundspecs, int nparts,
boundinfo->ndatums = ndatums;
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+ boundinfo->nindexes = ndatums;
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
/*
@@ -725,8 +726,9 @@ create_range_bounds(PartitionBoundSpec **boundspecs, int nparts,
/*
* For range partitioning, an additional value of -1 is stored as the last
- * element.
+ * element of the indexes[] array.
*/
+ boundinfo->nindexes = ndatums + 1;
boundinfo->indexes = (int *) palloc((ndatums + 1) * sizeof(int));
for (i = 0; i < ndatums; i++)
@@ -807,45 +809,41 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
if (b1->ndatums != b2->ndatums)
return false;
+ if (b1->nindexes != b2->nindexes)
+ return false;
+
if (b1->null_index != b2->null_index)
return false;
if (b1->default_index != b2->default_index)
return false;
- if (b1->strategy == PARTITION_STRATEGY_HASH)
+ /* For all partition strategies, the indexes[] arrays have to match */
+ for (i = 0; i < b1->nindexes; i++)
{
- int greatest_modulus = get_hash_partition_greatest_modulus(b1);
-
- /*
- * If two hash partitioned tables have different greatest moduli,
- * their partition schemes don't match.
- */
- if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
+ if (b1->indexes[i] != b2->indexes[i])
return false;
+ }
+ /* Finally, compare the datums[] arrays */
+ if (b1->strategy == PARTITION_STRATEGY_HASH)
+ {
/*
* We arrange the partitions in the ascending order of their moduli
* and remainders. Also every modulus is factor of next larger
* modulus. Therefore we can safely store index of a given partition
* in indexes array at remainder of that partition. Also entries at
* (remainder + N * modulus) positions in indexes array are all same
- * for (modulus, remainder) specification for any partition. Thus
- * datums array from both the given bounds are same, if and only if
- * their indexes array will be same. So, it suffices to compare
- * indexes array.
- */
- for (i = 0; i < greatest_modulus; i++)
- if (b1->indexes[i] != b2->indexes[i])
- return false;
-
-#ifdef USE_ASSERT_CHECKING
-
- /*
- * Nonetheless make sure that the bounds are indeed same when the
+ * for (modulus, remainder) specification for any partition. Thus the
+ * datums arrays from the given bounds are the same, if and only if
+ * their indexes arrays are the same. So, it suffices to compare the
+ * indexes arrays.
+ *
+ * Nonetheless make sure that the bounds are indeed the same when the
* indexes match. Hash partition bound stores modulus and remainder
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
*/
+#ifdef USE_ASSERT_CHECKING
for (i = 0; i < b1->ndatums; i++)
Assert((b1->datums[i][0] == b2->datums[i][0] &&
b1->datums[i][1] == b2->datums[i][1]));
@@ -891,15 +889,7 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
parttypbyval[j], parttyplen[j]))
return false;
}
-
- if (b1->indexes[i] != b2->indexes[i])
- return false;
}
-
- /* There are ndatums+1 indexes in case of range partitions */
- if (b1->strategy == PARTITION_STRATEGY_RANGE &&
- b1->indexes[i] != b2->indexes[i])
- return false;
}
return true;
}
@@ -920,8 +910,8 @@ partition_bounds_copy(PartitionBoundInfo src,
PartitionBoundInfo dest;
int i;
int ndatums;
+ int nindexes;
int partnatts;
- int num_indexes;
bool hash_part;
int natts;
@@ -929,10 +919,9 @@ partition_bounds_copy(PartitionBoundInfo src,
dest->strategy = src->strategy;
ndatums = dest->ndatums = src->ndatums;
+ nindexes = dest->nindexes = src->nindexes;
partnatts = key->partnatts;
- num_indexes = get_partition_bound_num_indexes(src);
-
/* List partitioned tables have only a single partition key. */
Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
@@ -990,8 +979,8 @@ partition_bounds_copy(PartitionBoundInfo src,
}
}
- dest->indexes = (int *) palloc(sizeof(int) * num_indexes);
- memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes);
+ dest->indexes = (int *) palloc(sizeof(int) * nindexes);
+ memcpy(dest->indexes, src->indexes, sizeof(int) * nindexes);
dest->null_index = src->null_index;
dest->default_index = src->default_index;
@@ -2456,6 +2445,7 @@ build_merged_partition_bounds(char strategy, List *merged_datums,
}
Assert(list_length(merged_indexes) == ndatums);
+ merged_bounds->nindexes = ndatums;
merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
pos = 0;
foreach(lc, merged_indexes)
@@ -2889,7 +2879,7 @@ check_new_partition_bound(char *relname, Relation parent,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
- greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
+ greatest_modulus = boundinfo->nindexes;
remainder = spec->remainder;
/*
@@ -3282,18 +3272,15 @@ check_default_partition_contents(Relation parent, Relation default_rel,
/*
* get_hash_partition_greatest_modulus
*
- * Returns the greatest modulus of the hash partition bound. The greatest
- * modulus will be at the end of the datums array because hash partitions are
- * arranged in the ascending order of their moduli and remainders.
+ * Returns the greatest modulus of the hash partition bound.
+ * This is no longer used in the core code, but we keep it around
+ * in case external modules are using it.
*/
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
- Assert(bound->datums && bound->ndatums > 0);
- Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0);
-
- return DatumGetInt32(bound->datums[bound->ndatums - 1][0]);
+ return bound->nindexes;
}
/*
@@ -3698,46 +3685,6 @@ qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
}
/*
- * get_partition_bound_num_indexes
- *
- * Returns the number of the entries in the partition bound indexes array.
- */
-static int
-get_partition_bound_num_indexes(PartitionBoundInfo bound)
-{
- int num_indexes;
-
- Assert(bound);
-
- switch (bound->strategy)
- {
- case PARTITION_STRATEGY_HASH:
-
- /*
- * The number of the entries in the indexes array is same as the
- * greatest modulus.
- */
- num_indexes = get_hash_partition_greatest_modulus(bound);
- break;
-
- case PARTITION_STRATEGY_LIST:
- num_indexes = bound->ndatums;
- break;
-
- case PARTITION_STRATEGY_RANGE:
- /* Range partitioned table has an extra index. */
- num_indexes = bound->ndatums + 1;
- break;
-
- default:
- elog(ERROR, "unexpected partition strategy: %d",
- (int) bound->strategy);
- }
-
- return num_indexes;
-}
-
-/*
* get_partition_operator
*
* Return oid of the operator of the given strategy for the given partition
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index fac921eea5b..42c7c5f5541 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -781,7 +781,10 @@ get_matching_partitions(PartitionPruneContext *context, List *pruning_steps)
scan_default = final_result->scan_default;
while ((i = bms_next_member(final_result->bound_offsets, i)) >= 0)
{
- int partindex = context->boundinfo->indexes[i];
+ int partindex;
+
+ Assert(i < context->boundinfo->nindexes);
+ partindex = context->boundinfo->indexes[i];
if (partindex < 0)
{
@@ -2514,20 +2517,19 @@ get_matching_hash_bounds(PartitionPruneContext *context,
for (i = 0; i < partnatts; i++)
isnull[i] = bms_is_member(i, nullkeys);
- greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
rowHash = compute_partition_hash_value(partnatts, partsupfunc, partcollation,
values, isnull);
+ greatest_modulus = boundinfo->nindexes;
if (partindices[rowHash % greatest_modulus] >= 0)
result->bound_offsets =
bms_make_singleton(rowHash % greatest_modulus);
}
else
{
- /* Getting here means at least one hash partition exists. */
- Assert(boundinfo->ndatums > 0);
+ /* Report all valid offsets into the boundinfo->indexes array. */
result->bound_offsets = bms_add_range(NULL, 0,
- boundinfo->ndatums - 1);
+ boundinfo->nindexes - 1);
}
/*
@@ -3388,30 +3390,20 @@ perform_pruning_combine_step(PartitionPruneContext *context,
PartitionPruneStepCombine *cstep,
PruneStepResult **step_results)
{
- ListCell *lc1;
- PruneStepResult *result = NULL;
+ PruneStepResult *result = (PruneStepResult *) palloc0(sizeof(PruneStepResult));
bool firststep;
+ ListCell *lc1;
/*
* A combine step without any source steps is an indication to not perform
* any partition pruning. Return all datum indexes in that case.
*/
- result = (PruneStepResult *) palloc0(sizeof(PruneStepResult));
- if (list_length(cstep->source_stepids) == 0)
+ if (cstep->source_stepids == NIL)
{
PartitionBoundInfo boundinfo = context->boundinfo;
- int rangemax;
-
- /*
- * Add all valid offsets into the boundinfo->indexes array. For range
- * partitioning, boundinfo->indexes contains (boundinfo->ndatums + 1)
- * valid entries; otherwise there are boundinfo->ndatums.
- */
- rangemax = context->strategy == PARTITION_STRATEGY_RANGE ?
- boundinfo->ndatums : boundinfo->ndatums - 1;
result->bound_offsets =
- bms_add_range(result->bound_offsets, 0, rangemax);
+ bms_add_range(NULL, 0, boundinfo->nindexes - 1);
result->scan_default = partition_bound_has_default(boundinfo);
result->scan_null = partition_bound_accepts_nulls(boundinfo);
return result;
diff --git a/src/include/partitioning/partbounds.h b/src/include/partitioning/partbounds.h
index 6bd330dd938..ebf3ff1f497 100644
--- a/src/include/partitioning/partbounds.h
+++ b/src/include/partitioning/partbounds.h
@@ -30,7 +30,7 @@ struct RelOptInfo; /* avoid including pathnodes.h here */
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
- * upper bound). In case of hash partitioning, ndatums will be same as the
+ * upper bound). In case of hash partitioning, ndatums will be the same as the
* number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
@@ -46,24 +46,31 @@ struct RelOptInfo; /* avoid including pathnodes.h here */
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
- * every datum, which is the index of the partition that accepts a given datum.
- * In case of range partitioning, it stores one entry per distinct range
- * datum, which is the index of the partition for which a given datum
- * is an upper bound. In the case of hash partitioning, the number of the
- * entries in the indexes array is same as the greatest modulus amongst all
- * partitions. For a given partition key datum-tuple, the index of the
- * partition which would accept that datum-tuple would be given by the entry
- * pointed by remainder produced when hash value of the datum-tuple is divided
- * by the greatest modulus.
+ * each datum-array entry, which is the index of the partition that accepts
+ * rows matching that datum. So nindexes == ndatums.
+ *
+ * In the case of range partitioning, the indexes array stores one entry per
+ * distinct range datum, which is the index of the partition for which that
+ * datum is an upper bound (or -1 for a "gap" that has no partition). It is
+ * convenient to have an extra -1 entry representing values above the last
+ * range datum, so nindexes == ndatums + 1.
+ *
+ * In the case of hash partitioning, the number of entries in the indexes
+ * array is the same as the greatest modulus amongst all partitions (which
+ * is a multiple of all partition moduli), so nindexes == greatest modulus.
+ * The indexes array is indexed according to the hash key's remainder modulo
+ * the greatest modulus, and it contains either the partition index accepting
+ * that remainder, or -1 if there is no partition for that remainder.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
- int ndatums; /* Length of the datums following array */
+ int ndatums; /* Length of the datums[] array */
Datum **datums;
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
+ int nindexes; /* Length of the indexes[] array */
int *indexes; /* Partition indexes */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out
index c72a6d051f1..bde29e38a94 100644
--- a/src/test/regress/expected/partition_prune.out
+++ b/src/test/regress/expected/partition_prune.out
@@ -1538,26 +1538,27 @@ drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pru
-- result on different machines. See the definitions of
-- part_part_test_int4_ops and part_test_text_ops in insert.sql.
--
-create table hp (a int, b text) partition by hash (a part_test_int4_ops, b part_test_text_ops);
+create table hp (a int, b text, c int)
+ partition by hash (a part_test_int4_ops, b part_test_text_ops);
create table hp0 partition of hp for values with (modulus 4, remainder 0);
create table hp3 partition of hp for values with (modulus 4, remainder 3);
create table hp1 partition of hp for values with (modulus 4, remainder 1);
create table hp2 partition of hp for values with (modulus 4, remainder 2);
-insert into hp values (null, null);
-insert into hp values (1, null);
-insert into hp values (1, 'xxx');
-insert into hp values (null, 'xxx');
-insert into hp values (2, 'xxx');
-insert into hp values (1, 'abcde');
-select tableoid::regclass, * from hp order by 1;
- tableoid | a | b
-----------+---+-------
- hp0 | |
- hp0 | 1 | xxx
- hp3 | 2 | xxx
- hp1 | 1 |
- hp2 | | xxx
- hp2 | 1 | abcde
+insert into hp values (null, null, 0);
+insert into hp values (1, null, 1);
+insert into hp values (1, 'xxx', 2);
+insert into hp values (null, 'xxx', 3);
+insert into hp values (2, 'xxx', 4);
+insert into hp values (1, 'abcde', 5);
+select tableoid::regclass, * from hp order by c;
+ tableoid | a | b | c
+----------+---+-------+---
+ hp0 | | | 0
+ hp1 | 1 | | 1
+ hp0 | 1 | xxx | 2
+ hp2 | | xxx | 3
+ hp3 | 2 | xxx | 4
+ hp2 | 1 | abcde | 5
(6 rows)
-- partial keys won't prune, nor would non-equality conditions
@@ -1715,6 +1716,33 @@ explain (costs off) select * from hp where (a = 1 and b = 'abcde') or (a = 2 and
Filter: (((a = 1) AND (b = 'abcde'::text)) OR ((a = 2) AND (b = 'xxx'::text)) OR ((a IS NULL) AND (b IS NULL)))
(7 rows)
+-- test pruning when not all the partitions exist
+drop table hp1;
+drop table hp3;
+explain (costs off) select * from hp where a = 1 and b = 'abcde';
+ QUERY PLAN
+---------------------------------------------
+ Seq Scan on hp2 hp
+ Filter: ((a = 1) AND (b = 'abcde'::text))
+(2 rows)
+
+explain (costs off) select * from hp where a = 1 and b = 'abcde' and
+ (c = 2 or c = 3);
+ QUERY PLAN
+----------------------------------------------------------------------
+ Seq Scan on hp2 hp
+ Filter: ((a = 1) AND (b = 'abcde'::text) AND ((c = 2) OR (c = 3)))
+(2 rows)
+
+drop table hp2;
+explain (costs off) select * from hp where a = 1 and b = 'abcde' and
+ (c = 2 or c = 3);
+ QUERY PLAN
+--------------------------
+ Result
+ One-Time Filter: false
+(2 rows)
+
drop table hp;
--
-- Test runtime partition pruning
diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql
index ffd5fe8b0dc..6ccb52ad1d6 100644
--- a/src/test/regress/sql/partition_prune.sql
+++ b/src/test/regress/sql/partition_prune.sql
@@ -304,19 +304,20 @@ drop table lp, coll_pruning, rlp, mc3p, mc2p, boolpart, boolrangep, rp, coll_pru
-- part_part_test_int4_ops and part_test_text_ops in insert.sql.
--
-create table hp (a int, b text) partition by hash (a part_test_int4_ops, b part_test_text_ops);
+create table hp (a int, b text, c int)
+ partition by hash (a part_test_int4_ops, b part_test_text_ops);
create table hp0 partition of hp for values with (modulus 4, remainder 0);
create table hp3 partition of hp for values with (modulus 4, remainder 3);
create table hp1 partition of hp for values with (modulus 4, remainder 1);
create table hp2 partition of hp for values with (modulus 4, remainder 2);
-insert into hp values (null, null);
-insert into hp values (1, null);
-insert into hp values (1, 'xxx');
-insert into hp values (null, 'xxx');
-insert into hp values (2, 'xxx');
-insert into hp values (1, 'abcde');
-select tableoid::regclass, * from hp order by 1;
+insert into hp values (null, null, 0);
+insert into hp values (1, null, 1);
+insert into hp values (1, 'xxx', 2);
+insert into hp values (null, 'xxx', 3);
+insert into hp values (2, 'xxx', 4);
+insert into hp values (1, 'abcde', 5);
+select tableoid::regclass, * from hp order by c;
-- partial keys won't prune, nor would non-equality conditions
explain (costs off) select * from hp where a = 1;
@@ -337,6 +338,16 @@ explain (costs off) select * from hp where a = 2 and b = 'xxx';
explain (costs off) select * from hp where a = 1 and b = 'abcde';
explain (costs off) select * from hp where (a = 1 and b = 'abcde') or (a = 2 and b = 'xxx') or (a is null and b is null);
+-- test pruning when not all the partitions exist
+drop table hp1;
+drop table hp3;
+explain (costs off) select * from hp where a = 1 and b = 'abcde';
+explain (costs off) select * from hp where a = 1 and b = 'abcde' and
+ (c = 2 or c = 3);
+drop table hp2;
+explain (costs off) select * from hp where a = 1 and b = 'abcde' and
+ (c = 2 or c = 3);
+
drop table hp;
--