aboutsummaryrefslogtreecommitdiff
path: root/src/backend/partitioning/partbounds.c
diff options
context:
space:
mode:
authorEtsuro Fujita <efujita@postgresql.org>2020-04-08 10:25:00 +0900
committerEtsuro Fujita <efujita@postgresql.org>2020-04-08 10:25:00 +0900
commitc8434d64ce03c32e0029417a82ae937f2055268f (patch)
tree7ea57c9e8292b9ad8a61b99603299f66ae4951b6 /src/backend/partitioning/partbounds.c
parent41a194f49177daf9348bfde2c42e85b806dcee31 (diff)
downloadpostgresql-c8434d64ce03c32e0029417a82ae937f2055268f.tar.gz
postgresql-c8434d64ce03c32e0029417a82ae937f2055268f.zip
Allow partitionwise joins in more cases.
Previously, the partitionwise join technique only allowed partitionwise join when input partitioned tables had exactly the same partition bounds. This commit extends the technique to some cases when the tables have different partition bounds, by using an advanced partition-matching algorithm introduced by this commit. For both the input partitioned tables, the algorithm checks whether every partition of one input partitioned table only matches one partition of the other input partitioned table at most, and vice versa. In such a case the join between the tables can be broken down into joins between the matching partitions, so the algorithm produces the pairs of the matching partitions, plus the partition bounds for the join relation, to allow partitionwise join for computing the join. Currently, the algorithm works for list-partitioned and range-partitioned tables, but not hash-partitioned tables. See comments in partition_bounds_merge(). Ashutosh Bapat and Etsuro Fujita, most of regression tests by Rajkumar Raghuwanshi, some of the tests by Mark Dilger and Amul Sul, reviewed by Dmitry Dolgov and Amul Sul, with additional review at various points by Ashutosh Bapat, Mark Dilger, Robert Haas, Antonin Houska, Amit Langote, Justin Pryzby, and Tomas Vondra Discussion: https://postgr.es/m/CAFjFpRdjQvaUEV5DJX3TW6pU5eq54NCkadtxHX2JiJG_GvbrCA@mail.gmail.com
Diffstat (limited to 'src/backend/partitioning/partbounds.c')
-rw-r--r--src/backend/partitioning/partbounds.c1865
1 files changed, 1865 insertions, 0 deletions
diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c
index 4c47f54a57c..7607501fe7f 100644
--- a/src/backend/partitioning/partbounds.c
+++ b/src/backend/partitioning/partbounds.c
@@ -26,6 +26,7 @@
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
#include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "partitioning/partdesc.h"
@@ -68,6 +69,28 @@ typedef struct PartitionRangeBound
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
+/*
+ * Mapping from partitions of a joining relation to partitions of a join
+ * relation being computed (a.k.a merged partitions)
+ */
+typedef struct PartitionMap
+{
+ int nparts; /* number of partitions */
+ int *merged_indexes; /* indexes of merged partitions */
+ bool *merged; /* flags to indicate whether partitions are
+ * merged with non-dummy partitions */
+ bool did_remapping; /* did we re-map partitions? */
+ int *old_indexes; /* old indexes of merged partitions if
+ * did_remapping */
+} PartitionMap;
+
+/* Macro for comparing two range bounds */
+#define compare_range_bounds(partnatts, partsupfunc, partcollations, \
+ bound1, bound2) \
+ (partition_rbound_cmp(partnatts, partsupfunc, partcollations, \
+ (bound1)->datums, (bound1)->kind, (bound1)->lower, \
+ bound2))
+
static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
void *arg);
@@ -79,6 +102,116 @@ static PartitionBoundInfo create_list_bounds(PartitionBoundSpec **boundspecs,
int nparts, PartitionKey key, int **mapping);
static PartitionBoundInfo create_range_bounds(PartitionBoundSpec **boundspecs,
int nparts, PartitionKey key, int **mapping);
+static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc,
+ Oid *collations,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ JoinType jointype,
+ List **outer_parts,
+ List **inner_parts);
+static PartitionBoundInfo merge_range_bounds(int partnatts,
+ FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ JoinType jointype,
+ List **outer_parts,
+ List **inner_parts);
+static void init_partition_map(RelOptInfo *rel, PartitionMap *map);
+static void free_partition_map(PartitionMap *map);
+static bool is_dummy_partition(RelOptInfo *rel, int part_index);
+static int merge_matching_partitions(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ int outer_part,
+ int inner_part,
+ int *next_index);
+static int process_outer_partition(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int outer_index,
+ int inner_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index);
+static int process_inner_partition(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int inner_index,
+ int outer_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index);
+static void merge_null_partitions(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_null,
+ bool inner_has_null,
+ int outer_null,
+ int inner_null,
+ JoinType jointype,
+ int *next_index,
+ int *null_index);
+static void merge_default_partitions(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int outer_default,
+ int inner_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index);
+static int merge_partition_with_dummy(PartitionMap *map, int index,
+ int *next_index);
+static void fix_merged_indexes(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ int nmerged, List *merged_indexes);
+static void generate_matching_part_pairs(RelOptInfo *outer_rel,
+ RelOptInfo *inner_rel,
+ PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ int nmerged,
+ List **outer_parts,
+ List **inner_parts);
+static PartitionBoundInfo build_merged_partition_bounds(char strategy,
+ List *merged_datums,
+ List *merged_kinds,
+ List *merged_indexes,
+ int null_index,
+ int default_index);
+static int get_range_partition(RelOptInfo *rel,
+ PartitionBoundInfo bi,
+ int *lb_pos,
+ PartitionRangeBound *lb,
+ PartitionRangeBound *ub);
+static int get_range_partition_internal(PartitionBoundInfo bi,
+ int *lb_pos,
+ PartitionRangeBound *lb,
+ PartitionRangeBound *ub);
+static bool compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ PartitionRangeBound *outer_lb,
+ PartitionRangeBound *outer_ub,
+ PartitionRangeBound *inner_lb,
+ PartitionRangeBound *inner_ub,
+ int *lb_cmpval, int *ub_cmpval);
+static void get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations, JoinType jointype,
+ PartitionRangeBound *outer_lb,
+ PartitionRangeBound *outer_ub,
+ PartitionRangeBound *inner_lb,
+ PartitionRangeBound *inner_ub,
+ int lb_cmpval, int ub_cmpval,
+ PartitionRangeBound *merged_lb,
+ PartitionRangeBound *merged_ub);
+static void add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ PartitionRangeBound *merged_lb,
+ PartitionRangeBound *merged_ub,
+ int merged_index,
+ List **merged_datums,
+ List **merged_kinds,
+ List **merged_indexes);
static PartitionRangeBound *make_one_partition_rbound(PartitionKey key, int index,
List *datums, bool lower);
static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
@@ -867,6 +1000,1738 @@ partition_bounds_copy(PartitionBoundInfo src,
}
/*
+ * partition_bounds_merge
+ * Check to see whether every partition of 'outer_rel' matches/overlaps
+ * one partition of 'inner_rel' at most, and vice versa; and if so, build
+ * and return the partition bounds for a join relation between the rels,
+ * generating two lists of the matching/overlapping partitions, which are
+ * returned to *outer_parts and *inner_parts respectively.
+ *
+ * The lists contain the same number of partitions, and the partitions at the
+ * same positions in the lists indicate join pairs used for partitioned join.
+ * If a partition on one side matches/overlaps multiple partitions on the other
+ * side, this function returns NULL, setting *outer_parts and *inner_parts to
+ * NIL.
+ */
+PartitionBoundInfo
+partition_bounds_merge(int partnatts,
+ FmgrInfo *partsupfunc, Oid *partcollation,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ JoinType jointype,
+ List **outer_parts, List **inner_parts)
+{
+ PartitionBoundInfo outer_binfo = outer_rel->boundinfo;
+ PartitionBoundInfo inner_binfo = inner_rel->boundinfo;
+
+ /*
+ * Currently, this function is called only from try_partitionwise_join(),
+ * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI.
+ */
+ Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT ||
+ jointype == JOIN_FULL || jointype == JOIN_SEMI ||
+ jointype == JOIN_ANTI);
+
+ /* The partitioning strategies should be the same. */
+ Assert(outer_binfo->strategy == inner_binfo->strategy);
+
+ *outer_parts = *inner_parts = NIL;
+ switch (outer_binfo->strategy)
+ {
+ case PARTITION_STRATEGY_HASH:
+
+ /*
+ * For hash partitioned tables, we currently support partitioned
+ * join only when they have exactly the same partition bounds.
+ *
+ * XXX: it might be possible to relax the restriction to support
+ * cases where hash partitioned tables have missing partitions
+ * and/or different moduli, but it's not clear if it would be
+ * useful to support the former case since it's unusual to have
+ * missing partitions. On the other hand, it would be useful to
+ * support the latter case, but in that case, there is a high
+ * probability that a partition on one side will match multiple
+ * partitions on the other side, which is the scenario the current
+ * implementation of partitioned join can't handle.
+ */
+ return NULL;
+
+ case PARTITION_STRATEGY_LIST:
+ return merge_list_bounds(partsupfunc,
+ partcollation,
+ outer_rel,
+ inner_rel,
+ jointype,
+ outer_parts,
+ inner_parts);
+
+ case PARTITION_STRATEGY_RANGE:
+ return merge_range_bounds(partnatts,
+ partsupfunc,
+ partcollation,
+ outer_rel,
+ inner_rel,
+ jointype,
+ outer_parts,
+ inner_parts);
+
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) outer_binfo->strategy);
+ return NULL; /* keep compiler quiet */
+ }
+}
+
+/*
+ * merge_list_bounds
+ * Create the partition bounds for a join relation between list
+ * partitioned tables, if possible
+ *
+ * In this function we try to find sets of matching partitions from both sides
+ * by comparing list values stored in their partition bounds. Since the list
+ * values appear in the ascending order, an algorithm similar to merge join is
+ * used for that. If a partition on one side doesn't have a matching
+ * partition on the other side, the algorithm tries to match it with the
+ * default partition on the other side if any; if not, the algorithm tries to
+ * match it with a dummy partition on the other side if it's on the
+ * non-nullable side of an outer join. Also, if both sides have the default
+ * partitions, the algorithm tries to match them with each other. We give up
+ * if the algorithm finds a partition matching multiple partitions on the
+ * other side, which is the scenario the current implementation of partitioned
+ * join can't handle.
+ */
+static PartitionBoundInfo
+merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ JoinType jointype,
+ List **outer_parts, List **inner_parts)
+{
+ PartitionBoundInfo merged_bounds = NULL;
+ PartitionBoundInfo outer_bi = outer_rel->boundinfo;
+ PartitionBoundInfo inner_bi = inner_rel->boundinfo;
+ bool outer_has_default = partition_bound_has_default(outer_bi);
+ bool inner_has_default = partition_bound_has_default(inner_bi);
+ int outer_default = outer_bi->default_index;
+ int inner_default = inner_bi->default_index;
+ bool outer_has_null = partition_bound_accepts_nulls(outer_bi);
+ bool inner_has_null = partition_bound_accepts_nulls(inner_bi);
+ PartitionMap outer_map;
+ PartitionMap inner_map;
+ int outer_pos;
+ int inner_pos;
+ int next_index = 0;
+ int null_index = -1;
+ int default_index = -1;
+ List *merged_datums = NIL;
+ List *merged_indexes = NIL;
+
+ Assert(*outer_parts == NIL);
+ Assert(*inner_parts == NIL);
+ Assert(outer_bi->strategy == inner_bi->strategy &&
+ outer_bi->strategy == PARTITION_STRATEGY_LIST);
+ /* List partitioning doesn't require kinds. */
+ Assert(!outer_bi->kind && !inner_bi->kind);
+
+ init_partition_map(outer_rel, &outer_map);
+ init_partition_map(inner_rel, &inner_map);
+
+ /*
+ * If the default partitions (if any) have been proven empty, deem them
+ * non-existent.
+ */
+ if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
+ outer_has_default = false;
+ if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
+ inner_has_default = false;
+
+ /*
+ * Merge partitions from both sides. In each iteration we compare a pair
+ * of list values, one from each side, and decide whether the corresponding
+ * partitions match or not. If the two values match exactly, move to the
+ * next pair of list values, otherwise move to the next list value on the
+ * side with a smaller list value.
+ */
+ outer_pos = inner_pos = 0;
+ while (outer_pos < outer_bi->ndatums || inner_pos < inner_bi->ndatums)
+ {
+ int outer_index = -1;
+ int inner_index = -1;
+ Datum *outer_datums;
+ Datum *inner_datums;
+ int cmpval;
+ Datum *merged_datum = NULL;
+ int merged_index = -1;
+
+ if (outer_pos < outer_bi->ndatums)
+ {
+ /*
+ * If the partition on the outer side has been proven empty, ignore
+ * it and move to the next datum on the outer side.
+ */
+ outer_index = outer_bi->indexes[outer_pos];
+ if (is_dummy_partition(outer_rel, outer_index))
+ {
+ outer_pos++;
+ continue;
+ }
+ }
+ if (inner_pos < inner_bi->ndatums)
+ {
+ /*
+ * If the partition on the inner side has been proven empty, ignore
+ * it and move to the next datum on the inner side.
+ */
+ inner_index = inner_bi->indexes[inner_pos];
+ if (is_dummy_partition(inner_rel, inner_index))
+ {
+ inner_pos++;
+ continue;
+ }
+ }
+
+ /* Get the list values. */
+ outer_datums = outer_pos < outer_bi->ndatums ?
+ outer_bi->datums[outer_pos] : NULL;
+ inner_datums = inner_pos < inner_bi->ndatums ?
+ inner_bi->datums[inner_pos] : NULL;
+
+ /*
+ * We run this loop till both sides finish. This allows us to avoid
+ * duplicating code to handle the remaining values on the side which
+ * finishes later. For that we set the comparison parameter cmpval in
+ * such a way that it appears as if the side which finishes earlier has
+ * an extra value higher than any other value on the unfinished side.
+ * That way we advance the values on the unfinished side till all of
+ * its values are exhausted.
+ */
+ if (outer_pos >= outer_bi->ndatums)
+ cmpval = 1;
+ else if (inner_pos >= inner_bi->ndatums)
+ cmpval = -1;
+ else
+ {
+ Assert(outer_datums != NULL && inner_datums != NULL);
+ cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
+ partcollation[0],
+ outer_datums[0],
+ inner_datums[0]));
+ }
+
+ if (cmpval == 0)
+ {
+ /* Two list values match exactly. */
+ Assert(outer_pos < outer_bi->ndatums);
+ Assert(inner_pos < inner_bi->ndatums);
+ Assert(outer_index >= 0);
+ Assert(inner_index >= 0);
+
+ /*
+ * Try merging both paritions. If successful, add the list value
+ * and index of the merged partition below.
+ */
+ merged_index = merge_matching_partitions(&outer_map, &inner_map,
+ outer_index, inner_index,
+ &next_index);
+ if (merged_index == -1)
+ goto cleanup;
+
+ merged_datum = outer_datums;
+
+ /* Move to the next pair of list values. */
+ outer_pos++;
+ inner_pos++;
+ }
+ else if (cmpval < 0)
+ {
+ /* A list value missing from the inner side. */
+ Assert(outer_pos < outer_bi->ndatums);
+
+ /*
+ * If the inner side has the default partition, or this is an outer
+ * join, try to assign a merged partition to the outer partition
+ * (see process_outer_partition()). Otherwise, the outer partition
+ * will not contribute to the result.
+ */
+ if (inner_has_default || IS_OUTER_JOIN(jointype))
+ {
+ /* Get the outer partition. */
+ outer_index = outer_bi->indexes[outer_pos];
+ Assert(outer_index >= 0);
+ merged_index = process_outer_partition(&outer_map,
+ &inner_map,
+ outer_has_default,
+ inner_has_default,
+ outer_index,
+ inner_default,
+ jointype,
+ &next_index,
+ &default_index);
+ if (merged_index == -1)
+ goto cleanup;
+ merged_datum = outer_datums;
+ }
+
+ /* Move to the next list value on the outer side. */
+ outer_pos++;
+ }
+ else
+ {
+ /* A list value missing from the outer side. */
+ Assert(cmpval > 0);
+ Assert(inner_pos < inner_bi->ndatums);
+
+ /*
+ * If the outer side has the default partition, or this is a FULL
+ * join, try to assign a merged partition to the inner partition
+ * (see process_inner_partition()). Otherwise, the inner partition
+ * will not contribute to the result.
+ */
+ if (outer_has_default || jointype == JOIN_FULL)
+ {
+ /* Get the inner partition. */
+ inner_index = inner_bi->indexes[inner_pos];
+ Assert(inner_index >= 0);
+ merged_index = process_inner_partition(&outer_map,
+ &inner_map,
+ outer_has_default,
+ inner_has_default,
+ inner_index,
+ outer_default,
+ jointype,
+ &next_index,
+ &default_index);
+ if (merged_index == -1)
+ goto cleanup;
+ merged_datum = inner_datums;
+ }
+
+ /* Move to the next list value on the inner side. */
+ inner_pos++;
+ }
+
+ /*
+ * If we assigned a merged partition, add the list value and index of
+ * the merged partition if appropriate.
+ */
+ if (merged_index >= 0 && merged_index != default_index)
+ {
+ merged_datums = lappend(merged_datums, merged_datum);
+ merged_indexes = lappend_int(merged_indexes, merged_index);
+ }
+ }
+
+ /*
+ * If the NULL partitions (if any) have been proven empty, deem them
+ * non-existent.
+ */
+ if (outer_has_null &&
+ is_dummy_partition(outer_rel, outer_bi->null_index))
+ outer_has_null = false;
+ if (inner_has_null &&
+ is_dummy_partition(inner_rel, inner_bi->null_index))
+ inner_has_null = false;
+
+ /* Merge the NULL partitions if any. */
+ if (outer_has_null || inner_has_null)
+ merge_null_partitions(&outer_map, &inner_map,
+ outer_has_null, inner_has_null,
+ outer_bi->null_index, inner_bi->null_index,
+ jointype, &next_index, &null_index);
+ else
+ Assert(null_index == -1);
+
+ /* Merge the default partitions if any. */
+ if (outer_has_default || inner_has_default)
+ merge_default_partitions(&outer_map, &inner_map,
+ outer_has_default, inner_has_default,
+ outer_default, inner_default,
+ jointype, &next_index, &default_index);
+ else
+ Assert(default_index == -1);
+
+ /* If we have merged partitions, create the partition bounds. */
+ if (next_index > 0)
+ {
+ /* Fix the merged_indexes list if necessary. */
+ if (outer_map.did_remapping || inner_map.did_remapping)
+ {
+ Assert(jointype == JOIN_FULL);
+ fix_merged_indexes(&outer_map, &inner_map,
+ next_index, merged_indexes);
+ }
+
+ /* Use maps to match partitions from inputs. */
+ generate_matching_part_pairs(outer_rel, inner_rel,
+ &outer_map, &inner_map,
+ next_index,
+ outer_parts, inner_parts);
+ Assert(*outer_parts != NIL);
+ Assert(*inner_parts != NIL);
+ Assert(list_length(*outer_parts) == list_length(*inner_parts));
+ Assert(list_length(*outer_parts) <= next_index);
+
+ /* Make a PartitionBoundInfo struct to return. */
+ merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
+ merged_datums,
+ NIL,
+ merged_indexes,
+ null_index,
+ default_index);
+ Assert(merged_bounds);
+ }
+
+cleanup:
+ /* Free local memory before returning. */
+ list_free(merged_datums);
+ list_free(merged_indexes);
+ free_partition_map(&outer_map);
+ free_partition_map(&inner_map);
+
+ return merged_bounds;
+}
+
+/*
+ * merge_range_bounds
+ * Create the partition bounds for a join relation between range
+ * partitioned tables, if possible
+ *
+ * In this function we try to find sets of overlapping partitions from both
+ * sides by comparing ranges stored in their partition bounds. Since the
+ * ranges appear in the ascending order, an algorithm similar to merge join is
+ * used for that. If a partition on one side doesn't have an overlapping
+ * partition on the other side, the algorithm tries to match it with the
+ * default partition on the other side if any; if not, the algorithm tries to
+ * match it with a dummy partition on the other side if it's on the
+ * non-nullable side of an outer join. Also, if both sides have the default
+ * partitions, the algorithm tries to match them with each other. We give up
+ * if the algorithm finds a partition overlapping multiple partitions on the
+ * other side, which is the scenario the current implementation of partitioned
+ * join can't handle.
+ */
+static PartitionBoundInfo
+merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ JoinType jointype,
+ List **outer_parts, List **inner_parts)
+{
+ PartitionBoundInfo merged_bounds = NULL;
+ PartitionBoundInfo outer_bi = outer_rel->boundinfo;
+ PartitionBoundInfo inner_bi = inner_rel->boundinfo;
+ bool outer_has_default = partition_bound_has_default(outer_bi);
+ bool inner_has_default = partition_bound_has_default(inner_bi);
+ int outer_default = outer_bi->default_index;
+ int inner_default = inner_bi->default_index;
+ PartitionMap outer_map;
+ PartitionMap inner_map;
+ int outer_index;
+ int inner_index;
+ int outer_lb_pos;
+ int inner_lb_pos;
+ PartitionRangeBound outer_lb;
+ PartitionRangeBound outer_ub;
+ PartitionRangeBound inner_lb;
+ PartitionRangeBound inner_ub;
+ int next_index = 0;
+ int default_index = -1;
+ List *merged_datums = NIL;
+ List *merged_kinds = NIL;
+ List *merged_indexes = NIL;
+
+ Assert(*outer_parts == NIL);
+ Assert(*inner_parts == NIL);
+ Assert(outer_bi->strategy == inner_bi->strategy &&
+ outer_bi->strategy == PARTITION_STRATEGY_RANGE);
+
+ init_partition_map(outer_rel, &outer_map);
+ init_partition_map(inner_rel, &inner_map);
+
+ /*
+ * If the default partitions (if any) have been proven empty, deem them
+ * non-existent.
+ */
+ if (outer_has_default && is_dummy_partition(outer_rel, outer_default))
+ outer_has_default = false;
+ if (inner_has_default && is_dummy_partition(inner_rel, inner_default))
+ inner_has_default = false;
+
+ /*
+ * Merge partitions from both sides. In each iteration we compare a pair
+ * of ranges, one from each side, and decide whether the corresponding
+ * partitions match or not. If the two ranges overlap, move to the next
+ * pair of ranges, otherwise move to the next range on the side with a
+ * lower range. outer_lb_pos/inner_lb_pos keep track of the positions of
+ * lower bounds in the datums arrays in the outer/inner PartitionBoundInfos
+ * respectively.
+ */
+ outer_lb_pos = inner_lb_pos = 0;
+ outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
+ &outer_lb, &outer_ub);
+ inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
+ &inner_lb, &inner_ub);
+ while (outer_index >= 0 || inner_index >= 0)
+ {
+ bool overlap;
+ int ub_cmpval;
+ int lb_cmpval;
+ PartitionRangeBound merged_lb = {-1, NULL, NULL, true};
+ PartitionRangeBound merged_ub = {-1, NULL, NULL, false};
+ int merged_index = -1;
+
+ /*
+ * We run this loop till both sides finish. This allows us to avoid
+ * duplicating code to handle the remaining ranges on the side which
+ * finishes later. For that we set the comparison parameter cmpval in
+ * such a way that it appears as if the side which finishes earlier has
+ * an extra range higher than any other range on the unfinished side.
+ * That way we advance the ranges on the unfinished side till all of
+ * its ranges are exhausted.
+ */
+ if (outer_index == -1)
+ {
+ overlap = false;
+ lb_cmpval = 1;
+ ub_cmpval = 1;
+ }
+ else if (inner_index == -1)
+ {
+ overlap = false;
+ lb_cmpval = -1;
+ ub_cmpval = -1;
+ }
+ else
+ overlap = compare_range_partitions(partnatts, partsupfuncs,
+ partcollations,
+ &outer_lb, &outer_ub,
+ &inner_lb, &inner_ub,
+ &lb_cmpval, &ub_cmpval);
+
+ if (overlap)
+ {
+ /* Two ranges overlap; form a join pair. */
+
+ PartitionRangeBound save_outer_ub;
+ PartitionRangeBound save_inner_ub;
+
+ /* Both partitions should not have been merged yet. */
+ Assert(outer_index >= 0);
+ Assert(outer_map.merged_indexes[outer_index] == -1 &&
+ outer_map.merged[outer_index] == false);
+ Assert(inner_index >= 0);
+ Assert(inner_map.merged_indexes[inner_index] == -1 &&
+ inner_map.merged[inner_index] == false);
+
+ /*
+ * Get the index of the merged partition. Both partitions aren't
+ * merged yet, so the partitions should be merged successfully.
+ */
+ merged_index = merge_matching_partitions(&outer_map, &inner_map,
+ outer_index, inner_index,
+ &next_index);
+ Assert(merged_index >= 0);
+
+ /* Get the range of the merged partition. */
+ get_merged_range_bounds(partnatts, partsupfuncs,
+ partcollations, jointype,
+ &outer_lb, &outer_ub,
+ &inner_lb, &inner_ub,
+ lb_cmpval, ub_cmpval,
+ &merged_lb, &merged_ub);
+
+ /* Save the upper bounds of both partitions for use below. */
+ save_outer_ub = outer_ub;
+ save_inner_ub = inner_ub;
+
+ /* Move to the next pair of ranges. */
+ outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
+ &outer_lb, &outer_ub);
+ inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
+ &inner_lb, &inner_ub);
+
+ /*
+ * If the range of a partition on one side overlaps the range of
+ * the next partition on the other side, that will cause the
+ * partition on one side to match at least two partitions on the
+ * other side, which is the case that we currently don't support
+ * partitioned join for; give up.
+ */
+ if (ub_cmpval > 0 && inner_index >= 0 &&
+ compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ &save_outer_ub, &inner_lb) > 0)
+ goto cleanup;
+ if (ub_cmpval < 0 && outer_index >= 0 &&
+ compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ &outer_lb, &save_inner_ub) < 0)
+ goto cleanup;
+
+ /*
+ * A row from a non-overlapping portion (if any) of a partition
+ * on one side might find its join partner in the default
+ * partition (if any) on the other side, causing the same
+ * situation as above; give up in that case.
+ */
+ if ((outer_has_default && (lb_cmpval > 0 || ub_cmpval < 0)) ||
+ (inner_has_default && (lb_cmpval < 0 || ub_cmpval > 0)))
+ goto cleanup;
+ }
+ else if (ub_cmpval < 0)
+ {
+ /* A non-overlapping outer range. */
+
+ /* The outer partition should not have been merged yet. */
+ Assert(outer_index >= 0);
+ Assert(outer_map.merged_indexes[outer_index] == -1 &&
+ outer_map.merged[outer_index] == false);
+
+ /*
+ * If the inner side has the default partition, or this is an outer
+ * join, try to assign a merged partition to the outer partition
+ * (see process_outer_partition()). Otherwise, the outer partition
+ * will not contribute to the result.
+ */
+ if (inner_has_default || IS_OUTER_JOIN(jointype))
+ {
+ merged_index = process_outer_partition(&outer_map,
+ &inner_map,
+ outer_has_default,
+ inner_has_default,
+ outer_index,
+ inner_default,
+ jointype,
+ &next_index,
+ &default_index);
+ if (merged_index == -1)
+ goto cleanup;
+ merged_lb = outer_lb;
+ merged_ub = outer_ub;
+ }
+
+ /* Move to the next range on the outer side. */
+ outer_index = get_range_partition(outer_rel, outer_bi, &outer_lb_pos,
+ &outer_lb, &outer_ub);
+ }
+ else
+ {
+ /* A non-overlapping inner range. */
+ Assert(ub_cmpval > 0);
+
+ /* The inner partition should not have been merged yet. */
+ Assert(inner_index >= 0);
+ Assert(inner_map.merged_indexes[inner_index] == -1 &&
+ inner_map.merged[inner_index] == false);
+
+ /*
+ * If the outer side has the default partition, or this is a FULL
+ * join, try to assign a merged partition to the inner partition
+ * (see process_inner_partition()). Otherwise, the inner partition
+ * will not contribute to the result.
+ */
+ if (outer_has_default || jointype == JOIN_FULL)
+ {
+ merged_index = process_inner_partition(&outer_map,
+ &inner_map,
+ outer_has_default,
+ inner_has_default,
+ inner_index,
+ outer_default,
+ jointype,
+ &next_index,
+ &default_index);
+ if (merged_index == -1)
+ goto cleanup;
+ merged_lb = inner_lb;
+ merged_ub = inner_ub;
+ }
+
+ /* Move to the next range on the inner side. */
+ inner_index = get_range_partition(inner_rel, inner_bi, &inner_lb_pos,
+ &inner_lb, &inner_ub);
+ }
+
+ /*
+ * If we assigned a merged partition, add the range bounds and index of
+ * the merged partition if appropriate.
+ */
+ if (merged_index >= 0 && merged_index != default_index)
+ add_merged_range_bounds(partnatts, partsupfuncs, partcollations,
+ &merged_lb, &merged_ub, merged_index,
+ &merged_datums, &merged_kinds,
+ &merged_indexes);
+ }
+
+ /* Merge the default partitions if any. */
+ if (outer_has_default || inner_has_default)
+ merge_default_partitions(&outer_map, &inner_map,
+ outer_has_default, inner_has_default,
+ outer_default, inner_default,
+ jointype, &next_index, &default_index);
+ else
+ Assert(default_index == -1);
+
+ /* If we have merged partitions, create the partition bounds. */
+ if (next_index > 0)
+ {
+ /*
+ * Unlike the case of list partitioning, we wouldn't have re-merged
+ * partitions, so did_remapping should be left alone.
+ */
+ Assert(!outer_map.did_remapping);
+ Assert(!inner_map.did_remapping);
+
+ /* Use maps to match partitions from inputs. */
+ generate_matching_part_pairs(outer_rel, inner_rel,
+ &outer_map, &inner_map,
+ next_index,
+ outer_parts, inner_parts);
+ Assert(*outer_parts != NIL);
+ Assert(*inner_parts != NIL);
+ Assert(list_length(*outer_parts) == list_length(*inner_parts));
+ Assert(list_length(*outer_parts) == next_index);
+
+ /* Make a PartitionBoundInfo struct to return. */
+ merged_bounds = build_merged_partition_bounds(outer_bi->strategy,
+ merged_datums,
+ merged_kinds,
+ merged_indexes,
+ -1,
+ default_index);
+ Assert(merged_bounds);
+ }
+
+cleanup:
+ /* Free local memory before returning. */
+ list_free(merged_datums);
+ list_free(merged_kinds);
+ list_free(merged_indexes);
+ free_partition_map(&outer_map);
+ free_partition_map(&inner_map);
+
+ return merged_bounds;
+}
+
+/*
+ * init_partition_map
+ * Initialize a PartitionMap struct for given relation
+ */
+static void
+init_partition_map(RelOptInfo *rel, PartitionMap *map)
+{
+ int nparts = rel->nparts;
+ int i;
+
+ map->nparts = nparts;
+ map->merged_indexes = (int *) palloc(sizeof(int) * nparts);
+ map->merged = (bool *) palloc(sizeof(bool) * nparts);
+ map->did_remapping = false;
+ map->old_indexes = (int *) palloc(sizeof(int) * nparts);
+ for (i = 0; i < nparts; i++)
+ {
+ map->merged_indexes[i] = map->old_indexes[i] = -1;
+ map->merged[i] = false;
+ }
+}
+
+/*
+ * free_partition_map
+ */
+static void
+free_partition_map(PartitionMap *map)
+{
+ pfree(map->merged_indexes);
+ pfree(map->merged);
+ pfree(map->old_indexes);
+}
+
+/*
+ * is_dummy_partition --- has partition been proven empty?
+ */
+static bool
+is_dummy_partition(RelOptInfo *rel, int part_index)
+{
+ RelOptInfo *part_rel;
+
+ Assert(part_index >= 0);
+ part_rel = rel->part_rels[part_index];
+ if (part_rel == NULL || IS_DUMMY_REL(part_rel))
+ return true;
+ return false;
+}
+
+/*
+ * merge_matching_partitions
+ * Try to merge given outer/inner partitions, and return the index of a
+ * merged partition produced from them if successful, -1 otherwise
+ *
+ * If the merged partition is newly created, *next_index is incremented.
+ */
+static int
+merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map,
+ int outer_index, int inner_index, int *next_index)
+{
+ int outer_merged_index;
+ int inner_merged_index;
+ bool outer_merged;
+ bool inner_merged;
+
+ Assert(outer_index >= 0 && outer_index < outer_map->nparts);
+ outer_merged_index = outer_map->merged_indexes[outer_index];
+ outer_merged = outer_map->merged[outer_index];
+ Assert(inner_index >= 0 && inner_index < inner_map->nparts);
+ inner_merged_index = inner_map->merged_indexes[inner_index];
+ inner_merged = inner_map->merged[inner_index];
+
+ /*
+ * Handle cases where we have already assigned a merged partition to each
+ * of the given partitions.
+ */
+ if (outer_merged_index >= 0 && inner_merged_index >= 0)
+ {
+ /*
+ * If the mereged partitions are the same, no need to do anything;
+ * return the index of the merged partitions. Otherwise, if each of
+ * the given partitions has been merged with a dummy partition on the
+ * other side, re-map them to either of the two merged partitions.
+ * Otherwise, they can't be merged, so return -1.
+ */
+ if (outer_merged_index == inner_merged_index)
+ {
+ Assert(outer_merged);
+ Assert(inner_merged);
+ return outer_merged_index;
+ }
+ if (!outer_merged && !inner_merged)
+ {
+ /*
+ * This can only happen for a list-partitioning case. We re-map
+ * them to the merged partition with the smaller of the two merged
+ * indexes to preserve the property that the canonical order of
+ * list partitions is determined by the indexes assigned to the
+ * smallest list value of each partition.
+ */
+ if (outer_merged_index < inner_merged_index)
+ {
+ outer_map->merged[outer_index] = true;
+ inner_map->merged_indexes[inner_index] = outer_merged_index;
+ inner_map->merged[inner_index] = true;
+ inner_map->did_remapping = true;
+ inner_map->old_indexes[inner_index] = inner_merged_index;
+ return outer_merged_index;
+ }
+ else
+ {
+ inner_map->merged[inner_index] = true;
+ outer_map->merged_indexes[outer_index] = inner_merged_index;
+ outer_map->merged[outer_index] = true;
+ outer_map->did_remapping = true;
+ outer_map->old_indexes[outer_index] = outer_merged_index;
+ return inner_merged_index;
+ }
+ }
+ return -1;
+ }
+
+ /* At least one of the given partitions should not have yet been merged. */
+ Assert(outer_merged_index == -1 || inner_merged_index == -1);
+
+ /*
+ * If neither of them has been merged, merge them. Otherwise, if one has
+ * been merged with a dummy relation on the other side (and the other
+ * hasn't yet been merged with anything), re-merge them. Otherwise, they
+ * can't be merged, so return -1.
+ */
+ if (outer_merged_index == -1 && inner_merged_index == -1)
+ {
+ int merged_index = *next_index;
+
+ Assert(!outer_merged);
+ Assert(!inner_merged);
+ outer_map->merged_indexes[outer_index] = merged_index;
+ outer_map->merged[outer_index] = true;
+ inner_map->merged_indexes[inner_index] = merged_index;
+ inner_map->merged[inner_index] = true;
+ *next_index = *next_index + 1;
+ return merged_index;
+ }
+ if (outer_merged_index >= 0 && !outer_map->merged[outer_index])
+ {
+ Assert(inner_merged_index == -1);
+ Assert(!inner_merged);
+ inner_map->merged_indexes[inner_index] = outer_merged_index;
+ inner_map->merged[inner_index] = true;
+ outer_map->merged[outer_index] = true;
+ return outer_merged_index;
+ }
+ if (inner_merged_index >= 0 && !inner_map->merged[inner_index])
+ {
+ Assert(outer_merged_index == -1);
+ Assert(!outer_merged);
+ outer_map->merged_indexes[outer_index] = inner_merged_index;
+ outer_map->merged[outer_index] = true;
+ inner_map->merged[inner_index] = true;
+ return inner_merged_index;
+ }
+ return -1;
+}
+
+/*
+ * process_outer_partition
+ * Try to assign given outer partition a merged partition, and return the
+ * index of the merged partition if successful, -1 otherwise
+ *
+ * If the partition is newly created, *next_index is incremented. Also, if it
+ * is the default partition of the join relation, *default_index is set to the
+ * index if not already done.
+ */
+static int
+process_outer_partition(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int outer_index,
+ int inner_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index)
+{
+ int merged_index = -1;
+
+ Assert(outer_index >= 0);
+
+ /*
+ * If the inner side has the default partition, a row from the outer
+ * partition might find its join partner in the default partition; try
+ * merging the outer partition with the default partition. Otherwise, this
+ * should be an outer join, in which case the outer partition has to be
+ * scanned all the way anyway; merge the outer partition with a dummy
+ * partition on the other side.
+ */
+ if (inner_has_default)
+ {
+ Assert(inner_default >= 0);
+
+ /*
+ * If the outer side has the default partition as well, the default
+ * partition on the inner side will have two matching partitions on the
+ * other side: the outer partition and the default partition on the
+ * outer side. Partitionwise join doesn't handle this scenario yet.
+ */
+ if (outer_has_default)
+ return -1;
+
+ merged_index = merge_matching_partitions(outer_map, inner_map,
+ outer_index, inner_default,
+ next_index);
+ if (merged_index == -1)
+ return -1;
+
+ /*
+ * If this is a FULL join, the default partition on the inner side
+ * has to be scanned all the way anyway, so the resulting partition
+ * will contain all key values from the default partition, which any
+ * other partition of the join relation will not contain. Thus the
+ * resulting partition will act as the default partition of the join
+ * relation; record the index in *default_index if not already done.
+ */
+ if (jointype == JOIN_FULL)
+ {
+ if (*default_index == -1)
+ *default_index = merged_index;
+ else
+ Assert(*default_index == merged_index);
+ }
+ }
+ else
+ {
+ Assert(IS_OUTER_JOIN(jointype));
+ Assert(jointype != JOIN_RIGHT);
+
+ /* If we have already assigned a partition, no need to do anything. */
+ merged_index = outer_map->merged_indexes[outer_index];
+ if (merged_index == -1)
+ merged_index = merge_partition_with_dummy(outer_map, outer_index,
+ next_index);
+ }
+ return merged_index;
+}
+
+/*
+ * process_inner_partition
+ * Try to assign given inner partition a merged partition, and return the
+ * index of the merged partition if successful, -1 otherwise
+ *
+ * If the partition is newly created, *next_index is incremented. Also, if it
+ * is the default partition of the join relation, *default_index is set to the
+ * index if not already done.
+ */
+static int
+process_inner_partition(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int inner_index,
+ int outer_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index)
+{
+ int merged_index = -1;
+
+ Assert(inner_index >= 0);
+
+ /*
+ * If the outer side has the default partition, a row from the inner
+ * partition might find its join partner in the default partition; try
+ * merging the inner partition with the default partition. Otherwise, this
+ * should be a FULL join, in which case the inner partition has to be
+ * scanned all the way anyway; merge the inner partition with a dummy
+ * partition on the other side.
+ */
+ if (outer_has_default)
+ {
+ Assert(outer_default >= 0);
+
+ /*
+ * If the inner side has the default partition as well, the default
+ * partition on the outer side will have two matching partitions on the
+ * other side: the inner partition and the default partition on the
+ * inner side. Partitionwise join doesn't handle this scenario yet.
+ */
+ if (inner_has_default)
+ return -1;
+
+ merged_index = merge_matching_partitions(outer_map, inner_map,
+ outer_default, inner_index,
+ next_index);
+ if (merged_index == -1)
+ return -1;
+
+ /*
+ * If this is an outer join, the default partition on the outer side
+ * has to be scanned all the way anyway, so the resulting partition
+ * will contain all key values from the default partition, which any
+ * other partition of the join relation will not contain. Thus the
+ * resulting partition will act as the default partition of the join
+ * relation; record the index in *default_index if not already done.
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ Assert(jointype != JOIN_RIGHT);
+ if (*default_index == -1)
+ *default_index = merged_index;
+ else
+ Assert(*default_index == merged_index);
+ }
+ }
+ else
+ {
+ Assert(jointype == JOIN_FULL);
+
+ /* If we have already assigned a partition, no need to do anything. */
+ merged_index = inner_map->merged_indexes[inner_index];
+ if (merged_index == -1)
+ merged_index = merge_partition_with_dummy(inner_map, inner_index,
+ next_index);
+ }
+ return merged_index;
+}
+
+/*
+ * merge_null_partitions
+ * Merge the NULL partitions from a join's outer and inner sides.
+ *
+ * If the merged partition produced from them is the NULL partition of the join
+ * relation, *null_index is set to the index of the merged partition.
+ *
+ * Note: We assume here that the join clause for a partitioned join is strict
+ * because have_partkey_equi_join() requires that the corresponding operator
+ * be mergejoinable, and we currently assume that mergejoinable operators are
+ * strict (see MJEvalOuterValues()/MJEvalInnerValues()).
+ */
+static void
+merge_null_partitions(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_null,
+ bool inner_has_null,
+ int outer_null,
+ int inner_null,
+ JoinType jointype,
+ int *next_index,
+ int *null_index)
+{
+ bool consider_outer_null = false;
+ bool consider_inner_null = false;
+
+ Assert(outer_has_null || inner_has_null);
+ Assert(*null_index == -1);
+
+ /*
+ * Check whether the NULL partitions have already been merged and if so,
+ * set the consider_outer_null/consider_inner_null flags.
+ */
+ if (outer_has_null)
+ {
+ Assert(outer_null >= 0 && outer_null < outer_map->nparts);
+ if (outer_map->merged_indexes[outer_null] == -1)
+ consider_outer_null = true;
+ }
+ if (inner_has_null)
+ {
+ Assert(inner_null >= 0 && inner_null < inner_map->nparts);
+ if (inner_map->merged_indexes[inner_null] == -1)
+ consider_inner_null = true;
+ }
+
+ /* If both flags are set false, we don't need to do anything. */
+ if (!consider_outer_null && !consider_inner_null)
+ return;
+
+ if (consider_outer_null && !consider_inner_null)
+ {
+ Assert(outer_has_null);
+
+ /*
+ * If this is an outer join, the NULL partition on the outer side has
+ * to be scanned all the way anyway; merge the NULL partition with a
+ * dummy partition on the other side. In that case consider_outer_null
+ * means that the NULL partition only contains NULL values as the key
+ * values, so the merged partition will do so; treat it as the NULL
+ * partition of the join relation.
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ Assert(jointype != JOIN_RIGHT);
+ *null_index = merge_partition_with_dummy(outer_map, outer_null,
+ next_index);
+ }
+ }
+ else if (!consider_outer_null && consider_inner_null)
+ {
+ Assert(inner_has_null);
+
+ /*
+ * If this is a FULL join, the NULL partition on the inner side has
+ * to be scanned all the way anyway; merge the NULL partition with a
+ * dummy partition on the other side. In that case consider_inner_null
+ * means that the NULL partition only contains NULL values as the key
+ * values, so the merged partition will do so; treat it as the NULL
+ * partition of the join relation.
+ */
+ if (jointype == JOIN_FULL)
+ *null_index = merge_partition_with_dummy(inner_map, inner_null,
+ next_index);
+ }
+ else
+ {
+ Assert(consider_outer_null && consider_inner_null);
+ Assert(outer_has_null);
+ Assert(inner_has_null);
+
+ /*
+ * If this is an outer join, the NULL partition on the outer side (and
+ * that on the inner side if this is a FULL join) have to be scanned
+ * all the way anyway, so merge them. Note that each of the NULL
+ * partitions isn't merged yet, so they should be merged successfully.
+ * Like the above, each of the NULL partitions only contains NULL
+ * values as the key values, so the merged partition will do so; treat
+ * it as the NULL partition of the join relation.
+ *
+ * Note: if this an INNER/SEMI join, the join clause will never be
+ * satisfied by two NULL values (see comments above), so both the NULL
+ * partitions can be eliminated.
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ Assert(jointype != JOIN_RIGHT);
+ *null_index = merge_matching_partitions(outer_map, inner_map,
+ outer_null, inner_null,
+ next_index);
+ Assert(*null_index >= 0);
+ }
+ }
+}
+
+/*
+ * merge_default_partitions
+ * Merge the default partitions from a join's outer and inner sides.
+ *
+ * If the merged partition produced from them is the default partition of the
+ * join relation, *default_index is set to the index of the merged partition.
+ */
+static void
+merge_default_partitions(PartitionMap *outer_map,
+ PartitionMap *inner_map,
+ bool outer_has_default,
+ bool inner_has_default,
+ int outer_default,
+ int inner_default,
+ JoinType jointype,
+ int *next_index,
+ int *default_index)
+{
+ int outer_merged_index = -1;
+ int inner_merged_index = -1;
+
+ Assert(outer_has_default || inner_has_default);
+
+ /* Get the merged partition indexes for the default partitions. */
+ if (outer_has_default)
+ {
+ Assert(outer_default >= 0 && outer_default < outer_map->nparts);
+ outer_merged_index = outer_map->merged_indexes[outer_default];
+ }
+ if (inner_has_default)
+ {
+ Assert(inner_default >= 0 && inner_default < inner_map->nparts);
+ inner_merged_index = inner_map->merged_indexes[inner_default];
+ }
+
+ if (outer_has_default && !inner_has_default)
+ {
+ /*
+ * If this is an outer join, the default partition on the outer side
+ * has to be scanned all the way anyway; if we have not yet assigned a
+ * partition, merge the default partition with a dummy partition on the
+ * other side. The merged partition will act as the default partition
+ * of the join relation (see comments in process_inner_partition()).
+ */
+ if (IS_OUTER_JOIN(jointype))
+ {
+ Assert(jointype != JOIN_RIGHT);
+ if (outer_merged_index == -1)
+ {
+ Assert(*default_index == -1);
+ *default_index = merge_partition_with_dummy(outer_map,
+ outer_default,
+ next_index);
+ }
+ else
+ Assert(*default_index == outer_merged_index);
+ }
+ else
+ Assert(*default_index == -1);
+ }
+ else if (!outer_has_default && inner_has_default)
+ {
+ /*
+ * If this is a FULL join, the default partition on the inner side
+ * has to be scanned all the way anyway; if we have not yet assigned a
+ * partition, merge the default partition with a dummy partition on the
+ * other side. The merged partition will act as the default partition
+ * of the join relation (see comments in process_outer_partition()).
+ */
+ if (jointype == JOIN_FULL)
+ {
+ if (inner_merged_index == -1)
+ {
+ Assert(*default_index == -1);
+ *default_index = merge_partition_with_dummy(inner_map,
+ inner_default,
+ next_index);
+ }
+ else
+ Assert(*default_index == inner_merged_index);
+ }
+ else
+ Assert(*default_index == -1);
+ }
+ else
+ {
+ Assert(outer_has_default && inner_has_default);
+
+ /*
+ * The default partitions have to be joined with each other, so merge
+ * them. Note that each of the default partitions isn't merged yet
+ * (see, process_outer_partition()/process_innerer_partition()), so
+ * they should be merged successfully. The merged partition will act
+ * as the default partition of the join relation.
+ */
+ Assert(outer_merged_index == -1);
+ Assert(inner_merged_index == -1);
+ Assert(*default_index == -1);
+ *default_index = merge_matching_partitions(outer_map,
+ inner_map,
+ outer_default,
+ inner_default,
+ next_index);
+ Assert(*default_index >= 0);
+ }
+}
+
+/*
+ * merge_partition_with_dummy
+ * Assign given partition a new partition of a join relation
+ *
+ * Note: The caller assumes that the given partition doesn't have a non-dummy
+ * matching partition on the other side, but if the given partition finds the
+ * matching partition later, we will adjust the assignment.
+ */
+static int
+merge_partition_with_dummy(PartitionMap *map, int index, int *next_index)
+{
+ int merged_index = *next_index;
+
+ Assert(index >= 0 && index < map->nparts);
+ Assert(map->merged_indexes[index] == -1);
+ Assert(!map->merged[index]);
+ map->merged_indexes[index] = merged_index;
+ /* Leave the merged flag alone! */
+ *next_index = *next_index + 1;
+ return merged_index;
+}
+
+/*
+ * fix_merged_indexes
+ * Adjust merged indexes of re-merged partitions
+ */
+static void
+fix_merged_indexes(PartitionMap *outer_map, PartitionMap *inner_map,
+ int nmerged, List *merged_indexes)
+{
+ int *new_indexes;
+ int merged_index;
+ int i;
+ ListCell *lc;
+
+ Assert(nmerged > 0);
+
+ new_indexes = (int *) palloc(sizeof(int) * nmerged);
+ for (i = 0; i < nmerged; i++)
+ new_indexes[i] = -1;
+
+ /* Build the mapping of old merged indexes to new merged indexes. */
+ if (outer_map->did_remapping)
+ {
+ for (i = 0; i < outer_map->nparts; i++)
+ {
+ merged_index = outer_map->old_indexes[i];
+ if (merged_index >= 0)
+ new_indexes[merged_index] = outer_map->merged_indexes[i];
+ }
+ }
+ if (inner_map->did_remapping)
+ {
+ for (i = 0; i < inner_map->nparts; i++)
+ {
+ merged_index = inner_map->old_indexes[i];
+ if (merged_index >= 0)
+ new_indexes[merged_index] = inner_map->merged_indexes[i];
+ }
+ }
+
+ /* Fix the merged_indexes list using the mapping. */
+ foreach(lc, merged_indexes)
+ {
+ merged_index = lfirst_int(lc);
+ Assert(merged_index >= 0);
+ if (new_indexes[merged_index] >= 0)
+ lfirst_int(lc) = new_indexes[merged_index];
+ }
+
+ pfree(new_indexes);
+}
+
+/*
+ * generate_matching_part_pairs
+ * Generate a pair of lists of partitions that produce merged partitions
+ *
+ * The lists of partitions are built in the order of merged partition indexes,
+ * and returned in *outer_parts and *inner_parts.
+ */
+static void
+generate_matching_part_pairs(RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+ PartitionMap *outer_map, PartitionMap *inner_map,
+ int nmerged,
+ List **outer_parts, List **inner_parts)
+{
+ int outer_nparts = outer_map->nparts;
+ int inner_nparts = inner_map->nparts;
+ int *outer_indexes;
+ int *inner_indexes;
+ int max_nparts;
+ int i;
+
+ Assert(nmerged > 0);
+ Assert(*outer_parts == NIL);
+ Assert(*inner_parts == NIL);
+
+ outer_indexes = (int *) palloc(sizeof(int) * nmerged);
+ inner_indexes = (int *) palloc(sizeof(int) * nmerged);
+ for (i = 0; i < nmerged; i++)
+ outer_indexes[i] = inner_indexes[i] = -1;
+
+ /* Set pairs of matching partitions. */
+ Assert(outer_nparts == outer_rel->nparts);
+ Assert(inner_nparts == inner_rel->nparts);
+ max_nparts = Max(outer_nparts, inner_nparts);
+ for (i = 0; i < max_nparts; i++)
+ {
+ if (i < outer_nparts)
+ {
+ int merged_index = outer_map->merged_indexes[i];
+
+ if (merged_index >= 0)
+ {
+ Assert(merged_index < nmerged);
+ outer_indexes[merged_index] = i;
+ }
+ }
+ if (i < inner_nparts)
+ {
+ int merged_index = inner_map->merged_indexes[i];
+
+ if (merged_index >= 0)
+ {
+ Assert(merged_index < nmerged);
+ inner_indexes[merged_index] = i;
+ }
+ }
+ }
+
+ /* Build the list pairs. */
+ for (i = 0; i < nmerged; i++)
+ {
+ int outer_index = outer_indexes[i];
+ int inner_index = inner_indexes[i];
+
+ /*
+ * If both partitions are dummy, it means the merged partition that had
+ * been assigned to the outer/inner partition was removed when
+ * re-merging the outer/inner partition in merge_matching_partitions();
+ * ignore the merged partition.
+ */
+ if (outer_index == -1 && inner_index == -1)
+ continue;
+
+ *outer_parts = lappend(*outer_parts, outer_index >= 0 ?
+ outer_rel->part_rels[outer_index] : NULL);
+ *inner_parts = lappend(*inner_parts, inner_index >= 0 ?
+ inner_rel->part_rels[inner_index] : NULL);
+ }
+
+ pfree(outer_indexes);
+ pfree(inner_indexes);
+}
+
+/*
+ * build_merged_partition_bounds
+ * Create a PartitionBoundInfo struct from merged partition bounds
+ */
+static PartitionBoundInfo
+build_merged_partition_bounds(char strategy, List *merged_datums,
+ List *merged_kinds, List *merged_indexes,
+ int null_index, int default_index)
+{
+ PartitionBoundInfo merged_bounds;
+ int ndatums = list_length(merged_datums);
+ int pos;
+ ListCell *lc;
+
+ merged_bounds = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
+ merged_bounds->strategy = strategy;
+ merged_bounds->ndatums = ndatums;
+
+ merged_bounds->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
+ pos = 0;
+ foreach(lc, merged_datums)
+ merged_bounds->datums[pos++] = (Datum *) lfirst(lc);
+
+ if (strategy == PARTITION_STRATEGY_RANGE)
+ {
+ Assert(list_length(merged_kinds) == ndatums);
+ merged_bounds->kind = (PartitionRangeDatumKind **)
+ palloc(sizeof(PartitionRangeDatumKind *) * ndatums);
+ pos = 0;
+ foreach(lc, merged_kinds)
+ merged_bounds->kind[pos++] = (PartitionRangeDatumKind *) lfirst(lc);
+
+ /* There are ndatums+1 indexes in the case of range partitioning. */
+ merged_indexes = lappend_int(merged_indexes, -1);
+ ndatums++;
+ }
+ else
+ {
+ Assert(strategy == PARTITION_STRATEGY_LIST);
+ Assert(merged_kinds == NIL);
+ merged_bounds->kind = NULL;
+ }
+
+ Assert(list_length(merged_indexes) == ndatums);
+ merged_bounds->indexes = (int *) palloc(sizeof(int) * ndatums);
+ pos = 0;
+ foreach(lc, merged_indexes)
+ merged_bounds->indexes[pos++] = lfirst_int(lc);
+
+ merged_bounds->null_index = null_index;
+ merged_bounds->default_index = default_index;
+
+ return merged_bounds;
+}
+
+/*
+ * get_range_partition
+ * Get the next non-dummy partition of a range-partitioned relation,
+ * returning the index of that partition
+ *
+ * *lb and *ub are set to the lower and upper bounds of that partition
+ * respectively, and *lb_pos is advanced to the next lower bound, if any.
+ */
+static int
+get_range_partition(RelOptInfo *rel,
+ PartitionBoundInfo bi,
+ int *lb_pos,
+ PartitionRangeBound *lb,
+ PartitionRangeBound *ub)
+{
+ int part_index;
+
+ Assert(bi->strategy == PARTITION_STRATEGY_RANGE);
+
+ do {
+ part_index = get_range_partition_internal(bi, lb_pos, lb, ub);
+ if (part_index == -1)
+ return -1;
+ } while (is_dummy_partition(rel, part_index));
+
+ return part_index;
+}
+
+static int
+get_range_partition_internal(PartitionBoundInfo bi,
+ int *lb_pos,
+ PartitionRangeBound *lb,
+ PartitionRangeBound *ub)
+{
+ /* Return the index as -1 if we've exhausted all lower bounds. */
+ if (*lb_pos >= bi->ndatums)
+ return -1;
+
+ /* A lower bound should have at least one more bound after it. */
+ Assert(*lb_pos + 1 < bi->ndatums);
+
+ /* Set the lower bound. */
+ lb->index = bi->indexes[*lb_pos];
+ lb->datums = bi->datums[*lb_pos];
+ lb->kind = bi->kind[*lb_pos];
+ lb->lower = true;
+ /* Set the upper bound. */
+ ub->index = bi->indexes[*lb_pos + 1];
+ ub->datums = bi->datums[*lb_pos + 1];
+ ub->kind = bi->kind[*lb_pos + 1];
+ ub->lower = false;
+
+ /* The index assigned to an upper bound should be valid. */
+ Assert(ub->index >= 0);
+
+ /*
+ * Advance the position to the next lower bound. If there are no bounds
+ * left beyond the upper bound, we have reached the last lower bound.
+ */
+ if (*lb_pos + 2 >= bi->ndatums)
+ *lb_pos = bi->ndatums;
+ else
+ {
+ /*
+ * If the index assigned to the bound next to the upper bound isn't
+ * valid, that is the next lower bound; else, the upper bound is also
+ * the lower bound of the next range partition.
+ */
+ if (bi->indexes[*lb_pos + 2] < 0)
+ *lb_pos = *lb_pos + 2;
+ else
+ *lb_pos = *lb_pos + 1;
+ }
+
+ return ub->index;
+}
+
+/*
+ * compare_range_partitions
+ * Compare the bounds of two range partitions, and return true if the
+ * two partitions overlap, false otherwise
+ *
+ * *lb_cmpval is set to -1, 0, or 1 if the outer partition's lower bound is
+ * lower than, equal to, or higher than the inner partition's lower bound
+ * respectively. Likewise, *ub_cmpval is set to -1, 0, or 1 if the outer
+ * partition's upper bound is lower than, equal to, or higher than the inner
+ * partition's upper bound respectively.
+ */
+static bool
+compare_range_partitions(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ PartitionRangeBound *outer_lb,
+ PartitionRangeBound *outer_ub,
+ PartitionRangeBound *inner_lb,
+ PartitionRangeBound *inner_ub,
+ int *lb_cmpval, int *ub_cmpval)
+{
+ /*
+ * Check if the outer partition's upper bound is lower than the inner
+ * partition's lower bound; if so the partitions aren't overlapping.
+ */
+ if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_ub, inner_lb) < 0)
+ {
+ *lb_cmpval = -1;
+ *ub_cmpval = -1;
+ return false;
+ }
+
+ /*
+ * Check if the outer partition's lower bound is higher than the inner
+ * partition's upper bound; if so the partitions aren't overlapping.
+ */
+ if (compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_lb, inner_ub) > 0)
+ {
+ *lb_cmpval = 1;
+ *ub_cmpval = 1;
+ return false;
+ }
+
+ /* All other cases indicate overlapping partitions. */
+ *lb_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_lb, inner_lb);
+ *ub_cmpval = compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_ub, inner_ub);
+ return true;
+}
+
+/*
+ * get_merged_range_bounds
+ * Given the bounds of range partitions to be joined, determine the bounds
+ * of a merged partition produced from the range partitions
+ *
+ * *merged_lb and *merged_ub are set to the lower and upper bounds of the
+ * merged partition.
+ */
+static void
+get_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations, JoinType jointype,
+ PartitionRangeBound *outer_lb,
+ PartitionRangeBound *outer_ub,
+ PartitionRangeBound *inner_lb,
+ PartitionRangeBound *inner_ub,
+ int lb_cmpval, int ub_cmpval,
+ PartitionRangeBound *merged_lb,
+ PartitionRangeBound *merged_ub)
+{
+ Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_lb, inner_lb) == lb_cmpval);
+ Assert(compare_range_bounds(partnatts, partsupfuncs, partcollations,
+ outer_ub, inner_ub) == ub_cmpval);
+
+ switch (jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_SEMI:
+
+ /*
+ * An INNER/SEMI join will have the rows that fit both sides, so
+ * the lower bound of the merged partition will be the higher of
+ * the two lower bounds, and the upper bound of the merged
+ * partition will be the lower of the two upper bounds.
+ */
+ *merged_lb = (lb_cmpval > 0) ? *outer_lb : *inner_lb;
+ *merged_ub = (ub_cmpval < 0) ? *outer_ub : *inner_ub;
+ break;
+
+ case JOIN_LEFT:
+ case JOIN_ANTI:
+
+ /*
+ * A LEFT/ANTI join will have all the rows from the outer side, so
+ * the bounds of the merged partition will be the same as the outer
+ * bounds.
+ */
+ *merged_lb = *outer_lb;
+ *merged_ub = *outer_ub;
+ break;
+
+ case JOIN_FULL:
+
+ /*
+ * A FULL join will have all the rows from both sides, so the lower
+ * bound of the merged partition will be the lower of the two lower
+ * bounds, and the upper bound of the merged partition will be the
+ * higher of the two upper bounds.
+ */
+ *merged_lb = (lb_cmpval < 0) ? *outer_lb : *inner_lb;
+ *merged_ub = (ub_cmpval > 0) ? *outer_ub : *inner_ub;
+ break;
+
+ default:
+ elog(ERROR, "unrecognized join type: %d", (int) jointype);
+ }
+}
+
+/*
+ * add_merged_range_bounds
+ * Add the bounds of a merged partition to the lists of range bounds
+ */
+static void
+add_merged_range_bounds(int partnatts, FmgrInfo *partsupfuncs,
+ Oid *partcollations,
+ PartitionRangeBound *merged_lb,
+ PartitionRangeBound *merged_ub,
+ int merged_index,
+ List **merged_datums,
+ List **merged_kinds,
+ List **merged_indexes)
+{
+ int cmpval;
+
+ if (!*merged_datums)
+ {
+ /* First merged partition */
+ Assert(!*merged_kinds);
+ Assert(!*merged_indexes);
+ cmpval = 1;
+ }
+ else
+ {
+ PartitionRangeBound prev_ub;
+
+ Assert(*merged_datums);
+ Assert(*merged_kinds);
+ Assert(*merged_indexes);
+
+ /* Get the last upper bound. */
+ prev_ub.index = llast_int(*merged_indexes);
+ prev_ub.datums = (Datum *) llast(*merged_datums);
+ prev_ub.kind = (PartitionRangeDatumKind *) llast(*merged_kinds);
+ prev_ub.lower = false;
+
+ /*
+ * We pass to partition_rbound_cmp() lower1 as false to prevent it
+ * from considering the last upper bound to be smaller than the lower
+ * bound of the merged partition when the values of the two range
+ * bounds compare equal.
+ */
+ cmpval = partition_rbound_cmp(partnatts, partsupfuncs, partcollations,
+ merged_lb->datums, merged_lb->kind,
+ false, &prev_ub);
+ Assert(cmpval >= 0);
+ }
+
+ /*
+ * If the lower bound is higher than the last upper bound, add the lower
+ * bound with the index as -1 indicating that that is a lower bound; else,
+ * the last upper bound will be reused as the lower bound of the merged
+ * partition, so skip this.
+ */
+ if (cmpval > 0)
+ {
+ *merged_datums = lappend(*merged_datums, merged_lb->datums);
+ *merged_kinds = lappend(*merged_kinds, merged_lb->kind);
+ *merged_indexes = lappend_int(*merged_indexes, -1);
+ }
+
+ /* Add the upper bound and index of the merged partition. */
+ *merged_datums = lappend(*merged_datums, merged_ub->datums);
+ *merged_kinds = lappend(*merged_kinds, merged_ub->kind);
+ *merged_indexes = lappend_int(*merged_indexes, merged_index);
+}
+
+/*
* partitions_are_ordered
* Determine whether the partitions described by 'boundinfo' are ordered,
* that is partitions appearing earlier in the PartitionDesc sequence