diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/optimizer/util/plancat.c | 159 | ||||
-rw-r--r-- | src/backend/optimizer/util/relnode.c | 37 | ||||
-rw-r--r-- | src/include/nodes/relation.h | 56 |
3 files changed, 249 insertions, 3 deletions
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c index a1ebd4acc81..cac46bedf9e 100644 --- a/src/backend/optimizer/util/plancat.c +++ b/src/backend/optimizer/util/plancat.c @@ -68,6 +68,10 @@ static List *get_relation_constraints(PlannerInfo *root, static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index, Relation heapRelation); static List *get_relation_statistics(RelOptInfo *rel, Relation relation); +static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation); +static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel); +static List **build_baserel_partition_key_exprs(Relation relation, Index varno); /* * get_relation_info - @@ -420,6 +424,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent, /* Collect info about relation's foreign keys, if relevant */ get_relation_foreign_keys(root, rel, relation, inhparent); + /* + * Collect info about relation's partitioning scheme, if any. Only + * inheritance parents may be partitioned. + */ + if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) + set_relation_partition_info(root, rel, relation); + heap_close(relation, NoLock); /* @@ -1802,3 +1813,151 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event) heap_close(relation, NoLock); return result; } + +/* + * set_relation_partition_info + * + * Set partitioning scheme and related information for a partitioned table. 
+ */ +static void +set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, + Relation relation) +{ + PartitionDesc partdesc; + + Assert(relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE); + + partdesc = RelationGetPartitionDesc(relation); + rel->part_scheme = find_partition_scheme(root, relation); + Assert(partdesc != NULL && rel->part_scheme != NULL); + rel->boundinfo = partdesc->boundinfo; + rel->nparts = partdesc->nparts; + rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid); +} + +/* + * find_partition_scheme + * + * Find or create a PartitionScheme for this Relation. + */ +static PartitionScheme +find_partition_scheme(PlannerInfo *root, Relation relation) +{ + PartitionKey partkey = RelationGetPartitionKey(relation); + ListCell *lc; + int partnatts; + PartitionScheme part_scheme; + + /* A partitioned table should have a partition key. */ + Assert(partkey != NULL); + + partnatts = partkey->partnatts; + + /* Search for a matching partition scheme and return it if found. */ + foreach(lc, root->part_schemes) + { + part_scheme = lfirst(lc); + + /* Match partitioning strategy and number of keys. */ + if (partkey->strategy != part_scheme->strategy || + partnatts != part_scheme->partnatts) + continue; + + /* Match the partition key types. */ + if (memcmp(partkey->partopfamily, part_scheme->partopfamily, + sizeof(Oid) * partnatts) != 0 || + memcmp(partkey->partopcintype, part_scheme->partopcintype, + sizeof(Oid) * partnatts) != 0 || + memcmp(partkey->parttypcoll, part_scheme->parttypcoll, + sizeof(Oid) * partnatts) != 0) + continue; + + /* + * Length and byval information should match when partopcintype + * matches. + */ + Assert(memcmp(partkey->parttyplen, part_scheme->parttyplen, + sizeof(int16) * partnatts) == 0); + Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval, + sizeof(bool) * partnatts) == 0); + + /* Found matching partition scheme. */ + return part_scheme; + } + + /* + * Did not find matching partition scheme. 
Create one, copying relevant + * information from the relcache. Instead of copying whole arrays, copy + * the pointers in the relcache. It's safe to do so since + * RelationClearRelation() wouldn't change it while planner is using it. + */ + part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData)); + part_scheme->strategy = partkey->strategy; + part_scheme->partnatts = partkey->partnatts; + part_scheme->partopfamily = partkey->partopfamily; + part_scheme->partopcintype = partkey->partopcintype; + part_scheme->parttypcoll = partkey->parttypcoll; + part_scheme->parttyplen = partkey->parttyplen; + part_scheme->parttypbyval = partkey->parttypbyval; + + /* Add the partitioning scheme to PlannerInfo. */ + root->part_schemes = lappend(root->part_schemes, part_scheme); + + return part_scheme; +} + +/* + * build_baserel_partition_key_exprs + * + * Collects partition key expressions for a given base relation. Any single + * column partition keys are converted to Var nodes. All Var nodes are set + * to the given varno. The partition key expressions are returned as an array + * of single-element lists to be stored in RelOptInfo of the base relation. + */ +static List ** +build_baserel_partition_key_exprs(Relation relation, Index varno) +{ + PartitionKey partkey = RelationGetPartitionKey(relation); + int partnatts; + int cnt; + List **partexprs; + ListCell *lc; + + /* A partitioned table should have a partition key. */ + Assert(partkey != NULL); + + partnatts = partkey->partnatts; + partexprs = (List **) palloc(sizeof(List *) * partnatts); + lc = list_head(partkey->partexprs); + + for (cnt = 0; cnt < partnatts; cnt++) + { + Expr *partexpr; + AttrNumber attno = partkey->partattrs[cnt]; + + if (attno != InvalidAttrNumber) + { + /* Single column partition key is stored as a Var node. 
*/ + Assert(attno > 0); + + partexpr = (Expr *) makeVar(varno, attno, + partkey->parttypid[cnt], + partkey->parttypmod[cnt], + partkey->parttypcoll[cnt], 0); + } + else + { + if (lc == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + /* Re-stamp the expression with the given varno. */ + partexpr = (Expr *) copyObject(lfirst(lc)); + ChangeVarNodes((Node *) partexpr, 1, varno, 0); + lc = lnext(lc); + } + + partexprs[cnt] = list_make1(partexpr); + } + + return partexprs; +} diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index c7b2695ebb3..077e89ae435 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -146,6 +146,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->baserestrict_min_security = UINT_MAX; rel->joininfo = NIL; rel->has_eclass_joins = false; + rel->part_scheme = NULL; + rel->nparts = 0; + rel->boundinfo = NULL; + rel->part_rels = NULL; + rel->partexprs = NULL; /* * Pass top parent's relids down the inheritance hierarchy. If the parent @@ -218,18 +223,41 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) if (rte->inh) { ListCell *l; + int nparts = rel->nparts; + int cnt_parts = 0; + + if (nparts > 0) + rel->part_rels = (RelOptInfo **) + palloc(sizeof(RelOptInfo *) * nparts); foreach(l, root->append_rel_list) { AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l); + RelOptInfo *childrel; /* append_rel_list contains all append rels; ignore others */ if (appinfo->parent_relid != relid) continue; - (void) build_simple_rel(root, appinfo->child_relid, - rel); + childrel = build_simple_rel(root, appinfo->child_relid, + rel); + + /* Nothing more to do for an unpartitioned table. */ + if (!rel->part_scheme) + continue; + + /* + * The order of partition OIDs in append_rel_list is the same as + * the order in the PartitionDesc, so the order of part_rels will + * also match the PartitionDesc. See expand_partitioned_rtentry. 
+ */ + Assert(cnt_parts < nparts); + rel->part_rels[cnt_parts] = childrel; + cnt_parts++; } + + /* We should have seen all the child partitions. */ + Assert(cnt_parts == nparts); } return rel; @@ -527,6 +555,11 @@ build_join_rel(PlannerInfo *root, joinrel->joininfo = NIL; joinrel->has_eclass_joins = false; joinrel->top_parent_relids = NULL; + joinrel->part_scheme = NULL; + joinrel->nparts = 0; + joinrel->boundinfo = NULL; + joinrel->part_rels = NULL; + joinrel->partexprs = NULL; /* Compute information relevant to the foreign relations. */ set_foreign_rel_properties(joinrel, outer_rel, inner_rel); diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index d50ff556819..48e6012f7fe 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -266,6 +266,9 @@ typedef struct PlannerInfo List *distinct_pathkeys; /* distinctClause pathkeys, if any */ List *sort_pathkeys; /* sortClause pathkeys, if any */ + List *part_schemes; /* Canonicalised partition schemes used in the + * query. */ + List *initial_rels; /* RelOptInfos we are now trying to join */ /* Use fetch_upper_rel() to get any particular upper rel */ @@ -326,6 +329,34 @@ typedef struct PlannerInfo ((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \ rt_fetch(rti, (root)->parse->rtable)) +/* + * If multiple relations are partitioned the same way, all such partitions + * will have a pointer to the same PartitionScheme. A list of PartitionScheme + * objects is attached to the PlannerInfo. By design, the partition scheme + * incorporates only the general properties of the partition method (LIST vs. + * RANGE, number of partitioning columns and the type information for each) + * and not the specific bounds. + * + * We store the opclass-declared input data types instead of the partition key + * datatypes since the former rather than the latter are used to compare + * partition bounds. 
Since partition key data types and the opclass declared + * input data types are expected to be binary compatible (per ResolveOpClass), + * both of those should have the same byval and length properties. + */ +typedef struct PartitionSchemeData +{ + char strategy; /* partition strategy */ + int16 partnatts; /* number of partition attributes */ + Oid *partopfamily; /* OIDs of operator families */ + Oid *partopcintype; /* OIDs of opclass declared input data types */ + Oid *parttypcoll; /* OIDs of collations of partition keys. */ + + /* Cached information about partition key data types. */ + int16 *parttyplen; + bool *parttypbyval; +} PartitionSchemeData; + +typedef struct PartitionSchemeData *PartitionScheme; /*---------- * RelOptInfo @@ -456,7 +487,7 @@ typedef struct PlannerInfo * other rels for which we have tried and failed to prove * this one unique * - * The presence of the remaining fields depends on the restrictions + * The presence of the following fields depends on the restrictions * and joins that the relation participates in: * * baserestrictinfo - List of RestrictInfo nodes, containing info about @@ -487,6 +518,21 @@ typedef struct PlannerInfo * We store baserestrictcost in the RelOptInfo (for base relations) because * we know we will need it at least once (to price the sequential scan) * and may need it multiple times to price index scans. + * + * If the relation is partitioned, these fields will be set: + * + * part_scheme - Partitioning scheme of the relation + * boundinfo - Partition bounds + * nparts - Number of partitions + * part_rels - RelOptInfos for each partition + * partexprs - Partition key expressions + * + * Note: A base relation always has only one set of partition keys, but a join + * relation may have as many sets of partition keys as the number of relations + * being joined. partexprs is an array containing part_scheme->partnatts + * elements, each of which is a list of partition key expressions. 
For a base + * relation each list contains only one expression, but for a join relation + * there can be one per baserel. *---------- */ typedef enum RelOptKind @@ -592,6 +638,14 @@ typedef struct RelOptInfo /* used by "other" relations */ Relids top_parent_relids; /* Relids of topmost parents */ + + /* used for partitioned relations */ + PartitionScheme part_scheme; /* Partitioning scheme. */ + int nparts; /* number of partitions */ + struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ + struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, + * stored in the same order as the bounds */ + List **partexprs; /* Partition key expressions. */ } RelOptInfo; /* |