diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 2 | ||||
-rw-r--r-- | src/backend/utils/cache/Makefile | 6 | ||||
-rw-r--r-- | src/backend/utils/cache/partcache.c | 967 | ||||
-rw-r--r-- | src/backend/utils/cache/relcache.c | 208 | ||||
-rw-r--r-- | src/backend/utils/misc/pg_controldata.c | 5 |
5 files changed, 976 insertions, 212 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index 99643e83b25..74e1cd8afb1 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -24,7 +24,6 @@ #include "access/sysattr.h" #include "catalog/dependency.h" #include "catalog/indexing.h" -#include "catalog/partition.h" #include "catalog/pg_aggregate.h" #include "catalog/pg_am.h" #include "catalog/pg_authid.h" @@ -64,6 +63,7 @@ #include "utils/guc.h" #include "utils/hsearch.h" #include "utils/lsyscache.h" +#include "utils/partcache.h" #include "utils/rel.h" #include "utils/ruleutils.h" #include "utils/snapmgr.h" diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile index a943f8ea4bc..e6a6b9f395f 100644 --- a/src/backend/utils/cache/Makefile +++ b/src/backend/utils/cache/Makefile @@ -12,8 +12,8 @@ subdir = src/backend/utils/cache top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \ - relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \ - typcache.o ts_cache.o +OBJS = attoptcache.o catcache.o evtcache.o inval.o lsyscache.o \ + partcache.o plancache.o relcache.o relmapper.o relfilenodemap.o \ + spccache.o syscache.o ts_cache.o typcache.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c new file mode 100644 index 00000000000..e2f677a46a4 --- /dev/null +++ b/src/backend/utils/cache/partcache.c @@ -0,0 +1,967 @@ +/*------------------------------------------------------------------------- + * + * partcache.c + * Support routines for manipulating partition information cached in + * relcache + * + * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/cache/partcache.c + * + *------------------------------------------------------------------------- +*/ +#include "postgres.h" + +#include "access/hash.h" +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "catalog/partition.h" +#include "catalog/pg_inherits.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_partitioned_table.h" +#include "miscadmin.h" +#include "nodes/makefuncs.h" +#include "nodes/nodeFuncs.h" +#include "optimizer/clauses.h" +#include "optimizer/planner.h" +#include "partitioning/partbounds.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/lsyscache.h" +#include "utils/memutils.h" +#include "utils/partcache.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +static List *generate_partition_qual(Relation rel); +static int32 qsort_partition_hbound_cmp(const void *a, const void *b); +static int32 qsort_partition_list_value_cmp(const void *a, const void *b, + void *arg); +static int32 qsort_partition_rbound_cmp(const void *a, const void *b, + void *arg); + + +/* + * RelationBuildPartitionKey + * Build and attach to relcache partition key data of relation + * + * Partitioning key data is a complex structure; to avoid complicated logic to + * free individual elements whenever the relcache entry is flushed, we give it + * its own memory context, child of CacheMemoryContext, which can easily be + * deleted on its own. To avoid leaking memory in that context in case of an + * error partway through this function, the context is initially created as a + * child of CurTransactionContext and only re-parented to CacheMemoryContext + * at the end, when no further errors are possible. Also, we don't make this + * context the current context except in very brief code sections, out of fear + * that some of our callees allocate memory on their own which would be leaked + * permanently. + */ +void +RelationBuildPartitionKey(Relation relation) +{ + Form_pg_partitioned_table form; + HeapTuple tuple; + bool isnull; + int i; + PartitionKey key; + AttrNumber *attrs; + oidvector *opclass; + oidvector *collation; + ListCell *partexprs_item; + Datum datum; + MemoryContext partkeycxt, + oldcxt; + int16 procnum; + + tuple = SearchSysCache1(PARTRELID, + ObjectIdGetDatum(RelationGetRelid(relation))); + + /* + * The following happens when we have created our pg_class entry but not + * the pg_partitioned_table entry yet. + */ + if (!HeapTupleIsValid(tuple)) + return; + + partkeycxt = AllocSetContextCreate(CurTransactionContext, + "partition key", + ALLOCSET_SMALL_SIZES); + MemoryContextCopyAndSetIdentifier(partkeycxt, + RelationGetRelationName(relation)); + + key = (PartitionKey) MemoryContextAllocZero(partkeycxt, + sizeof(PartitionKeyData)); + + /* Fixed-length attributes */ + form = (Form_pg_partitioned_table) GETSTRUCT(tuple); + key->strategy = form->partstrat; + key->partnatts = form->partnatts; + + /* + * We can rely on the first variable-length attribute being mapped to the + * relevant field of the catalog's C struct, because all previous + * attributes are non-nullable and fixed-length. + */ + attrs = form->partattrs.values; + + /* But use the hard way to retrieve further variable-length attributes */ + /* Operator class */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partclass, &isnull); + Assert(!isnull); + opclass = (oidvector *) DatumGetPointer(datum); + + /* Collation */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partcollation, &isnull); + Assert(!isnull); + collation = (oidvector *) DatumGetPointer(datum); + + /* Expressions */ + datum = SysCacheGetAttr(PARTRELID, tuple, + Anum_pg_partitioned_table_partexprs, &isnull); + if (!isnull) + { + char *exprString; + Node *expr; + + exprString = TextDatumGetCString(datum); + expr = stringToNode(exprString); + pfree(exprString); + + /* + * Run the expressions through const-simplification since the planner + * will be comparing them to similarly-processed qual clause operands, + * and may fail to detect valid matches without this step; fix + * opfuncids while at it. We don't need to bother with + * canonicalize_qual() though, because partition expressions should be + * in canonical form already (ie, no need for OR-merging or constant + * elimination). + */ + expr = eval_const_expressions(NULL, expr); + fix_opfuncids(expr); + + oldcxt = MemoryContextSwitchTo(partkeycxt); + key->partexprs = (List *) copyObject(expr); + MemoryContextSwitchTo(oldcxt); + } + + oldcxt = MemoryContextSwitchTo(partkeycxt); + key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); + key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); + + key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + + /* Gather type and collation info as well */ + key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); + key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); + key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); + key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); + key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); + MemoryContextSwitchTo(oldcxt); + + /* determine support function number to search for */ + procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? + HASHEXTENDED_PROC : BTORDER_PROC; + + /* Copy partattrs and fill other per-attribute info */ + memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + AttrNumber attno = key->partattrs[i]; + HeapTuple opclasstup; + Form_pg_opclass opclassform; + Oid funcid; + + /* Collect opfamily information */ + opclasstup = SearchSysCache1(CLAOID, + ObjectIdGetDatum(opclass->values[i])); + if (!HeapTupleIsValid(opclasstup)) + elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); + + opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); + key->partopfamily[i] = opclassform->opcfamily; + key->partopcintype[i] = opclassform->opcintype; + + /* Get a support function for the specified opfamily and datatypes */ + funcid = get_opfamily_proc(opclassform->opcfamily, + opclassform->opcintype, + opclassform->opcintype, + procnum); + if (!OidIsValid(funcid)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s", + NameStr(opclassform->opcname), + (key->strategy == PARTITION_STRATEGY_HASH) ? + "hash" : "btree", + procnum, + format_type_be(opclassform->opcintype)))); + + fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt); + + /* Collation */ + key->partcollation[i] = collation->values[i]; + + /* Collect type information */ + if (attno != 0) + { + Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1); + + key->parttypid[i] = att->atttypid; + key->parttypmod[i] = att->atttypmod; + key->parttypcoll[i] = att->attcollation; + } + else + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + + key->parttypid[i] = exprType(lfirst(partexprs_item)); + key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); + key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); + + partexprs_item = lnext(partexprs_item); + } + get_typlenbyvalalign(key->parttypid[i], + &key->parttyplen[i], + &key->parttypbyval[i], + &key->parttypalign[i]); + + ReleaseSysCache(opclasstup); + } + + ReleaseSysCache(tuple); + + /* + * Success --- reparent our context and make the relcache point to the + * newly constructed key + */ + MemoryContextSetParent(partkeycxt, CacheMemoryContext); + relation->rd_partkeycxt = partkeycxt; + relation->rd_partkey = key; +} + +/* + * RelationBuildPartitionDesc + * Form rel's partition descriptor + * + * Not flushed from the cache by RelationClearRelation() unless changed because + * of addition or removal of partition. + */ +void +RelationBuildPartitionDesc(Relation rel) +{ + List *inhoids, + *partoids; + Oid *oids = NULL; + List *boundspecs = NIL; + ListCell *cell; + int i, + nparts; + PartitionKey key = RelationGetPartitionKey(rel); + PartitionDesc result; + MemoryContext oldcxt; + + int ndatums = 0; + int default_index = -1; + + /* Hash partitioning specific */ + PartitionHashBound **hbounds = NULL; + + /* List partitioning specific */ + PartitionListValue **all_values = NULL; + int null_index = -1; + + /* Range partitioning specific */ + PartitionRangeBound **rbounds = NULL; + + /* Get partition oids from pg_inherits */ + inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock); + + /* Collect bound spec nodes in a list */ + i = 0; + partoids = NIL; + foreach(cell, inhoids) + { + Oid inhrelid = lfirst_oid(cell); + HeapTuple tuple; + Datum datum; + bool isnull; + Node *boundspec; + + tuple = SearchSysCache1(RELOID, inhrelid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", inhrelid); + + /* + * It is possible that the pg_class tuple of a partition has not been + * updated yet to set its relpartbound field. The only case where + * this happens is when we open the parent relation to check using its + * partition descriptor that a new partition's bound does not overlap + * some existing partition. + */ + if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition) + { + ReleaseSysCache(tuple); + continue; + } + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + Assert(!isnull); + boundspec = (Node *) stringToNode(TextDatumGetCString(datum)); + + /* + * Sanity check: If the PartitionBoundSpec says this is the default + * partition, its OID should correspond to whatever's stored in + * pg_partitioned_table.partdefid; if not, the catalog is corrupt. + */ + if (castNode(PartitionBoundSpec, boundspec)->is_default) + { + Oid partdefid; + + partdefid = get_default_partition_oid(RelationGetRelid(rel)); + if (partdefid != inhrelid) + elog(ERROR, "expected partdefid %u, but got %u", + inhrelid, partdefid); + } + + boundspecs = lappend(boundspecs, boundspec); + partoids = lappend_oid(partoids, inhrelid); + ReleaseSysCache(tuple); + } + + nparts = list_length(partoids); + + if (nparts > 0) + { + oids = (Oid *) palloc(nparts * sizeof(Oid)); + i = 0; + foreach(cell, partoids) + oids[i++] = lfirst_oid(cell); + + /* Convert from node to the internal representation */ + if (key->strategy == PARTITION_STRATEGY_HASH) + { + ndatums = nparts; + hbounds = (PartitionHashBound **) + palloc(nparts * sizeof(PartitionHashBound *)); + + i = 0; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = castNode(PartitionBoundSpec, + lfirst(cell)); + + if (spec->strategy != PARTITION_STRATEGY_HASH) + elog(ERROR, "invalid strategy in partition bound spec"); + + hbounds[i] = (PartitionHashBound *) + palloc(sizeof(PartitionHashBound)); + + hbounds[i]->modulus = spec->modulus; + hbounds[i]->remainder = spec->remainder; + hbounds[i]->index = i; + i++; + } + + /* Sort all the bounds in ascending order */ + qsort(hbounds, nparts, sizeof(PartitionHashBound *), + qsort_partition_hbound_cmp); + } + else if (key->strategy == PARTITION_STRATEGY_LIST) + { + List *non_null_values = NIL; + + /* + * Create a unified list of non-null values across all partitions. + */ + i = 0; + null_index = -1; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = castNode(PartitionBoundSpec, + lfirst(cell)); + ListCell *c; + + if (spec->strategy != PARTITION_STRATEGY_LIST) + elog(ERROR, "invalid strategy in partition bound spec"); + + /* + * Note the index of the partition bound spec for the default + * partition. There's no datum to add to the list of non-null + * datums for this partition. + */ + if (spec->is_default) + { + default_index = i; + i++; + continue; + } + + foreach(c, spec->listdatums) + { + Const *val = castNode(Const, lfirst(c)); + PartitionListValue *list_value = NULL; + + if (!val->constisnull) + { + list_value = (PartitionListValue *) + palloc0(sizeof(PartitionListValue)); + list_value->index = i; + list_value->value = val->constvalue; + } + else + { + /* + * Never put a null into the values array, flag + * instead for the code further down below where we + * construct the actual relcache struct. + */ + if (null_index != -1) + elog(ERROR, "found null more than once"); + null_index = i; + } + + if (list_value) + non_null_values = lappend(non_null_values, + list_value); + } + + i++; + } + + ndatums = list_length(non_null_values); + + /* + * Collect all list values in one array. Alongside the value, we + * also save the index of partition the value comes from. + */ + all_values = (PartitionListValue **) palloc(ndatums * + sizeof(PartitionListValue *)); + i = 0; + foreach(cell, non_null_values) + { + PartitionListValue *src = lfirst(cell); + + all_values[i] = (PartitionListValue *) + palloc(sizeof(PartitionListValue)); + all_values[i]->value = src->value; + all_values[i]->index = src->index; + i++; + } + + qsort_arg(all_values, ndatums, sizeof(PartitionListValue *), + qsort_partition_list_value_cmp, (void *) key); + } + else if (key->strategy == PARTITION_STRATEGY_RANGE) + { + int k; + PartitionRangeBound **all_bounds, + *prev; + + all_bounds = (PartitionRangeBound **) palloc0(2 * nparts * + sizeof(PartitionRangeBound *)); + + /* + * Create a unified list of range bounds across all the + * partitions. + */ + i = ndatums = 0; + foreach(cell, boundspecs) + { + PartitionBoundSpec *spec = castNode(PartitionBoundSpec, + lfirst(cell)); + PartitionRangeBound *lower, + *upper; + + if (spec->strategy != PARTITION_STRATEGY_RANGE) + elog(ERROR, "invalid strategy in partition bound spec"); + + /* + * Note the index of the partition bound spec for the default + * partition. There's no datum to add to the allbounds array + * for this partition. + */ + if (spec->is_default) + { + default_index = i++; + continue; + } + + lower = make_one_range_bound(key, i, spec->lowerdatums, + true); + upper = make_one_range_bound(key, i, spec->upperdatums, + false); + all_bounds[ndatums++] = lower; + all_bounds[ndatums++] = upper; + i++; + } + + Assert(ndatums == nparts * 2 || + (default_index != -1 && ndatums == (nparts - 1) * 2)); + + /* Sort all the bounds in ascending order */ + qsort_arg(all_bounds, ndatums, + sizeof(PartitionRangeBound *), + qsort_partition_rbound_cmp, + (void *) key); + + /* Save distinct bounds from all_bounds into rbounds. */ + rbounds = (PartitionRangeBound **) + palloc(ndatums * sizeof(PartitionRangeBound *)); + k = 0; + prev = NULL; + for (i = 0; i < ndatums; i++) + { + PartitionRangeBound *cur = all_bounds[i]; + bool is_distinct = false; + int j; + + /* Is the current bound distinct from the previous one? */ + for (j = 0; j < key->partnatts; j++) + { + Datum cmpval; + + if (prev == NULL || cur->kind[j] != prev->kind[j]) + { + is_distinct = true; + break; + } + + /* + * If the bounds are both MINVALUE or MAXVALUE, stop now + * and treat them as equal, since any values after this + * point must be ignored. + */ + if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE) + break; + + cmpval = FunctionCall2Coll(&key->partsupfunc[j], + key->partcollation[j], + cur->datums[j], + prev->datums[j]); + if (DatumGetInt32(cmpval) != 0) + { + is_distinct = true; + break; + } + } + + /* + * Only if the bound is distinct save it into a temporary + * array i.e. rbounds which is later copied into boundinfo + * datums array. + */ + if (is_distinct) + rbounds[k++] = all_bounds[i]; + + prev = cur; + } + + /* Update ndatums to hold the count of distinct datums. */ + ndatums = k; + } + else + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + /* Now build the actual relcache partition descriptor */ + rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext, + "partition descriptor", + ALLOCSET_DEFAULT_SIZES); + MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt, RelationGetRelationName(rel)); + + oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt); + + result = (PartitionDescData *) palloc0(sizeof(PartitionDescData)); + result->nparts = nparts; + if (nparts > 0) + { + PartitionBoundInfo boundinfo; + int *mapping; + int next_index = 0; + + result->oids = (Oid *) palloc0(nparts * sizeof(Oid)); + + boundinfo = (PartitionBoundInfoData *) + palloc0(sizeof(PartitionBoundInfoData)); + boundinfo->strategy = key->strategy; + boundinfo->default_index = -1; + boundinfo->ndatums = ndatums; + boundinfo->null_index = -1; + boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *)); + + /* Initialize mapping array with invalid values */ + mapping = (int *) palloc(sizeof(int) * nparts); + for (i = 0; i < nparts; i++) + mapping[i] = -1; + + switch (key->strategy) + { + case PARTITION_STRATEGY_HASH: + { + /* Modulus are stored in ascending order */ + int greatest_modulus = hbounds[ndatums - 1]->modulus; + + boundinfo->indexes = (int *) palloc(greatest_modulus * + sizeof(int)); + + for (i = 0; i < greatest_modulus; i++) + boundinfo->indexes[i] = -1; + + for (i = 0; i < nparts; i++) + { + int modulus = hbounds[i]->modulus; + int remainder = hbounds[i]->remainder; + + boundinfo->datums[i] = (Datum *) palloc(2 * + sizeof(Datum)); + boundinfo->datums[i][0] = Int32GetDatum(modulus); + boundinfo->datums[i][1] = Int32GetDatum(remainder); + + while (remainder < greatest_modulus) + { + /* overlap? */ + Assert(boundinfo->indexes[remainder] == -1); + boundinfo->indexes[remainder] = i; + remainder += modulus; + } + + mapping[hbounds[i]->index] = i; + pfree(hbounds[i]); + } + pfree(hbounds); + break; + } + + case PARTITION_STRATEGY_LIST: + { + boundinfo->indexes = (int *) palloc(ndatums * sizeof(int)); + + /* + * Copy values. Indexes of individual values are mapped + * to canonical values so that they match for any two list + * partitioned tables with same number of partitions and + * same lists per partition. One way to canonicalize is + * to assign the index in all_values[] of the smallest + * value of each partition, as the index of all of the + * partition's values. + */ + for (i = 0; i < ndatums; i++) + { + boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum)); + boundinfo->datums[i][0] = datumCopy(all_values[i]->value, + key->parttypbyval[0], + key->parttyplen[0]); + + /* If the old index has no mapping, assign one */ + if (mapping[all_values[i]->index] == -1) + mapping[all_values[i]->index] = next_index++; + + boundinfo->indexes[i] = mapping[all_values[i]->index]; + } + + /* + * If null-accepting partition has no mapped index yet, + * assign one. This could happen if such partition + * accepts only null and hence not covered in the above + * loop which only handled non-null values. + */ + if (null_index != -1) + { + Assert(null_index >= 0); + if (mapping[null_index] == -1) + mapping[null_index] = next_index++; + boundinfo->null_index = mapping[null_index]; + } + + /* Assign mapped index for the default partition. */ + if (default_index != -1) + { + /* + * The default partition accepts any value not + * specified in the lists of other partitions, hence + * it should not get mapped index while assigning + * those for non-null datums. + */ + Assert(default_index >= 0 && + mapping[default_index] == -1); + mapping[default_index] = next_index++; + boundinfo->default_index = mapping[default_index]; + } + + /* All partition must now have a valid mapping */ + Assert(next_index == nparts); + break; + } + + case PARTITION_STRATEGY_RANGE: + { + boundinfo->kind = (PartitionRangeDatumKind **) + palloc(ndatums * + sizeof(PartitionRangeDatumKind *)); + boundinfo->indexes = (int *) palloc((ndatums + 1) * + sizeof(int)); + + for (i = 0; i < ndatums; i++) + { + int j; + + boundinfo->datums[i] = (Datum *) palloc(key->partnatts * + sizeof(Datum)); + boundinfo->kind[i] = (PartitionRangeDatumKind *) + palloc(key->partnatts * + sizeof(PartitionRangeDatumKind)); + for (j = 0; j < key->partnatts; j++) + { + if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE) + boundinfo->datums[i][j] = + datumCopy(rbounds[i]->datums[j], + key->parttypbyval[j], + key->parttyplen[j]); + boundinfo->kind[i][j] = rbounds[i]->kind[j]; + } + + /* + * There is no mapping for invalid indexes. + * + * Any lower bounds in the rbounds array have invalid + * indexes assigned, because the values between the + * previous bound (if there is one) and this (lower) + * bound are not part of the range of any existing + * partition. + */ + if (rbounds[i]->lower) + boundinfo->indexes[i] = -1; + else + { + int orig_index = rbounds[i]->index; + + /* If the old index has no mapping, assign one */ + if (mapping[orig_index] == -1) + mapping[orig_index] = next_index++; + + boundinfo->indexes[i] = mapping[orig_index]; + } + } + + /* Assign mapped index for the default partition. */ + if (default_index != -1) + { + Assert(default_index >= 0 && mapping[default_index] == -1); + mapping[default_index] = next_index++; + boundinfo->default_index = mapping[default_index]; + } + boundinfo->indexes[i] = -1; + break; + } + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + result->boundinfo = boundinfo; + + /* + * Now assign OIDs from the original array into mapped indexes of the + * result array. Order of OIDs in the former is defined by the + * catalog scan that retrieved them, whereas that in the latter is + * defined by canonicalized representation of the partition bounds. + */ + for (i = 0; i < nparts; i++) + result->oids[mapping[i]] = oids[i]; + pfree(mapping); + } + + MemoryContextSwitchTo(oldcxt); + rel->rd_partdesc = result; +} + +/* + * RelationGetPartitionQual + * + * Returns a list of partition quals + */ +List * +RelationGetPartitionQual(Relation rel) +{ + /* Quick exit */ + if (!rel->rd_rel->relispartition) + return NIL; + + return generate_partition_qual(rel); +} + +/* + * get_partition_qual_relid + * + * Returns an expression tree describing the passed-in relation's partition + * constraint. If there is no partition constraint returns NULL; this can + * happen if the default partition is the only partition. + */ +Expr * +get_partition_qual_relid(Oid relid) +{ + Relation rel = heap_open(relid, AccessShareLock); + Expr *result = NULL; + List *and_args; + + /* Do the work only if this relation is a partition. */ + if (rel->rd_rel->relispartition) + { + and_args = generate_partition_qual(rel); + + if (and_args == NIL) + result = NULL; + else if (list_length(and_args) > 1) + result = makeBoolExpr(AND_EXPR, and_args, -1); + else + result = linitial(and_args); + } + + /* Keep the lock. */ + heap_close(rel, NoLock); + + return result; +} + +/* + * generate_partition_qual + * + * Generate partition predicate from rel's partition bound expression. The + * function returns a NIL list if there is no predicate. + * + * Result expression tree is stored CacheMemoryContext to ensure it survives + * as long as the relcache entry. But we should be running in a less long-lived + * working context. To avoid leaking cache memory if this routine fails partway + * through, we build in working memory and then copy the completed structure + * into cache memory. + */ +static List * +generate_partition_qual(Relation rel) +{ + HeapTuple tuple; + MemoryContext oldcxt; + Datum boundDatum; + bool isnull; + PartitionBoundSpec *bound; + List *my_qual = NIL, + *result = NIL; + Relation parent; + bool found_whole_row; + + /* Guard against stack overflow due to overly deep partition tree */ + check_stack_depth(); + + /* Quick copy */ + if (rel->rd_partcheck != NIL) + return copyObject(rel->rd_partcheck); + + /* Grab at least an AccessShareLock on the parent table */ + parent = heap_open(get_partition_parent(RelationGetRelid(rel)), + AccessShareLock); + + /* Get pg_class.relpartbound */ + tuple = SearchSysCache1(RELOID, RelationGetRelid(rel)); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", + RelationGetRelid(rel)); + + boundDatum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + if (isnull) /* should not happen */ + elog(ERROR, "relation \"%s\" has relpartbound = null", + RelationGetRelationName(rel)); + bound = castNode(PartitionBoundSpec, + stringToNode(TextDatumGetCString(boundDatum))); + ReleaseSysCache(tuple); + + my_qual = get_qual_from_partbound(rel, parent, bound); + + /* Add the parent's quals to the list (if any) */ + if (parent->rd_rel->relispartition) + result = list_concat(generate_partition_qual(parent), my_qual); + else + result = my_qual; + + /* + * Change Vars to have partition's attnos instead of the parent's. We do + * this after we concatenate the parent's quals, because we want every Var + * in it to bear this relation's attnos. It's safe to assume varno = 1 + * here. + */ + result = map_partition_varattnos(result, 1, rel, parent, + &found_whole_row); + /* There can never be a whole-row reference here */ + if (found_whole_row) + elog(ERROR, "unexpected whole-row reference found in partition key"); + + /* Save a copy in the relcache */ + oldcxt = MemoryContextSwitchTo(CacheMemoryContext); + rel->rd_partcheck = copyObject(result); + MemoryContextSwitchTo(oldcxt); + + /* Keep the parent locked until commit */ + heap_close(parent, NoLock); + + return result; +} + +/* + * qsort_partition_hbound_cmp + * + * We sort hash bounds by modulus, then by remainder. + */ +static int32 +qsort_partition_hbound_cmp(const void *a, const void *b) +{ + PartitionHashBound *h1 = (*(PartitionHashBound *const *) a); + PartitionHashBound *h2 = (*(PartitionHashBound *const *) b); + + return partition_hbound_cmp(h1->modulus, h1->remainder, + h2->modulus, h2->remainder); +} + +/* + * qsort_partition_list_value_cmp + * + * Compare two list partition bound datums + */ +static int32 +qsort_partition_list_value_cmp(const void *a, const void *b, void *arg) +{ + Datum val1 = (*(const PartitionListValue **) a)->value, + val2 = (*(const PartitionListValue **) b)->value; + PartitionKey key = (PartitionKey) arg; + + return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0], + key->partcollation[0], + val1, val2)); +} + +/* Used when sorting range bounds across all range partitions */ +static int32 +qsort_partition_rbound_cmp(const void *a, const void *b, void *arg) +{ + PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a); + PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b); + PartitionKey key = (PartitionKey) arg; + + return partition_rbound_cmp(key->partnatts, key->partsupfunc, + key->partcollation, b1->datums, b1->kind, + b1->lower, b2); +} diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c index dfa95fed300..22ff36714ca 100644 --- a/src/backend/utils/cache/relcache.c +++ b/src/backend/utils/cache/relcache.c @@ -73,6 +73,7 @@ #include "optimizer/cost.h" #include "optimizer/prep.h" #include "optimizer/var.h" +#include "partitioning/partbounds.h" #include "pgstat.h" #include "rewrite/rewriteDefine.h" #include "rewrite/rowsecurity.h" @@ -85,6 +86,7 @@ #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" +#include "utils/partcache.h" #include "utils/relmapper.h" #include "utils/resowner_private.h" #include "utils/snapmgr.h" @@ -265,7 +267,6 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi static Relation AllocateRelationDesc(Form_pg_class relp); static void RelationParseRelOptions(Relation relation, HeapTuple tuple); static void RelationBuildTupleDesc(Relation relation); -static void RelationBuildPartitionKey(Relation relation); static Relation RelationBuildDesc(Oid targetRelId, bool insertIt); static void RelationInitPhysicalAddr(Relation relation); static void load_critical_index(Oid indexoid, Oid heapoid); @@ -874,211 +875,6 @@ RelationBuildRuleLock(Relation relation) } /* - * RelationBuildPartitionKey - * Build and attach to relcache partition key data of relation - * - * Partitioning key data is a complex structure; to avoid complicated logic to - * free individual elements whenever the relcache entry is flushed, we give it - * its own memory context, child of CacheMemoryContext, which can easily be - * deleted on its own. To avoid leaking memory in that context in case of an - * error partway through this function, the context is initially created as a - * child of CurTransactionContext and only re-parented to CacheMemoryContext - * at the end, when no further errors are possible. Also, we don't make this - * context the current context except in very brief code sections, out of fear - * that some of our callees allocate memory on their own which would be leaked - * permanently. - */ -static void -RelationBuildPartitionKey(Relation relation) -{ - Form_pg_partitioned_table form; - HeapTuple tuple; - bool isnull; - int i; - PartitionKey key; - AttrNumber *attrs; - oidvector *opclass; - oidvector *collation; - ListCell *partexprs_item; - Datum datum; - MemoryContext partkeycxt, - oldcxt; - int16 procnum; - - tuple = SearchSysCache1(PARTRELID, - ObjectIdGetDatum(RelationGetRelid(relation))); - - /* - * The following happens when we have created our pg_class entry but not - * the pg_partitioned_table entry yet. - */ - if (!HeapTupleIsValid(tuple)) - return; - - partkeycxt = AllocSetContextCreate(CurTransactionContext, - "partition key", - ALLOCSET_SMALL_SIZES); - MemoryContextCopyAndSetIdentifier(partkeycxt, - RelationGetRelationName(relation)); - - key = (PartitionKey) MemoryContextAllocZero(partkeycxt, - sizeof(PartitionKeyData)); - - /* Fixed-length attributes */ - form = (Form_pg_partitioned_table) GETSTRUCT(tuple); - key->strategy = form->partstrat; - key->partnatts = form->partnatts; - - /* - * We can rely on the first variable-length attribute being mapped to the - * relevant field of the catalog's C struct, because all previous - * attributes are non-nullable and fixed-length. - */ - attrs = form->partattrs.values; - - /* But use the hard way to retrieve further variable-length attributes */ - /* Operator class */ - datum = SysCacheGetAttr(PARTRELID, tuple, - Anum_pg_partitioned_table_partclass, &isnull); - Assert(!isnull); - opclass = (oidvector *) DatumGetPointer(datum); - - /* Collation */ - datum = SysCacheGetAttr(PARTRELID, tuple, - Anum_pg_partitioned_table_partcollation, &isnull); - Assert(!isnull); - collation = (oidvector *) DatumGetPointer(datum); - - /* Expressions */ - datum = SysCacheGetAttr(PARTRELID, tuple, - Anum_pg_partitioned_table_partexprs, &isnull); - if (!isnull) - { - char *exprString; - Node *expr; - - exprString = TextDatumGetCString(datum); - expr = stringToNode(exprString); - pfree(exprString); - - /* - * Run the expressions through const-simplification since the planner - * will be comparing them to similarly-processed qual clause operands, - * and may fail to detect valid matches without this step; fix - * opfuncids while at it. We don't need to bother with - * canonicalize_qual() though, because partition expressions should be - * in canonical form already (ie, no need for OR-merging or constant - * elimination). - */ - expr = eval_const_expressions(NULL, expr); - fix_opfuncids(expr); - - oldcxt = MemoryContextSwitchTo(partkeycxt); - key->partexprs = (List *) copyObject(expr); - MemoryContextSwitchTo(oldcxt); - } - - oldcxt = MemoryContextSwitchTo(partkeycxt); - key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber)); - key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo)); - - key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - - /* Gather type and collation info as well */ - key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32)); - key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16)); - key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool)); - key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char)); - key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid)); - MemoryContextSwitchTo(oldcxt); - - /* determine support function number to search for */ - procnum = (key->strategy == PARTITION_STRATEGY_HASH) ? - HASHEXTENDED_PROC : BTORDER_PROC; - - /* Copy partattrs and fill other per-attribute info */ - memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16)); - partexprs_item = list_head(key->partexprs); - for (i = 0; i < key->partnatts; i++) - { - AttrNumber attno = key->partattrs[i]; - HeapTuple opclasstup; - Form_pg_opclass opclassform; - Oid funcid; - - /* Collect opfamily information */ - opclasstup = SearchSysCache1(CLAOID, - ObjectIdGetDatum(opclass->values[i])); - if (!HeapTupleIsValid(opclasstup)) - elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]); - - opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup); - key->partopfamily[i] = opclassform->opcfamily; - key->partopcintype[i] = opclassform->opcintype; - - /* Get a support function for the specified opfamily and datatypes */ - funcid = get_opfamily_proc(opclassform->opcfamily, - opclassform->opcintype, - opclassform->opcintype, - procnum); - if (!OidIsValid(funcid)) - ereport(ERROR, - (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), - errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s", - NameStr(opclassform->opcname), - (key->strategy == PARTITION_STRATEGY_HASH) ? - "hash" : "btree", - procnum, - format_type_be(opclassform->opcintype)))); - - fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt); - - /* Collation */ - key->partcollation[i] = collation->values[i]; - - /* Collect type information */ - if (attno != 0) - { - Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1); - - key->parttypid[i] = att->atttypid; - key->parttypmod[i] = att->atttypmod; - key->parttypcoll[i] = att->attcollation; - } - else - { - if (partexprs_item == NULL) - elog(ERROR, "wrong number of partition key expressions"); - - key->parttypid[i] = exprType(lfirst(partexprs_item)); - key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); - key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); - - partexprs_item = lnext(partexprs_item); - } - get_typlenbyvalalign(key->parttypid[i], - &key->parttyplen[i], - &key->parttypbyval[i], - &key->parttypalign[i]); - - ReleaseSysCache(opclasstup); - } - - ReleaseSysCache(tuple); - - /* - * Success --- reparent our context and make the relcache point to the - * newly constructed key - */ - MemoryContextSetParent(partkeycxt, CacheMemoryContext); - relation->rd_partkeycxt = partkeycxt; - relation->rd_partkey = key; -} - -/* * equalRuleLocks * * Determine whether two RuleLocks are equivalent diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c index 8ab7d1337fa..3fc8b6a8a84 100644 --- a/src/backend/utils/misc/pg_controldata.c +++ b/src/backend/utils/misc/pg_controldata.c @@ -15,13 +15,14 @@ #include "postgres.h" -#include "funcapi.h" -#include "miscadmin.h" #include "access/htup_details.h" #include "access/xlog_internal.h" +#include "access/xlog.h" #include "catalog/pg_control.h" #include "catalog/pg_type.h" #include "common/controldata_utils.h" +#include "funcapi.h" +#include "miscadmin.h" #include "utils/builtins.h" #include "utils/pg_lsn.h" #include "utils/timestamp.h" |