aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils')
-rw-r--r--src/backend/utils/adt/ruleutils.c2
-rw-r--r--src/backend/utils/cache/Makefile6
-rw-r--r--src/backend/utils/cache/partcache.c967
-rw-r--r--src/backend/utils/cache/relcache.c208
-rw-r--r--src/backend/utils/misc/pg_controldata.c5
5 files changed, 976 insertions, 212 deletions
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index 99643e83b25..74e1cd8afb1 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -24,7 +24,6 @@
#include "access/sysattr.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
-#include "catalog/partition.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_am.h"
#include "catalog/pg_authid.h"
@@ -64,6 +63,7 @@
#include "utils/guc.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
+#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
diff --git a/src/backend/utils/cache/Makefile b/src/backend/utils/cache/Makefile
index a943f8ea4bc..e6a6b9f395f 100644
--- a/src/backend/utils/cache/Makefile
+++ b/src/backend/utils/cache/Makefile
@@ -12,8 +12,8 @@ subdir = src/backend/utils/cache
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \
- relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \
- typcache.o ts_cache.o
+OBJS = attoptcache.o catcache.o evtcache.o inval.o lsyscache.o \
+ partcache.o plancache.o relcache.o relmapper.o relfilenodemap.o \
+ spccache.o syscache.o ts_cache.o typcache.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/utils/cache/partcache.c b/src/backend/utils/cache/partcache.c
new file mode 100644
index 00000000000..e2f677a46a4
--- /dev/null
+++ b/src/backend/utils/cache/partcache.c
@@ -0,0 +1,967 @@
+/*-------------------------------------------------------------------------
+ *
+ * partcache.c
+ * Support routines for manipulating partition information cached in
+ * relcache
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/cache/partcache.c
+ *
+ *-------------------------------------------------------------------------
+*/
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/nbtree.h"
+#include "catalog/partition.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_partitioned_table.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/planner.h"
+#include "partitioning/partbounds.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/partcache.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+
+
+/* Prototypes for functions private to this file */
+static List *generate_partition_qual(Relation rel);
+/* Comparators passed to qsort()/qsort_arg() by RelationBuildPartitionDesc */
+static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
+static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
+ void *arg);
+static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
+ void *arg);
+
+
+/*
+ * RelationBuildPartitionKey
+ *      Build and attach to relcache partition key data of relation
+ *
+ * Partitioning key data is a complex structure; to avoid complicated logic to
+ * free individual elements whenever the relcache entry is flushed, we give it
+ * its own memory context, child of CacheMemoryContext, which can easily be
+ * deleted on its own.  To avoid leaking memory in that context in case of an
+ * error partway through this function, the context is initially created as a
+ * child of CurTransactionContext and only re-parented to CacheMemoryContext
+ * at the end, when no further errors are possible.  Also, we don't make this
+ * context the current context except in very brief code sections, out of fear
+ * that some of our callees allocate memory on their own which would be leaked
+ * permanently.
+ *
+ * If the relation has no pg_partitioned_table row yet, the function returns
+ * without touching the relcache entry.
+ */
+void
+RelationBuildPartitionKey(Relation relation)
+{
+    Form_pg_partitioned_table form;
+    HeapTuple   tuple;
+    bool        isnull;
+    int         i;
+    PartitionKey key;
+    AttrNumber *attrs;
+    oidvector  *opclass;
+    oidvector  *collation;
+    ListCell   *partexprs_item;
+    Datum       datum;
+    MemoryContext partkeycxt,
+                oldcxt;
+    int16       procnum;
+
+    tuple = SearchSysCache1(PARTRELID,
+                            ObjectIdGetDatum(RelationGetRelid(relation)));
+
+    /*
+     * The following happens when we have created our pg_class entry but not
+     * the pg_partitioned_table entry yet.
+     */
+    if (!HeapTupleIsValid(tuple))
+        return;
+
+    partkeycxt = AllocSetContextCreate(CurTransactionContext,
+                                       "partition key",
+                                       ALLOCSET_SMALL_SIZES);
+    MemoryContextCopyAndSetIdentifier(partkeycxt,
+                                      RelationGetRelationName(relation));
+
+    key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
+                                                sizeof(PartitionKeyData));
+
+    /* Fixed-length attributes */
+    form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+    key->strategy = form->partstrat;
+    key->partnatts = form->partnatts;
+
+    /*
+     * We can rely on the first variable-length attribute being mapped to the
+     * relevant field of the catalog's C struct, because all previous
+     * attributes are non-nullable and fixed-length.
+     */
+    attrs = form->partattrs.values;
+
+    /* But use the hard way to retrieve further variable-length attributes */
+    /* Operator class */
+    datum = SysCacheGetAttr(PARTRELID, tuple,
+                            Anum_pg_partitioned_table_partclass, &isnull);
+    Assert(!isnull);
+    opclass = (oidvector *) DatumGetPointer(datum);
+
+    /* Collation */
+    datum = SysCacheGetAttr(PARTRELID, tuple,
+                            Anum_pg_partitioned_table_partcollation, &isnull);
+    Assert(!isnull);
+    collation = (oidvector *) DatumGetPointer(datum);
+
+    /* Expressions */
+    datum = SysCacheGetAttr(PARTRELID, tuple,
+                            Anum_pg_partitioned_table_partexprs, &isnull);
+    if (!isnull)
+    {
+        char       *exprString;
+        Node       *expr;
+
+        exprString = TextDatumGetCString(datum);
+        expr = stringToNode(exprString);
+        pfree(exprString);
+
+        /*
+         * Run the expressions through const-simplification since the planner
+         * will be comparing them to similarly-processed qual clause operands,
+         * and may fail to detect valid matches without this step; fix
+         * opfuncids while at it.  We don't need to bother with
+         * canonicalize_qual() though, because partition expressions should be
+         * in canonical form already (ie, no need for OR-merging or constant
+         * elimination).
+         */
+        expr = eval_const_expressions(NULL, expr);
+        fix_opfuncids(expr);
+
+        /* Only the final copy is made inside the key's own context */
+        oldcxt = MemoryContextSwitchTo(partkeycxt);
+        key->partexprs = (List *) copyObject(expr);
+        MemoryContextSwitchTo(oldcxt);
+    }
+
+    /* Allocate the per-attribute arrays in the key's own context */
+    oldcxt = MemoryContextSwitchTo(partkeycxt);
+    key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
+    key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+    key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+    key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
+
+    key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+
+    /* Gather type and collation info as well */
+    key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+    key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
+    key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
+    key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
+    key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
+    key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
+    MemoryContextSwitchTo(oldcxt);
+
+    /* determine support function number to search for */
+    procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
+        HASHEXTENDED_PROC : BTORDER_PROC;
+
+    /*
+     * Copy partattrs and fill other per-attribute info.  The copy is sized
+     * with the same element type that was used to allocate the destination.
+     */
+    memcpy(key->partattrs, attrs, key->partnatts * sizeof(AttrNumber));
+    partexprs_item = list_head(key->partexprs);
+    for (i = 0; i < key->partnatts; i++)
+    {
+        AttrNumber  attno = key->partattrs[i];
+        HeapTuple   opclasstup;
+        Form_pg_opclass opclassform;
+        Oid         funcid;
+
+        /* Collect opfamily information */
+        opclasstup = SearchSysCache1(CLAOID,
+                                     ObjectIdGetDatum(opclass->values[i]));
+        if (!HeapTupleIsValid(opclasstup))
+            elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
+
+        opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
+        key->partopfamily[i] = opclassform->opcfamily;
+        key->partopcintype[i] = opclassform->opcintype;
+
+        /* Get a support function for the specified opfamily and datatypes */
+        funcid = get_opfamily_proc(opclassform->opcfamily,
+                                   opclassform->opcintype,
+                                   opclassform->opcintype,
+                                   procnum);
+        if (!OidIsValid(funcid))
+            ereport(ERROR,
+                    (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+                     errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
+                            NameStr(opclassform->opcname),
+                            (key->strategy == PARTITION_STRATEGY_HASH) ?
+                            "hash" : "btree",
+                            procnum,
+                            format_type_be(opclassform->opcintype))));
+
+        fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt);
+
+        /* Collation */
+        key->partcollation[i] = collation->values[i];
+
+        /* Collect type information */
+        if (attno != 0)
+        {
+            /* Plain column reference: take the type from the tuple descriptor */
+            Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
+
+            key->parttypid[i] = att->atttypid;
+            key->parttypmod[i] = att->atttypmod;
+            key->parttypcoll[i] = att->attcollation;
+        }
+        else
+        {
+            /* attno 0 marks an expression; consume the next partexprs entry */
+            if (partexprs_item == NULL)
+                elog(ERROR, "wrong number of partition key expressions");
+
+            key->parttypid[i] = exprType(lfirst(partexprs_item));
+            key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
+            key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
+
+            partexprs_item = lnext(partexprs_item);
+        }
+        get_typlenbyvalalign(key->parttypid[i],
+                             &key->parttyplen[i],
+                             &key->parttypbyval[i],
+                             &key->parttypalign[i]);
+
+        ReleaseSysCache(opclasstup);
+    }
+
+    ReleaseSysCache(tuple);
+
+    /*
+     * Success --- reparent our context and make the relcache point to the
+     * newly constructed key
+     */
+    MemoryContextSetParent(partkeycxt, CacheMemoryContext);
+    relation->rd_partkeycxt = partkeycxt;
+    relation->rd_partkey = key;
+}
+
+/*
+ * RelationBuildPartitionDesc
+ *      Form rel's partition descriptor
+ *
+ * Not flushed from the cache by RelationClearRelation() unless changed because
+ * of addition or removal of partition.
+ *
+ * The bound specs of all partitions are first converted and sorted in the
+ * caller's memory context.  The descriptor itself is then built in a
+ * dedicated memory context.  As in RelationBuildPartitionKey, that context
+ * starts out as a child of CurTransactionContext and is re-parented to
+ * CacheMemoryContext only once the descriptor is complete, so that an error
+ * partway through does not leave a half-built context behind in cache memory.
+ */
+void
+RelationBuildPartitionDesc(Relation rel)
+{
+    List       *inhoids,
+               *partoids;
+    Oid        *oids = NULL;
+    List       *boundspecs = NIL;
+    ListCell   *cell;
+    int         i,
+                nparts;
+    PartitionKey key = RelationGetPartitionKey(rel);
+    PartitionDesc result;
+    MemoryContext new_pdcxt,
+                oldcxt;
+
+    int         ndatums = 0;
+    int         default_index = -1;
+
+    /* Hash partitioning specific */
+    PartitionHashBound **hbounds = NULL;
+
+    /* List partitioning specific */
+    PartitionListValue **all_values = NULL;
+    int         null_index = -1;
+
+    /* Range partitioning specific */
+    PartitionRangeBound **rbounds = NULL;
+
+    /* Get partition oids from pg_inherits */
+    inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
+
+    /* Collect bound spec nodes in a list */
+    partoids = NIL;
+    foreach(cell, inhoids)
+    {
+        Oid         inhrelid = lfirst_oid(cell);
+        HeapTuple   tuple;
+        Datum       datum;
+        bool        isnull;
+        Node       *boundspec;
+
+        tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(inhrelid));
+        if (!HeapTupleIsValid(tuple))
+            elog(ERROR, "cache lookup failed for relation %u", inhrelid);
+
+        /*
+         * It is possible that the pg_class tuple of a partition has not been
+         * updated yet to set its relpartbound field.  The only case where
+         * this happens is when we open the parent relation to check using its
+         * partition descriptor that a new partition's bound does not overlap
+         * some existing partition.
+         */
+        if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
+        {
+            ReleaseSysCache(tuple);
+            continue;
+        }
+
+        datum = SysCacheGetAttr(RELOID, tuple,
+                                Anum_pg_class_relpartbound,
+                                &isnull);
+        Assert(!isnull);
+        boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
+
+        /*
+         * Sanity check: If the PartitionBoundSpec says this is the default
+         * partition, its OID should correspond to whatever's stored in
+         * pg_partitioned_table.partdefid; if not, the catalog is corrupt.
+         */
+        if (castNode(PartitionBoundSpec, boundspec)->is_default)
+        {
+            Oid         partdefid;
+
+            partdefid = get_default_partition_oid(RelationGetRelid(rel));
+            if (partdefid != inhrelid)
+                elog(ERROR, "expected partdefid %u, but got %u",
+                     inhrelid, partdefid);
+        }
+
+        boundspecs = lappend(boundspecs, boundspec);
+        partoids = lappend_oid(partoids, inhrelid);
+        ReleaseSysCache(tuple);
+    }
+
+    nparts = list_length(partoids);
+
+    if (nparts > 0)
+    {
+        /* Flatten the OID list; oids[i] pairs with the i'th bound spec */
+        oids = (Oid *) palloc(nparts * sizeof(Oid));
+        i = 0;
+        foreach(cell, partoids)
+            oids[i++] = lfirst_oid(cell);
+
+        /* Convert from node to the internal representation */
+        if (key->strategy == PARTITION_STRATEGY_HASH)
+        {
+            ndatums = nparts;
+            hbounds = (PartitionHashBound **)
+                palloc(nparts * sizeof(PartitionHashBound *));
+
+            i = 0;
+            foreach(cell, boundspecs)
+            {
+                PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
+                                                    lfirst(cell));
+
+                if (spec->strategy != PARTITION_STRATEGY_HASH)
+                    elog(ERROR, "invalid strategy in partition bound spec");
+
+                hbounds[i] = (PartitionHashBound *)
+                    palloc(sizeof(PartitionHashBound));
+
+                hbounds[i]->modulus = spec->modulus;
+                hbounds[i]->remainder = spec->remainder;
+                hbounds[i]->index = i;
+                i++;
+            }
+
+            /* Sort all the bounds in ascending order */
+            qsort(hbounds, nparts, sizeof(PartitionHashBound *),
+                  qsort_partition_hbound_cmp);
+        }
+        else if (key->strategy == PARTITION_STRATEGY_LIST)
+        {
+            List       *non_null_values = NIL;
+
+            /*
+             * Create a unified list of non-null values across all partitions.
+             */
+            i = 0;
+            null_index = -1;
+            foreach(cell, boundspecs)
+            {
+                PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
+                                                    lfirst(cell));
+                ListCell   *c;
+
+                if (spec->strategy != PARTITION_STRATEGY_LIST)
+                    elog(ERROR, "invalid strategy in partition bound spec");
+
+                /*
+                 * Note the index of the partition bound spec for the default
+                 * partition.  There's no datum to add to the list of non-null
+                 * datums for this partition.
+                 */
+                if (spec->is_default)
+                {
+                    default_index = i;
+                    i++;
+                    continue;
+                }
+
+                foreach(c, spec->listdatums)
+                {
+                    Const      *val = castNode(Const, lfirst(c));
+                    PartitionListValue *list_value = NULL;
+
+                    if (!val->constisnull)
+                    {
+                        list_value = (PartitionListValue *)
+                            palloc0(sizeof(PartitionListValue));
+                        list_value->index = i;
+                        list_value->value = val->constvalue;
+                    }
+                    else
+                    {
+                        /*
+                         * Never put a null into the values array, flag
+                         * instead for the code further down below where we
+                         * construct the actual relcache struct.
+                         */
+                        if (null_index != -1)
+                            elog(ERROR, "found null more than once");
+                        null_index = i;
+                    }
+
+                    if (list_value)
+                        non_null_values = lappend(non_null_values,
+                                                  list_value);
+                }
+
+                i++;
+            }
+
+            ndatums = list_length(non_null_values);
+
+            /*
+             * Collect all list values in one array.  Alongside the value, we
+             * also save the index of partition the value comes from.
+             */
+            all_values = (PartitionListValue **) palloc(ndatums *
+                                                        sizeof(PartitionListValue *));
+            i = 0;
+            foreach(cell, non_null_values)
+            {
+                PartitionListValue *src = lfirst(cell);
+
+                all_values[i] = (PartitionListValue *)
+                    palloc(sizeof(PartitionListValue));
+                all_values[i]->value = src->value;
+                all_values[i]->index = src->index;
+                i++;
+            }
+
+            qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
+                      qsort_partition_list_value_cmp, (void *) key);
+        }
+        else if (key->strategy == PARTITION_STRATEGY_RANGE)
+        {
+            int         k;
+            PartitionRangeBound **all_bounds,
+                       *prev;
+
+            all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
+                                                          sizeof(PartitionRangeBound *));
+
+            /*
+             * Create a unified list of range bounds across all the
+             * partitions.
+             */
+            i = ndatums = 0;
+            foreach(cell, boundspecs)
+            {
+                PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
+                                                    lfirst(cell));
+                PartitionRangeBound *lower,
+                           *upper;
+
+                if (spec->strategy != PARTITION_STRATEGY_RANGE)
+                    elog(ERROR, "invalid strategy in partition bound spec");
+
+                /*
+                 * Note the index of the partition bound spec for the default
+                 * partition.  There's no datum to add to the allbounds array
+                 * for this partition.
+                 */
+                if (spec->is_default)
+                {
+                    default_index = i++;
+                    continue;
+                }
+
+                lower = make_one_range_bound(key, i, spec->lowerdatums,
+                                             true);
+                upper = make_one_range_bound(key, i, spec->upperdatums,
+                                             false);
+                all_bounds[ndatums++] = lower;
+                all_bounds[ndatums++] = upper;
+                i++;
+            }
+
+            Assert(ndatums == nparts * 2 ||
+                   (default_index != -1 && ndatums == (nparts - 1) * 2));
+
+            /* Sort all the bounds in ascending order */
+            qsort_arg(all_bounds, ndatums,
+                      sizeof(PartitionRangeBound *),
+                      qsort_partition_rbound_cmp,
+                      (void *) key);
+
+            /* Save distinct bounds from all_bounds into rbounds. */
+            rbounds = (PartitionRangeBound **)
+                palloc(ndatums * sizeof(PartitionRangeBound *));
+            k = 0;
+            prev = NULL;
+            for (i = 0; i < ndatums; i++)
+            {
+                PartitionRangeBound *cur = all_bounds[i];
+                bool        is_distinct = false;
+                int         j;
+
+                /* Is the current bound distinct from the previous one? */
+                for (j = 0; j < key->partnatts; j++)
+                {
+                    Datum       cmpval;
+
+                    if (prev == NULL || cur->kind[j] != prev->kind[j])
+                    {
+                        is_distinct = true;
+                        break;
+                    }
+
+                    /*
+                     * If the bounds are both MINVALUE or MAXVALUE, stop now
+                     * and treat them as equal, since any values after this
+                     * point must be ignored.
+                     */
+                    if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
+                        break;
+
+                    cmpval = FunctionCall2Coll(&key->partsupfunc[j],
+                                               key->partcollation[j],
+                                               cur->datums[j],
+                                               prev->datums[j]);
+                    if (DatumGetInt32(cmpval) != 0)
+                    {
+                        is_distinct = true;
+                        break;
+                    }
+                }
+
+                /*
+                 * Only if the bound is distinct save it into a temporary
+                 * array i.e. rbounds which is later copied into boundinfo
+                 * datums array.
+                 */
+                if (is_distinct)
+                    rbounds[k++] = all_bounds[i];
+
+                prev = cur;
+            }
+
+            /* Update ndatums to hold the count of distinct datums. */
+            ndatums = k;
+        }
+        else
+            elog(ERROR, "unexpected partition strategy: %d",
+                 (int) key->strategy);
+    }
+
+    /*
+     * Now build the actual relcache partition descriptor.  The context is
+     * created under CurTransactionContext and only handed over to
+     * CacheMemoryContext (and to the relcache entry) at the very end, so an
+     * error raised below cannot leak it into cache memory.
+     */
+    new_pdcxt = AllocSetContextCreate(CurTransactionContext,
+                                      "partition descriptor",
+                                      ALLOCSET_DEFAULT_SIZES);
+    MemoryContextCopyAndSetIdentifier(new_pdcxt, RelationGetRelationName(rel));
+
+    oldcxt = MemoryContextSwitchTo(new_pdcxt);
+
+    result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
+    result->nparts = nparts;
+    if (nparts > 0)
+    {
+        PartitionBoundInfo boundinfo;
+        int        *mapping;
+        int         next_index = 0;
+
+        result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
+
+        boundinfo = (PartitionBoundInfoData *)
+            palloc0(sizeof(PartitionBoundInfoData));
+        boundinfo->strategy = key->strategy;
+        boundinfo->default_index = -1;
+        boundinfo->ndatums = ndatums;
+        boundinfo->null_index = -1;
+        boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
+
+        /* Initialize mapping array with invalid values */
+        mapping = (int *) palloc(sizeof(int) * nparts);
+        for (i = 0; i < nparts; i++)
+            mapping[i] = -1;
+
+        switch (key->strategy)
+        {
+            case PARTITION_STRATEGY_HASH:
+                {
+                    /* Moduli are stored in ascending order */
+                    int         greatest_modulus = hbounds[ndatums - 1]->modulus;
+
+                    boundinfo->indexes = (int *) palloc(greatest_modulus *
+                                                        sizeof(int));
+
+                    for (i = 0; i < greatest_modulus; i++)
+                        boundinfo->indexes[i] = -1;
+
+                    for (i = 0; i < nparts; i++)
+                    {
+                        int         modulus = hbounds[i]->modulus;
+                        int         remainder = hbounds[i]->remainder;
+
+                        boundinfo->datums[i] = (Datum *) palloc(2 *
+                                                                sizeof(Datum));
+                        boundinfo->datums[i][0] = Int32GetDatum(modulus);
+                        boundinfo->datums[i][1] = Int32GetDatum(remainder);
+
+                        /* Claim every slot congruent to remainder */
+                        while (remainder < greatest_modulus)
+                        {
+                            /* overlap? */
+                            Assert(boundinfo->indexes[remainder] == -1);
+                            boundinfo->indexes[remainder] = i;
+                            remainder += modulus;
+                        }
+
+                        mapping[hbounds[i]->index] = i;
+                        pfree(hbounds[i]);
+                    }
+                    pfree(hbounds);
+                    break;
+                }
+
+            case PARTITION_STRATEGY_LIST:
+                {
+                    boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
+
+                    /*
+                     * Copy values.  Indexes of individual values are mapped
+                     * to canonical values so that they match for any two list
+                     * partitioned tables with same number of partitions and
+                     * same lists per partition.  One way to canonicalize is
+                     * to assign the index in all_values[] of the smallest
+                     * value of each partition, as the index of all of the
+                     * partition's values.
+                     */
+                    for (i = 0; i < ndatums; i++)
+                    {
+                        boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
+                        boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
+                                                            key->parttypbyval[0],
+                                                            key->parttyplen[0]);
+
+                        /* If the old index has no mapping, assign one */
+                        if (mapping[all_values[i]->index] == -1)
+                            mapping[all_values[i]->index] = next_index++;
+
+                        boundinfo->indexes[i] = mapping[all_values[i]->index];
+                    }
+
+                    /*
+                     * If null-accepting partition has no mapped index yet,
+                     * assign one.  This could happen if such partition
+                     * accepts only null and hence not covered in the above
+                     * loop which only handled non-null values.
+                     */
+                    if (null_index != -1)
+                    {
+                        Assert(null_index >= 0);
+                        if (mapping[null_index] == -1)
+                            mapping[null_index] = next_index++;
+                        boundinfo->null_index = mapping[null_index];
+                    }
+
+                    /* Assign mapped index for the default partition. */
+                    if (default_index != -1)
+                    {
+                        /*
+                         * The default partition accepts any value not
+                         * specified in the lists of other partitions, hence
+                         * it should not get mapped index while assigning
+                         * those for non-null datums.
+                         */
+                        Assert(default_index >= 0 &&
+                               mapping[default_index] == -1);
+                        mapping[default_index] = next_index++;
+                        boundinfo->default_index = mapping[default_index];
+                    }
+
+                    /* All partitions must now have a valid mapping */
+                    Assert(next_index == nparts);
+                    break;
+                }
+
+            case PARTITION_STRATEGY_RANGE:
+                {
+                    boundinfo->kind = (PartitionRangeDatumKind **)
+                        palloc(ndatums *
+                               sizeof(PartitionRangeDatumKind *));
+                    /* One more index slot than there are bounds */
+                    boundinfo->indexes = (int *) palloc((ndatums + 1) *
+                                                        sizeof(int));
+
+                    for (i = 0; i < ndatums; i++)
+                    {
+                        int         j;
+
+                        boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
+                                                                sizeof(Datum));
+                        boundinfo->kind[i] = (PartitionRangeDatumKind *)
+                            palloc(key->partnatts *
+                                   sizeof(PartitionRangeDatumKind));
+                        for (j = 0; j < key->partnatts; j++)
+                        {
+                            if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
+                                boundinfo->datums[i][j] =
+                                    datumCopy(rbounds[i]->datums[j],
+                                              key->parttypbyval[j],
+                                              key->parttyplen[j]);
+                            boundinfo->kind[i][j] = rbounds[i]->kind[j];
+                        }
+
+                        /*
+                         * There is no mapping for invalid indexes.
+                         *
+                         * Any lower bounds in the rbounds array have invalid
+                         * indexes assigned, because the values between the
+                         * previous bound (if there is one) and this (lower)
+                         * bound are not part of the range of any existing
+                         * partition.
+                         */
+                        if (rbounds[i]->lower)
+                            boundinfo->indexes[i] = -1;
+                        else
+                        {
+                            int         orig_index = rbounds[i]->index;
+
+                            /* If the old index has no mapping, assign one */
+                            if (mapping[orig_index] == -1)
+                                mapping[orig_index] = next_index++;
+
+                            boundinfo->indexes[i] = mapping[orig_index];
+                        }
+                    }
+
+                    /* Assign mapped index for the default partition. */
+                    if (default_index != -1)
+                    {
+                        Assert(default_index >= 0 && mapping[default_index] == -1);
+                        mapping[default_index] = next_index++;
+                        boundinfo->default_index = mapping[default_index];
+                    }
+
+                    /* Here i == ndatums: fill the extra trailing slot */
+                    boundinfo->indexes[i] = -1;
+                    break;
+                }
+
+            default:
+                elog(ERROR, "unexpected partition strategy: %d",
+                     (int) key->strategy);
+        }
+
+        result->boundinfo = boundinfo;
+
+        /*
+         * Now assign OIDs from the original array into mapped indexes of the
+         * result array.  Order of OIDs in the former is defined by the
+         * catalog scan that retrieved them, whereas that in the latter is
+         * defined by canonicalized representation of the partition bounds.
+         */
+        for (i = 0; i < nparts; i++)
+            result->oids[mapping[i]] = oids[i];
+        pfree(mapping);
+    }
+
+    MemoryContextSwitchTo(oldcxt);
+
+    /*
+     * Success --- reparent our context and make the relcache point to the
+     * newly constructed descriptor
+     */
+    MemoryContextSetParent(new_pdcxt, CacheMemoryContext);
+    rel->rd_pdcxt = new_pdcxt;
+    rel->rd_partdesc = result;
+}
+
+/*
+ * RelationGetPartitionQual
+ *
+ * Returns the list of partition quals for rel, or NIL when rel is not a
+ * partition.
+ */
+List *
+RelationGetPartitionQual(Relation rel)
+{
+    /* Only a partition can carry a partition constraint */
+    if (rel->rd_rel->relispartition)
+        return generate_partition_qual(rel);
+
+    return NIL;
+}
+
+/*
+ * get_partition_qual_relid
+ *
+ * Returns an expression tree describing the passed-in relation's partition
+ * constraint.  If there is no partition constraint returns NULL; this can
+ * happen if the default partition is the only partition.
+ *
+ * The relation is opened with AccessShareLock, and the lock is retained
+ * after the relation is closed.
+ */
+Expr *
+get_partition_qual_relid(Oid relid)
+{
+    Expr       *result = NULL;
+    Relation    rel = heap_open(relid, AccessShareLock);
+
+    /* Do the work only if this relation is a partition. */
+    if (rel->rd_rel->relispartition)
+    {
+        List       *quals = generate_partition_qual(rel);
+
+        /* Several quals are ANDed together; a lone qual is returned as is */
+        if (list_length(quals) > 1)
+            result = makeBoolExpr(AND_EXPR, quals, -1);
+        else if (quals != NIL)
+            result = linitial(quals);
+    }
+
+    /* Keep the lock. */
+    heap_close(rel, NoLock);
+
+    return result;
+}
+
+/*
+ * generate_partition_qual
+ *
+ * Generate partition predicate from rel's partition bound expression.  The
+ * function returns a NIL list if there is no predicate.
+ *
+ * The result expression tree is stored in CacheMemoryContext to ensure it
+ * survives as long as the relcache entry.  But we should be running in a less
+ * long-lived working context.  To avoid leaking cache memory if this routine
+ * fails partway through, we build in working memory and then copy the
+ * completed structure into cache memory.
+ *
+ * When a cached predicate already exists, a copy of it is returned and
+ * nothing is recomputed.  Otherwise the function recurses into the parent
+ * (when the parent is itself a partition) and prepends the parent's quals.
+ */
+static List *
+generate_partition_qual(Relation rel)
+{
+    HeapTuple   tuple;
+    MemoryContext oldcxt;
+    Datum       boundDatum;
+    bool        isnull;
+    PartitionBoundSpec *bound;
+    List       *my_qual = NIL,
+               *result = NIL;
+    Relation    parent;
+    bool        found_whole_row;
+
+    /* Guard against stack overflow due to overly deep partition tree */
+    check_stack_depth();
+
+    /* Quick copy */
+    if (rel->rd_partcheck != NIL)
+        return copyObject(rel->rd_partcheck);
+
+    /* Grab at least an AccessShareLock on the parent table */
+    parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
+                       AccessShareLock);
+
+    /* Get pg_class.relpartbound */
+    tuple = SearchSysCache1(RELOID,
+                            ObjectIdGetDatum(RelationGetRelid(rel)));
+    if (!HeapTupleIsValid(tuple))
+        elog(ERROR, "cache lookup failed for relation %u",
+             RelationGetRelid(rel));
+
+    boundDatum = SysCacheGetAttr(RELOID, tuple,
+                                 Anum_pg_class_relpartbound,
+                                 &isnull);
+    if (isnull)                 /* should not happen */
+        elog(ERROR, "relation \"%s\" has relpartbound = null",
+             RelationGetRelationName(rel));
+    bound = castNode(PartitionBoundSpec,
+                     stringToNode(TextDatumGetCString(boundDatum)));
+    ReleaseSysCache(tuple);
+
+    my_qual = get_qual_from_partbound(rel, parent, bound);
+
+    /* Add the parent's quals to the list (if any) */
+    if (parent->rd_rel->relispartition)
+        result = list_concat(generate_partition_qual(parent), my_qual);
+    else
+        result = my_qual;
+
+    /*
+     * Change Vars to have partition's attnos instead of the parent's.  We do
+     * this after we concatenate the parent's quals, because we want every Var
+     * in it to bear this relation's attnos.  It's safe to assume varno = 1
+     * here.
+     */
+    result = map_partition_varattnos(result, 1, rel, parent,
+                                     &found_whole_row);
+    /* There can never be a whole-row reference here */
+    if (found_whole_row)
+        elog(ERROR, "unexpected whole-row reference found in partition key");
+
+    /* Save a copy in the relcache */
+    oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
+    rel->rd_partcheck = copyObject(result);
+    MemoryContextSwitchTo(oldcxt);
+
+    /* Keep the parent locked until commit */
+    heap_close(parent, NoLock);
+
+    return result;
+}
+
+/*
+ * qsort_partition_hbound_cmp
+ *
+ * qsort() comparator over an array of PartitionHashBound pointers; the
+ * ordering is by modulus, then by remainder.
+ */
+static int32
+qsort_partition_hbound_cmp(const void *a, const void *b)
+{
+    PartitionHashBound *const *lhs = (PartitionHashBound *const *) a;
+    PartitionHashBound *const *rhs = (PartitionHashBound *const *) b;
+
+    return partition_hbound_cmp((*lhs)->modulus, (*lhs)->remainder,
+                                (*rhs)->modulus, (*rhs)->remainder);
+}
+
+/*
+ * qsort_partition_list_value_cmp
+ *
+ * Compare two list partition bound datums
+ *
+ * a and b point at elements of an array of PartitionListValue pointers; arg
+ * is the PartitionKey whose first support function and collation are used
+ * for the comparison.
+ */
+static int32
+qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
+{
+    /* Cast keeps the array elements const, as the sibling comparators do */
+    Datum       val1 = (*(PartitionListValue *const *) a)->value,
+                val2 = (*(PartitionListValue *const *) b)->value;
+    PartitionKey key = (PartitionKey) arg;
+
+    return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
+                                           key->partcollation[0],
+                                           val1, val2));
+}
+
+/*
+ * qsort_partition_rbound_cmp
+ *
+ * qsort_arg() comparator used when sorting range bounds across all range
+ * partitions; arg is the PartitionKey.
+ */
+static int32
+qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
+{
+    PartitionKey key = (PartitionKey) arg;
+    PartitionRangeBound *const *lhs = (PartitionRangeBound *const *) a;
+    PartitionRangeBound *rhs = *(PartitionRangeBound *const *) b;
+
+    return partition_rbound_cmp(key->partnatts, key->partsupfunc,
+                                key->partcollation,
+                                (*lhs)->datums, (*lhs)->kind, (*lhs)->lower,
+                                rhs);
+}
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index dfa95fed300..22ff36714ca 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -73,6 +73,7 @@
#include "optimizer/cost.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
+#include "partitioning/partbounds.h"
#include "pgstat.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rowsecurity.h"
@@ -85,6 +86,7 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/partcache.h"
#include "utils/relmapper.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
@@ -265,7 +267,6 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi
static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
-static void RelationBuildPartitionKey(Relation relation);
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
static void RelationInitPhysicalAddr(Relation relation);
static void load_critical_index(Oid indexoid, Oid heapoid);
@@ -874,211 +875,6 @@ RelationBuildRuleLock(Relation relation)
}
/*
- * RelationBuildPartitionKey
- * Build and attach to relcache partition key data of relation
- *
- * Partitioning key data is a complex structure; to avoid complicated logic to
- * free individual elements whenever the relcache entry is flushed, we give it
- * its own memory context, child of CacheMemoryContext, which can easily be
- * deleted on its own. To avoid leaking memory in that context in case of an
- * error partway through this function, the context is initially created as a
- * child of CurTransactionContext and only re-parented to CacheMemoryContext
- * at the end, when no further errors are possible. Also, we don't make this
- * context the current context except in very brief code sections, out of fear
- * that some of our callees allocate memory on their own which would be leaked
- * permanently.
- */
-static void
-RelationBuildPartitionKey(Relation relation)
-{
- Form_pg_partitioned_table form;
- HeapTuple tuple;
- bool isnull;
- int i;
- PartitionKey key;
- AttrNumber *attrs;
- oidvector *opclass;
- oidvector *collation;
- ListCell *partexprs_item;
- Datum datum;
- MemoryContext partkeycxt,
- oldcxt;
- int16 procnum;
-
- tuple = SearchSysCache1(PARTRELID,
- ObjectIdGetDatum(RelationGetRelid(relation)));
-
- /*
- * The following happens when we have created our pg_class entry but not
- * the pg_partitioned_table entry yet.
- */
- if (!HeapTupleIsValid(tuple))
- return;
-
- partkeycxt = AllocSetContextCreate(CurTransactionContext,
- "partition key",
- ALLOCSET_SMALL_SIZES);
- MemoryContextCopyAndSetIdentifier(partkeycxt,
- RelationGetRelationName(relation));
-
- key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
- sizeof(PartitionKeyData));
-
- /* Fixed-length attributes */
- form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
- key->strategy = form->partstrat;
- key->partnatts = form->partnatts;
-
- /*
- * We can rely on the first variable-length attribute being mapped to the
- * relevant field of the catalog's C struct, because all previous
- * attributes are non-nullable and fixed-length.
- */
- attrs = form->partattrs.values;
-
- /* But use the hard way to retrieve further variable-length attributes */
- /* Operator class */
- datum = SysCacheGetAttr(PARTRELID, tuple,
- Anum_pg_partitioned_table_partclass, &isnull);
- Assert(!isnull);
- opclass = (oidvector *) DatumGetPointer(datum);
-
- /* Collation */
- datum = SysCacheGetAttr(PARTRELID, tuple,
- Anum_pg_partitioned_table_partcollation, &isnull);
- Assert(!isnull);
- collation = (oidvector *) DatumGetPointer(datum);
-
- /* Expressions */
- datum = SysCacheGetAttr(PARTRELID, tuple,
- Anum_pg_partitioned_table_partexprs, &isnull);
- if (!isnull)
- {
- char *exprString;
- Node *expr;
-
- exprString = TextDatumGetCString(datum);
- expr = stringToNode(exprString);
- pfree(exprString);
-
- /*
- * Run the expressions through const-simplification since the planner
- * will be comparing them to similarly-processed qual clause operands,
- * and may fail to detect valid matches without this step; fix
- * opfuncids while at it. We don't need to bother with
- * canonicalize_qual() though, because partition expressions should be
- * in canonical form already (ie, no need for OR-merging or constant
- * elimination).
- */
- expr = eval_const_expressions(NULL, expr);
- fix_opfuncids(expr);
-
- oldcxt = MemoryContextSwitchTo(partkeycxt);
- key->partexprs = (List *) copyObject(expr);
- MemoryContextSwitchTo(oldcxt);
- }
-
- oldcxt = MemoryContextSwitchTo(partkeycxt);
- key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
- key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
- key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
- key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
-
- key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
-
- /* Gather type and collation info as well */
- key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
- key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
- key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
- key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
- key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
- key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
- MemoryContextSwitchTo(oldcxt);
-
- /* determine support function number to search for */
- procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
- HASHEXTENDED_PROC : BTORDER_PROC;
-
- /* Copy partattrs and fill other per-attribute info */
- memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
- partexprs_item = list_head(key->partexprs);
- for (i = 0; i < key->partnatts; i++)
- {
- AttrNumber attno = key->partattrs[i];
- HeapTuple opclasstup;
- Form_pg_opclass opclassform;
- Oid funcid;
-
- /* Collect opfamily information */
- opclasstup = SearchSysCache1(CLAOID,
- ObjectIdGetDatum(opclass->values[i]));
- if (!HeapTupleIsValid(opclasstup))
- elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
-
- opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
- key->partopfamily[i] = opclassform->opcfamily;
- key->partopcintype[i] = opclassform->opcintype;
-
- /* Get a support function for the specified opfamily and datatypes */
- funcid = get_opfamily_proc(opclassform->opcfamily,
- opclassform->opcintype,
- opclassform->opcintype,
- procnum);
- if (!OidIsValid(funcid))
- ereport(ERROR,
- (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
- errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
- NameStr(opclassform->opcname),
- (key->strategy == PARTITION_STRATEGY_HASH) ?
- "hash" : "btree",
- procnum,
- format_type_be(opclassform->opcintype))));
-
- fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt);
-
- /* Collation */
- key->partcollation[i] = collation->values[i];
-
- /* Collect type information */
- if (attno != 0)
- {
- Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
-
- key->parttypid[i] = att->atttypid;
- key->parttypmod[i] = att->atttypmod;
- key->parttypcoll[i] = att->attcollation;
- }
- else
- {
- if (partexprs_item == NULL)
- elog(ERROR, "wrong number of partition key expressions");
-
- key->parttypid[i] = exprType(lfirst(partexprs_item));
- key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
- key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
-
- partexprs_item = lnext(partexprs_item);
- }
- get_typlenbyvalalign(key->parttypid[i],
- &key->parttyplen[i],
- &key->parttypbyval[i],
- &key->parttypalign[i]);
-
- ReleaseSysCache(opclasstup);
- }
-
- ReleaseSysCache(tuple);
-
- /*
- * Success --- reparent our context and make the relcache point to the
- * newly constructed key
- */
- MemoryContextSetParent(partkeycxt, CacheMemoryContext);
- relation->rd_partkeycxt = partkeycxt;
- relation->rd_partkey = key;
-}
-
-/*
* equalRuleLocks
*
* Determine whether two RuleLocks are equivalent
diff --git a/src/backend/utils/misc/pg_controldata.c b/src/backend/utils/misc/pg_controldata.c
index 8ab7d1337fa..3fc8b6a8a84 100644
--- a/src/backend/utils/misc/pg_controldata.c
+++ b/src/backend/utils/misc/pg_controldata.c
@@ -15,13 +15,14 @@
#include "postgres.h"
-#include "funcapi.h"
-#include "miscadmin.h"
#include "access/htup_details.h"
#include "access/xlog_internal.h"
+#include "access/xlog.h"
#include "catalog/pg_control.h"
#include "catalog/pg_type.h"
#include "common/controldata_utils.h"
+#include "funcapi.h"
+#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "utils/timestamp.h"