Diffstat (limited to 'src/backend/executor')
-rw-r--r-- | src/backend/executor/execPartition.c | 174
-rw-r--r-- | src/backend/executor/nodeLockRows.c  | 161
2 files changed, 258 insertions, 77 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 615bd809735..c22c9ac0966 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -176,8 +176,9 @@ static void FormPartitionKeyDatum(PartitionDispatch pd,
 						EState *estate,
 						Datum *values,
 						bool *isnull);
-static int	get_partition_for_tuple(PartitionDispatch pd, Datum *values,
-						bool *isnull);
+static int	get_partition_for_tuple(PartitionKey key,
+						PartitionDesc partdesc,
+						Datum *values, bool *isnull);
 static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
 						Datum *values,
 						bool *isnull,
@@ -318,7 +319,9 @@ ExecFindPartition(ModifyTableState *mtstate,
 	 * these values, error out.
 	 */
 	if (partdesc->nparts == 0 ||
-		(partidx = get_partition_for_tuple(dispatch, values, isnull)) < 0)
+		(partidx = get_partition_for_tuple(dispatch->key,
+										   dispatch->partdesc,
+										   values, isnull)) < 0)
 	{
 		char	   *val_desc;
 
@@ -1341,12 +1344,12 @@ FormPartitionKeyDatum(PartitionDispatch pd,
  * found or -1 if none found.
  */
 static int
-get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
+get_partition_for_tuple(PartitionKey key,
+						PartitionDesc partdesc,
+						Datum *values, bool *isnull)
 {
 	int			bound_offset;
 	int			part_index = -1;
-	PartitionKey key = pd->key;
-	PartitionDesc partdesc = pd->partdesc;
 	PartitionBoundInfo boundinfo = partdesc->boundinfo;
 
 	/* Route as appropriate based on partitioning strategy. */
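The refactored signature above decouples tuple routing from PartitionDispatch: code that holds only a PartitionKey and a PartitionDesc can now route a key, which is what the new function added in the next hunk relies on. A minimal sketch of that call pattern follows (illustrative only, not part of this commit; route_one_key is a made-up name, and get_partition_for_tuple remains static to execPartition.c):

	/*
	 * Illustrative helper: route one set of partition-key values to a
	 * partition index of 'rel', assuming 'rel' is an already-locked
	 * partitioned table and 'partdir' was created with
	 * CreatePartitionDirectory().
	 */
	static int
	route_one_key(PartitionDirectory partdir, Relation rel,
				  Datum *values, bool *isnull)
	{
		PartitionKey key = RelationGetPartitionKey(rel);
		PartitionDesc partdesc = PartitionDirectoryLookup(partdir, rel);

		if (partdesc->nparts == 0)
			return -1;			/* nothing to route to */

		return get_partition_for_tuple(key, partdesc, values, isnull);
	}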
@@ -1439,6 +1442,165 @@ get_partition_for_tuple(PartitionDispatch pd, Datum *values, bool *isnull)
 }
 
 /*
+ * ExecGetLeafPartitionForKey
+ *		Finds the leaf partition of partitioned table 'root_rel' that would
+ *		contain the specified key tuple.
+ *
+ * A subset of the table's columns (including all of the partition key columns)
+ * must be specified:
+ * - 'key_natts' indicates the number of columns contained in the key
+ * - 'key_attnums' indicates their attribute numbers as defined in 'root_rel'
+ * - 'key_vals' and 'key_nulls' specify the key tuple
+ *
+ * Returns the leaf partition, locked with the given lockmode, or NULL if
+ * there isn't one.  Caller is responsible for closing it.  All intermediate
+ * partitions are also locked with the same lockmode.  Caller must have locked
+ * the root already.
+ *
+ * In addition, the OID of the index of a unique constraint on the root table
+ * must be given as 'root_idxoid'; *leaf_idxoid will be set to the OID of the
+ * corresponding index on the returned leaf partition.  (This can be used by
+ * caller to search for a tuple matching the key in the leaf partition.)
+ *
+ * This works because the unique key defined on the root relation is required
+ * to contain the partition key columns of all of the ancestors that lead up to
+ * a given leaf partition.
+ */
+Relation
+ExecGetLeafPartitionForKey(Relation root_rel, int key_natts,
+						   const AttrNumber *key_attnums,
+						   Datum *key_vals, char *key_nulls,
+						   Oid root_idxoid, int lockmode,
+						   Oid *leaf_idxoid)
+{
+	Relation	found_leafpart = NULL;
+	Relation	rel = root_rel;
+	Oid			constr_idxoid = root_idxoid;
+	PartitionDirectory partdir;
+
+	Assert(root_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);
+
+	*leaf_idxoid = InvalidOid;
+
+	partdir = CreatePartitionDirectory(CurrentMemoryContext, true);
+
+	/*
+	 * Descend through partitioned parents to find the leaf partition that
+	 * would accept a row with the provided key values, starting with the
+	 * root parent.
+	 */
+	for (;;)
+	{
+		PartitionKey partkey = RelationGetPartitionKey(rel);
+		PartitionDesc partdesc;
+		Datum		partkey_vals[PARTITION_MAX_KEYS];
+		bool		partkey_isnull[PARTITION_MAX_KEYS];
+		AttrNumber *root_partattrs = partkey->partattrs;
+		int			found_att;
+		int			partidx;
+		Oid			partoid;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/*
+		 * Collect partition key values from the unique key.
+		 *
+		 * Because we only have the root table's copy of pk_attnums, we must
+		 * map any non-root table's partition key attribute numbers to the
+		 * root table's.
+		 */
+		if (rel != root_rel)
+		{
+			/*
+			 * map->attnums will contain root table attribute numbers for
+			 * each attribute of the current partitioned relation.
+			 */
+			AttrMap    *map;
+
+			map = build_attrmap_by_name_if_req(RelationGetDescr(root_rel),
+											   RelationGetDescr(rel));
+			if (map)
+			{
+				root_partattrs = palloc(partkey->partnatts *
+										sizeof(AttrNumber));
+				for (int att = 0; att < partkey->partnatts; att++)
+				{
+					AttrNumber	partattno = partkey->partattrs[att];
+
+					root_partattrs[att] = map->attnums[partattno - 1];
+				}
+
+				free_attrmap(map);
+			}
+		}
+
+		/*
+		 * Map the values/isnulls to match the partition description, as
+		 * necessary.
+		 *
+		 * (Referenced key specification does not allow expressions, so there
+		 * would not be expressions in the partition keys either.)
+		 */
+		Assert(partkey->partexprs == NIL);
+		found_att = 0;
+		for (int keyatt = 0; keyatt < key_natts; keyatt++)
+		{
+			for (int att = 0; att < partkey->partnatts; att++)
+			{
+				if (root_partattrs[att] == key_attnums[keyatt])
+				{
+					partkey_vals[found_att] = key_vals[keyatt];
+					partkey_isnull[found_att] = (key_nulls[keyatt] == 'n');
+					found_att++;
+					break;
+				}
+			}
+		}
+		/* We had better have found values for all partition keys */
+		Assert(found_att == partkey->partnatts);
+
+		if (root_partattrs != partkey->partattrs)
+			pfree(root_partattrs);
+
+		/* Get the PartitionDesc using the partition directory machinery. */
+		partdesc = PartitionDirectoryLookup(partdir, rel);
+		if (partdesc->nparts == 0)
+			break;
+
+		/* Find the partition for the key. */
+		partidx = get_partition_for_tuple(partkey, partdesc,
+										  partkey_vals, partkey_isnull);
+		Assert(partidx < 0 || partidx < partdesc->nparts);
+
+		/* close the previous parent if any, but keep lock */
+		if (rel != root_rel)
+			table_close(rel, NoLock);
+
+		/* No partition found. */
+		if (partidx < 0)
+			break;
+
+		partoid = partdesc->oids[partidx];
+		rel = table_open(partoid, lockmode);
+		constr_idxoid = index_get_partition(rel, constr_idxoid);
+
+		/*
+		 * We're done if the partition is a leaf, else find its partition in
+		 * the next iteration.
+		 */
+		if (partdesc->is_leaf[partidx])
+		{
+			*leaf_idxoid = constr_idxoid;
+			found_leafpart = rel;
+			break;
+		}
+	}
+
+	DestroyPartitionDirectory(partdir);
+	return found_leafpart;
+}
+
+/*
  * ExecBuildSlotPartitionKeyDescription
  *
  * This works very much like BuildIndexValueDescription() and is currently
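As a rough usage sketch of the function added above (hypothetical caller, not part of this commit: the single-column key, the made-up name lookup_leaf_for_key, and the choice of RowShareLock are assumptions for illustration):

	/*
	 * Find the leaf partition that would hold key value 42, assuming the
	 * root table's unique key is its first column ('n' marks a null key
	 * column, anything else means not null).
	 */
	static void
	lookup_leaf_for_key(Relation root_rel, Oid root_uniq_idxoid)
	{
		AttrNumber	key_attnums[1] = {1};
		Datum		key_vals[1] = {Int32GetDatum(42)};
		char		key_nulls[1] = {' '};
		Oid			leaf_idxoid;
		Relation	leafpart;

		leafpart = ExecGetLeafPartitionForKey(root_rel, 1, key_attnums,
											  key_vals, key_nulls,
											  root_uniq_idxoid, RowShareLock,
											  &leaf_idxoid);
		if (leafpart != NULL)
		{
			/* ... probe leaf_idxoid for a tuple matching the key ... */
			table_close(leafpart, NoLock);	/* close, but keep the lock */
		}
	}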
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index 1a9dab25dd6..bbccafb2cfd 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -79,10 +79,7 @@ lnext:
 		Datum		datum;
 		bool		isNull;
 		ItemPointerData tid;
-		TM_FailureData tmfd;
 		LockTupleMode lockmode;
-		int			lockflags = 0;
-		TM_Result	test;
 		TupleTableSlot *markSlot;
 
 		/* clear any leftover test tuple for this rel */
@@ -179,74 +176,11 @@ lnext:
 				break;
 		}
 
-		lockflags = TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;
-		if (!IsolationUsesXactSnapshot())
-			lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
-
-		test = table_tuple_lock(erm->relation, &tid, estate->es_snapshot,
-								markSlot, estate->es_output_cid,
-								lockmode, erm->waitPolicy,
-								lockflags,
-								&tmfd);
-
-		switch (test)
-		{
-			case TM_WouldBlock:
-				/* couldn't lock tuple in SKIP LOCKED mode */
-				goto lnext;
-
-			case TM_SelfModified:
-
-				/*
-				 * The target tuple was already updated or deleted by the
-				 * current command, or by a later command in the current
-				 * transaction.  We *must* ignore the tuple in the former
-				 * case, so as to avoid the "Halloween problem" of repeated
-				 * update attempts.  In the latter case it might be sensible
-				 * to fetch the updated tuple instead, but doing so would
-				 * require changing heap_update and heap_delete to not
-				 * complain about updating "invisible" tuples, which seems
-				 * pretty scary (table_tuple_lock will not complain, but few
-				 * callers expect TM_Invisible, and we're not one of them). So
-				 * for now, treat the tuple as deleted and do not process.
-				 */
-				goto lnext;
-
-			case TM_Ok:
-
-				/*
-				 * Got the lock successfully, the locked tuple saved in
-				 * markSlot for, if needed, EvalPlanQual testing below.
-				 */
-				if (tmfd.traversed)
-					epq_needed = true;
-				break;
-
-			case TM_Updated:
-				if (IsolationUsesXactSnapshot())
-					ereport(ERROR,
-							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-							 errmsg("could not serialize access due to concurrent update")));
-				elog(ERROR, "unexpected table_tuple_lock status: %u",
-					 test);
-				break;
-
-			case TM_Deleted:
-				if (IsolationUsesXactSnapshot())
-					ereport(ERROR,
-							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
-							 errmsg("could not serialize access due to concurrent update")));
-				/* tuple was deleted so don't return it */
-				goto lnext;
-
-			case TM_Invisible:
-				elog(ERROR, "attempted to lock invisible tuple");
-				break;
-
-			default:
-				elog(ERROR, "unrecognized table_tuple_lock status: %u",
-					 test);
-		}
+		/* skip tuple if it couldn't be locked */
+		if (!ExecLockTableTuple(erm->relation, &tid, markSlot,
+								estate->es_snapshot, estate->es_output_cid,
+								lockmode, erm->waitPolicy, &epq_needed))
+			goto lnext;
 
 		/* Remember locked tuple's TID for EPQ testing and WHERE CURRENT OF */
 		erm->curCtid = tid;
@@ -281,6 +215,91 @@ lnext:
 	return slot;
 }
 
+/*
+ * ExecLockTableTuple
+ *		Locks the tuple with the specified TID in the given lock mode,
+ *		following the given wait policy
+ *
+ * Returns true if the tuple was successfully locked.  The locked tuple is
+ * loaded into the provided slot.
+ */
+bool
+ExecLockTableTuple(Relation relation, ItemPointer tid, TupleTableSlot *slot,
+				   Snapshot snapshot, CommandId cid,
+				   LockTupleMode lockmode, LockWaitPolicy waitPolicy,
+				   bool *epq_needed)
+{
+	TM_FailureData tmfd;
+	int			lockflags = TUPLE_LOCK_FLAG_LOCK_UPDATE_IN_PROGRESS;
+	TM_Result	test;
+
+	if (!IsolationUsesXactSnapshot())
+		lockflags |= TUPLE_LOCK_FLAG_FIND_LAST_VERSION;
+
+	test = table_tuple_lock(relation, tid, snapshot, slot, cid, lockmode,
+							waitPolicy, lockflags, &tmfd);
+
+	switch (test)
+	{
+		case TM_WouldBlock:
+			/* couldn't lock tuple in SKIP LOCKED mode */
+			return false;
+
+		case TM_SelfModified:
+
+			/*
+			 * The target tuple was already updated or deleted by the current
+			 * command, or by a later command in the current transaction.  We
+			 * *must* ignore the tuple in the former case, so as to avoid the
+			 * "Halloween problem" of repeated update attempts.  In the latter
+			 * case it might be sensible to fetch the updated tuple instead,
+			 * but doing so would require changing heap_update and heap_delete
+			 * to not complain about updating "invisible" tuples, which seems
+			 * pretty scary (table_tuple_lock will not complain, but few
+			 * callers expect TM_Invisible, and we're not one of them). So for
+			 * now, treat the tuple as deleted and do not process.
+			 */
+			return false;
+
+		case TM_Ok:
+
+			/*
+			 * Got the lock successfully; the locked tuple is saved in slot,
+			 * and the need for EvalPlanQual is flagged if the caller asked
+			 * for it.
+			 */
+			if (tmfd.traversed && epq_needed)
+				*epq_needed = true;
+			break;
+
+		case TM_Updated:
+			if (IsolationUsesXactSnapshot())
+				ereport(ERROR,
+						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+						 errmsg("could not serialize access due to concurrent update")));
+			elog(ERROR, "unexpected table_tuple_lock status: %u",
+				 test);
+			break;
+
+		case TM_Deleted:
+			if (IsolationUsesXactSnapshot())
+				ereport(ERROR,
+						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+						 errmsg("could not serialize access due to concurrent update")));
+			/* tuple was deleted so don't return it */
+			return false;
+
+		case TM_Invisible:
+			elog(ERROR, "attempted to lock invisible tuple");
+			return false;
+
+		default:
+			elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
+			return false;
+	}
+
+	return true;
+}
+
 /* ----------------------------------------------------------------
  *		ExecInitLockRows
  *
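As a rough usage sketch of the helper added above (hypothetical caller, not part of this commit: lock_one_tuple is a made-up name, and the LockTupleExclusive/LockWaitBlock choices are assumptions for illustration):

	/*
	 * Try to lock one tuple by TID; report whether the caller should run an
	 * EvalPlanQual recheck because a newer row version was locked instead.
	 */
	static bool
	lock_one_tuple(Relation rel, ItemPointer tid, TupleTableSlot *slot,
				   EState *estate, bool *recheck)
	{
		bool		epq_needed = false;

		if (!ExecLockTableTuple(rel, tid, slot,
								estate->es_snapshot, estate->es_output_cid,
								LockTupleExclusive, LockWaitBlock,
								&epq_needed))
			return false;		/* tuple was skipped: deleted, self-modified,
								 * or not lockable under SKIP LOCKED */

		*recheck = epq_needed;
		return true;
	}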