Diffstat (limited to 'src/backend/executor')
-rw-r--r--  src/backend/executor/execIndexing.c    | 417
-rw-r--r--  src/backend/executor/execMain.c        |  53
-rw-r--r--  src/backend/executor/nodeLockRows.c    |  12
-rw-r--r--  src/backend/executor/nodeModifyTable.c | 459
4 files changed, 859 insertions, 82 deletions
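The patch below adds the executor support for INSERT ... ON CONFLICT DO NOTHING/UPDATE ("speculative insertion"). For orientation, a minimal usage sketch, assuming a server with this patch applied; the table and data are illustrative only:

    CREATE TABLE distributors (
        did    integer PRIMARY KEY,
        dname  text NOT NULL
    );

    -- Insert, or silently skip the row when "did" already exists.
    INSERT INTO distributors (did, dname)
    VALUES (5, 'Gizmo Transglobal')
    ON CONFLICT (did) DO NOTHING;

    -- Insert, or update the existing row instead; EXCLUDED refers to
    -- the row proposed for insertion.
    INSERT INTO distributors (did, dname)
    VALUES (5, 'Gizmo Transglobal')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;

The examples interspersed below reuse this distributors table.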
diff --git a/src/backend/executor/execIndexing.c b/src/backend/executor/execIndexing.c
index a697682b20e..e7cf72b3875 100644
--- a/src/backend/executor/execIndexing.c
+++ b/src/backend/executor/execIndexing.c
@@ -50,6 +50,50 @@
  * to the caller.  The caller must re-check them later by calling
  * check_exclusion_constraint().
  *
+ * Speculative insertion
+ * ---------------------
+ *
+ * Speculative insertion is a two-phase mechanism, used to implement
+ * INSERT ... ON CONFLICT DO UPDATE/NOTHING.  The tuple is first inserted
+ * into the heap and the indexes are updated as usual, but if a constraint
+ * is violated, we can still back out the insertion without aborting the
+ * whole transaction.  In an INSERT ... ON CONFLICT statement, if a
+ * conflict is detected, the inserted tuple is backed out and the ON
+ * CONFLICT action is executed instead.
+ *
+ * Insertion to a unique index works as usual: the index AM checks for
+ * duplicate keys atomically with the insertion.  But instead of throwing
+ * an error on a conflict, the speculatively inserted heap tuple is backed
+ * out.
+ *
+ * Exclusion constraints are slightly more complicated.  As mentioned
+ * earlier, there is a risk of deadlock when two backends insert the same
+ * key concurrently.  That is not a problem for regular insertions, since
+ * one of the transactions has to be aborted anyway, but with a speculative
+ * insertion we cannot let a deadlock happen, because we only want to back
+ * out the speculatively inserted tuple on conflict, not abort the whole
+ * transaction.
+ *
+ * When a backend detects that the speculative insertion conflicts with
+ * another in-progress tuple, it has two options:
+ *
+ * 1. back out the speculatively inserted tuple, then wait for the other
+ *    transaction, and retry.  Or,
+ * 2. wait for the other transaction, with the speculatively inserted tuple
+ *    still in place.
+ *
+ * If two backends insert at the same time, and both try to wait for each
+ * other, they will deadlock.  So option 2 is not acceptable.  Option 1
+ * avoids the deadlock, but it is prone to a livelock instead.  Both
+ * transactions will wake up immediately as the other transaction backs
+ * out.  Then they both retry, and conflict with each other again, lather,
+ * rinse, repeat.
+ *
+ * To avoid the livelock, one of the backends must back out first, and then
+ * wait, while the other one waits without backing out.  It doesn't matter
+ * which one backs out, so we employ an arbitrary rule that the transaction
+ * with the higher XID backs out.
+ *
  *
  * Portions Copyright (c) 1996-2015, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -63,12 +107,30 @@
 #include "postgres.h"

 #include "access/relscan.h"
+#include "access/xact.h"
 #include "catalog/index.h"
 #include "executor/executor.h"
 #include "nodes/nodeFuncs.h"
 #include "storage/lmgr.h"
 #include "utils/tqual.h"

+/* waitMode argument to check_exclusion_or_unique_constraint() */
+typedef enum
+{
+    CEOUC_WAIT,
+    CEOUC_NOWAIT,
+    CEOUC_LIVELOCK_PREVENTING_WAIT,
+} CEOUC_WAIT_MODE;
+
+static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
+                                     IndexInfo *indexInfo,
+                                     ItemPointer tupleid,
+                                     Datum *values, bool *isnull,
+                                     EState *estate, bool newIndex,
+                                     CEOUC_WAIT_MODE waitMode,
+                                     bool errorOK,
+                                     ItemPointer conflictTid);
+
 static bool index_recheck_constraint(Relation index, Oid *constr_procs,
                          Datum *existing_values, bool *existing_isnull,
                          Datum *new_values);
@@ -84,7 +146,7 @@ static bool index_recheck_constraint(Relation index, Oid *constr_procs,
  * ----------------------------------------------------------------
  */
 void
-ExecOpenIndices(ResultRelInfo *resultRelInfo)
+ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
 {
     Relation    resultRelation = resultRelInfo->ri_RelationDesc;
     List       *indexoidlist;
@@ -137,6 +199,13 @@ ExecOpenIndices(ResultRelInfo *resultRelInfo)
         /* extract index key information from the index's pg_index info */
         ii = BuildIndexInfo(indexDesc);

+        /*
+         * If the indexes are to be used for speculative insertion, add extra
+         * information required by unique index entries.
+         */
+        if (speculative && ii->ii_Unique)
+            BuildSpeculativeIndexInfo(indexDesc, ii);
+
         relationDescs[i] = indexDesc;
         indexInfoArray[i] = ii;
         i++;
@@ -186,7 +255,9 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
  *      Unique and exclusion constraints are enforced at the same
  *      time.  This returns a list of index OIDs for any unique or
  *      exclusion constraints that are deferred and that had
- *      potential (unconfirmed) conflicts.
+ *      potential (unconfirmed) conflicts.  (If noDupErr == true,
+ *      the same is done for non-deferred constraints, and we report
+ *      to the caller whether the conflict was speculative or deferred.)
  *
  *      CAUTION: this must not be called for a HOT update.
  *      We can't defend against that here for lack of info.
@@ -196,7 +267,10 @@ ExecCloseIndices(ResultRelInfo *resultRelInfo)
 List *
 ExecInsertIndexTuples(TupleTableSlot *slot,
                       ItemPointer tupleid,
-                      EState *estate)
+                      EState *estate,
+                      bool noDupErr,
+                      bool *specConflict,
+                      List *arbiterIndexes)
 {
     List       *result = NIL;
     ResultRelInfo *resultRelInfo;
@@ -236,12 +310,17 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
         IndexInfo  *indexInfo;
         IndexUniqueCheck checkUnique;
         bool        satisfiesConstraint;
+        bool        arbiter;

         if (indexRelation == NULL)
             continue;

         indexInfo = indexInfoArray[i];

+        /* Record if speculative insertion arbiter */
+        arbiter = list_member_oid(arbiterIndexes,
+                                  indexRelation->rd_index->indexrelid);
+
         /* If the index is marked as read-only, ignore it */
         if (!indexInfo->ii_ReadyForInserts)
             continue;
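The arbiterIndexes list consulted above comes from the statement's conflict target. A sketch of the two ways to name an arbiter, assuming the distributors table from the first example and its default primary-key constraint name distributors_pkey:

    -- Infer the arbiter index from a column specification ...
    INSERT INTO distributors (did, dname)
    VALUES (9, 'Antwerp Design')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;

    -- ... or name the constraint explicitly.
    INSERT INTO distributors (did, dname)
    VALUES (9, 'Antwerp Design')
    ON CONFLICT ON CONSTRAINT distributors_pkey
    DO UPDATE SET dname = EXCLUDED.dname;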
@@ -288,9 +367,14 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
          * For a deferrable unique index, we tell the index AM to just detect
          * possible non-uniqueness, and we add the index OID to the result
          * list if further checking is needed.
+         *
+         * For a speculative insertion (used by INSERT ... ON CONFLICT), do
+         * the same as for a deferrable unique index.
          */
         if (!indexRelation->rd_index->indisunique)
             checkUnique = UNIQUE_CHECK_NO;
+        else if (noDupErr && (arbiterIndexes == NIL || arbiter))
+            checkUnique = UNIQUE_CHECK_PARTIAL;
         else if (indexRelation->rd_index->indimmediate)
             checkUnique = UNIQUE_CHECK_YES;
         else
@@ -308,8 +392,11 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
          * If the index has an associated exclusion constraint, check that.
          * This is simpler than the process for uniqueness checks since we
          * always insert first and then check.  If the constraint is deferred,
-         * we check now anyway, but don't throw error on violation; instead
-         * we'll queue a recheck event.
+         * we check now anyway, but don't throw error on violation or wait for
+         * a conclusive outcome from a concurrent insertion; instead we'll
+         * queue a recheck event.  Similarly, noDupErr callers (speculative
+         * inserters) will recheck later, and wait for a conclusive outcome
+         * then.
          *
          * An index for an exclusion constraint can't also be UNIQUE (not an
          * essential property, we just don't allow it in the grammar), so no
@@ -317,13 +404,31 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
          */
         if (indexInfo->ii_ExclusionOps != NULL)
         {
-            bool        errorOK = !indexRelation->rd_index->indimmediate;
+            bool        violationOK;
+            CEOUC_WAIT_MODE waitMode;
+
+            if (noDupErr)
+            {
+                violationOK = true;
+                waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
+            }
+            else if (!indexRelation->rd_index->indimmediate)
+            {
+                violationOK = true;
+                waitMode = CEOUC_NOWAIT;
+            }
+            else
+            {
+                violationOK = false;
+                waitMode = CEOUC_WAIT;
+            }

             satisfiesConstraint =
-                check_exclusion_constraint(heapRelation,
-                                           indexRelation, indexInfo,
-                                           tupleid, values, isnull,
-                                           estate, false, errorOK);
+                check_exclusion_or_unique_constraint(heapRelation,
+                                                     indexRelation, indexInfo,
+                                                     tupleid, values, isnull,
+                                                     estate, false,
+                                                     waitMode, violationOK, NULL);
         }

         if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
@@ -333,46 +438,213 @@ ExecInsertIndexTuples(TupleTableSlot *slot,
             /*
              * The tuple potentially violates the uniqueness or exclusion
              * constraint, so make a note of the index so that we can re-check
-             * it later.
+             * it later.  Speculative inserters are told if there was a
+             * speculative conflict, since that always requires a restart.
              */
             result = lappend_oid(result, RelationGetRelid(indexRelation));
+            if (indexRelation->rd_index->indimmediate && specConflict)
+                *specConflict = true;
         }
     }

     return result;
 }
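The arbiterIndexes == NIL case above corresponds to ON CONFLICT DO NOTHING without a conflict target, where every usable unique index acts as an arbiter. A sketch, reusing the distributors table:

    -- No conflict target: a conflict against any unique index (here, the
    -- primary key) takes the DO NOTHING path.
    INSERT INTO distributors (did, dname)
    VALUES (10, 'Conrad International')
    ON CONFLICT DO NOTHING;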
+/* ----------------------------------------------------------------
+ *      ExecCheckIndexConstraints
+ *
+ *      This routine checks if a tuple violates any unique or
+ *      exclusion constraints.  Returns true if there is no conflict.
+ *      Otherwise returns false, and the TID of the conflicting
+ *      tuple is returned in *conflictTid.
+ *
+ *      If 'arbiterIndexes' is given, only those indexes are checked.
+ *      NIL means all indexes.
+ *
+ *      Note that this doesn't lock the values in any way, so it's
+ *      possible that a conflicting tuple is inserted immediately
+ *      after this returns.  But this can be used for a pre-check
+ *      before insertion.
+ * ----------------------------------------------------------------
+ */
+bool
+ExecCheckIndexConstraints(TupleTableSlot *slot,
+                          EState *estate, ItemPointer conflictTid,
+                          List *arbiterIndexes)
+{
+    ResultRelInfo *resultRelInfo;
+    int         i;
+    int         numIndices;
+    RelationPtr relationDescs;
+    Relation    heapRelation;
+    IndexInfo **indexInfoArray;
+    ExprContext *econtext;
+    Datum       values[INDEX_MAX_KEYS];
+    bool        isnull[INDEX_MAX_KEYS];
+    ItemPointerData invalidItemPtr;
+    bool        checkedIndex = false;
+
+    ItemPointerSetInvalid(conflictTid);
+    ItemPointerSetInvalid(&invalidItemPtr);
+
+    /*
+     * Get information from the result relation info structure.
+     */
+    resultRelInfo = estate->es_result_relation_info;
+    numIndices = resultRelInfo->ri_NumIndices;
+    relationDescs = resultRelInfo->ri_IndexRelationDescs;
+    indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
+    heapRelation = resultRelInfo->ri_RelationDesc;
+
+    /*
+     * We will use the EState's per-tuple context for evaluating predicates
+     * and index expressions (creating it if it's not already there).
+     */
+    econtext = GetPerTupleExprContext(estate);
+
+    /* Arrange for econtext's scan tuple to be the tuple under test */
+    econtext->ecxt_scantuple = slot;
+
+    /*
+     * For each index, form index tuple and check if it satisfies the
+     * constraint.
+     */
+    for (i = 0; i < numIndices; i++)
+    {
+        Relation    indexRelation = relationDescs[i];
+        IndexInfo  *indexInfo;
+        bool        satisfiesConstraint;
+
+        if (indexRelation == NULL)
+            continue;
+
+        indexInfo = indexInfoArray[i];
+
+        if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
+            continue;
+
+        /* If the index is marked as read-only, ignore it */
+        if (!indexInfo->ii_ReadyForInserts)
+            continue;
+
+        /* When specific arbiter indexes requested, only examine them */
+        if (arbiterIndexes != NIL &&
+            !list_member_oid(arbiterIndexes,
+                             indexRelation->rd_index->indexrelid))
+            continue;
+
+        if (!indexRelation->rd_index->indimmediate)
+            ereport(ERROR,
+                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                     errmsg("ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters"),
+                     errtableconstraint(heapRelation,
+                                        RelationGetRelationName(indexRelation))));
+
+        checkedIndex = true;
+
+        /* Check for partial index */
+        if (indexInfo->ii_Predicate != NIL)
+        {
+            List       *predicate;
+
+            /*
+             * If predicate state not set up yet, create it (in the estate's
+             * per-query context)
+             */
+            predicate = indexInfo->ii_PredicateState;
+            if (predicate == NIL)
+            {
+                predicate = (List *)
+                    ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
+                                    estate);
+                indexInfo->ii_PredicateState = predicate;
+            }
+
+            /* Skip this index-update if the predicate isn't satisfied */
+            if (!ExecQual(predicate, econtext, false))
+                continue;
+        }
+
+        /*
+         * FormIndexDatum fills in its values and isnull parameters with the
+         * appropriate values for the column(s) of the index.
+         */
+        FormIndexDatum(indexInfo,
+                       slot,
+                       estate,
+                       values,
+                       isnull);
+
+        satisfiesConstraint =
+            check_exclusion_or_unique_constraint(heapRelation, indexRelation,
+                                                 indexInfo, &invalidItemPtr,
+                                                 values, isnull, estate, false,
+                                                 CEOUC_WAIT, true,
+                                                 conflictTid);
+        if (!satisfiesConstraint)
+            return false;
+    }
+
+    if (arbiterIndexes != NIL && !checkedIndex)
+        elog(ERROR, "unexpected failure to find arbiter index");
+
+    return true;
+}
+
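The ereport above is reachable, for example, when DO NOTHING without a conflict target encounters a deferrable constraint; a sketch (table name illustrative):

    CREATE TABLE wines (
        winery text UNIQUE DEFERRABLE
    );

    INSERT INTO wines (winery) VALUES ('Chateau Lafite')
    ON CONFLICT DO NOTHING;
    -- ERROR:  ON CONFLICT does not support deferred unique constraints/exclusion constraints as arbiters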
 /*
- * Check for violation of an exclusion constraint
+ * Check for violation of an exclusion or unique constraint
  *
  * heap: the table containing the new tuple
- * index: the index supporting the exclusion constraint
+ * index: the index supporting the constraint
  * indexInfo: info about the index, including the exclusion properties
- * tupleid: heap TID of the new tuple we have just inserted
+ * tupleid: heap TID of the new tuple we have just inserted (invalid if we
+ *      haven't inserted a new tuple yet)
  * values, isnull: the *index* column values computed for the new tuple
  * estate: an EState we can do evaluation in
  * newIndex: if true, we are trying to build a new index (this affects
  *      only the wording of error messages)
- * errorOK: if true, don't throw error for violation
+ * waitMode: whether to wait for concurrent inserters/deleters
+ * violationOK: if true, don't throw error for violation
+ * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here
  *
  * Returns true if OK, false if actual or potential violation
  *
- * When errorOK is true, we report violation without waiting to see if any
- * concurrent transaction has committed or not; so the violation is only
- * potential, and the caller must recheck sometime later.  This behavior
- * is convenient for deferred exclusion checks; we need not bother queuing
- * a deferred event if there is definitely no conflict at insertion time.
+ * 'waitMode' determines what happens if a conflict is detected with a tuple
+ * that was inserted or deleted by a transaction that's still running.
+ * CEOUC_WAIT means that we wait for the transaction to commit, before
+ * throwing an error or returning.  CEOUC_NOWAIT means that we report the
+ * violation immediately; so the violation is only potential, and the caller
+ * must recheck sometime later.  This behavior is convenient for deferred
+ * exclusion checks; we need not bother queuing a deferred event if there is
+ * definitely no conflict at insertion time.
+ *
+ * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes
+ * wait anyway, to prevent livelocking if two transactions try inserting at
+ * the same time.  This is used with speculative insertions, for INSERT ON
+ * CONFLICT statements.  (See notes in file header)
  *
- * When errorOK is false, we'll throw error on violation, so a false result
- * is impossible.
+ * If violationOK is true, we just report the potential or actual violation to
+ * the caller by returning 'false'.  Otherwise we throw a descriptive error
+ * message here.  When violationOK is false, a false result is impossible.
+ *
+ * Note: The indexam is normally responsible for checking unique constraints,
+ * so this normally only needs to be used for exclusion constraints.  But this
+ * function is also called when doing a "pre-check" for conflicts on a unique
+ * constraint, when doing speculative insertion.  Caller may use the returned
+ * conflict TID to take further steps.
  */
-bool
-check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
-                           ItemPointer tupleid, Datum *values, bool *isnull,
-                           EState *estate, bool newIndex, bool errorOK)
+static bool
+check_exclusion_or_unique_constraint(Relation heap, Relation index,
+                                     IndexInfo *indexInfo,
+                                     ItemPointer tupleid,
+                                     Datum *values, bool *isnull,
+                                     EState *estate, bool newIndex,
+                                     CEOUC_WAIT_MODE waitMode,
+                                     bool violationOK,
+                                     ItemPointer conflictTid)
 {
-    Oid        *constr_procs = indexInfo->ii_ExclusionProcs;
-    uint16     *constr_strats = indexInfo->ii_ExclusionStrats;
+    Oid        *constr_procs;
+    uint16     *constr_strats;
     Oid        *index_collations = index->rd_indcollation;
     int         index_natts = index->rd_index->indnatts;
     IndexScanDesc index_scan;
@@ -386,6 +658,17 @@ check_exclusion_constraint(Relation heap, Relation index, IndexInfo *indexInfo,
     TupleTableSlot *existing_slot;
     TupleTableSlot *save_scantuple;

+    if (indexInfo->ii_ExclusionOps)
+    {
+        constr_procs = indexInfo->ii_ExclusionProcs;
+        constr_strats = indexInfo->ii_ExclusionStrats;
+    }
+    else
+    {
+        constr_procs = indexInfo->ii_UniqueProcs;
+        constr_strats = indexInfo->ii_UniqueStrats;
+    }
+
     /*
      * If any of the input values are NULL, the constraint check is assumed to
      * pass (i.e., we assume the operators are strict).
@@ -450,7 +733,8 @@ retry:
         /*
          * Ignore the entry for the tuple we're trying to check.
          */
-        if (ItemPointerEquals(tupleid, &tup->t_self))
+        if (ItemPointerIsValid(tupleid) &&
+            ItemPointerEquals(tupleid, &tup->t_self))
         {
             if (found_self)     /* should not happen */
                 elog(ERROR, "found self tuple multiple times in index \"%s\"",
@@ -480,39 +764,47 @@ retry:
         }

         /*
-         * At this point we have either a conflict or a potential conflict. If
-         * we're not supposed to raise error, just return the fact of the
-         * potential conflict without waiting to see if it's real.
-         */
-        if (errorOK)
-        {
-            conflict = true;
-            break;
-        }
-
-        /*
+         * At this point we have either a conflict or a potential conflict.
+         *
          * If an in-progress transaction is affecting the visibility of this
-         * tuple, we need to wait for it to complete and then recheck.  For
-         * simplicity we do rechecking by just restarting the whole scan ---
-         * this case probably doesn't happen often enough to be worth trying
-         * harder, and anyway we don't want to hold any index internal locks
-         * while waiting.
+         * tuple, we need to wait for it to complete and then recheck (unless
+         * the caller requested not to).  For simplicity we do rechecking by
+         * just restarting the whole scan --- this case probably doesn't
+         * happen often enough to be worth trying harder, and anyway we don't
+         * want to hold any index internal locks while waiting.
          */
         xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
             DirtySnapshot.xmin : DirtySnapshot.xmax;

-        if (TransactionIdIsValid(xwait))
+        if (TransactionIdIsValid(xwait) &&
+            (waitMode == CEOUC_WAIT ||
+             (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
+              DirtySnapshot.speculativeToken &&
+              TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
         {
             ctid_wait = tup->t_data->t_ctid;
             index_endscan(index_scan);
-            XactLockTableWait(xwait, heap, &ctid_wait,
-                              XLTW_RecheckExclusionConstr);
+            if (DirtySnapshot.speculativeToken)
+                SpeculativeInsertionWait(DirtySnapshot.xmin,
+                                         DirtySnapshot.speculativeToken);
+            else
+                XactLockTableWait(xwait, heap, &ctid_wait,
+                                  XLTW_RecheckExclusionConstr);
             goto retry;
         }

         /*
-         * We have a definite conflict.  Report it.
+         * We have a definite conflict (or a potential one, but the caller
+         * didn't want to wait).  Return it to caller, or report it.
          */
+        if (violationOK)
+        {
+            conflict = true;
+            if (conflictTid)
+                *conflictTid = tup->t_self;
+            break;
+        }
+
         error_new = BuildIndexValueDescription(index, values, isnull);
         error_existing = BuildIndexValueDescription(index, existing_values,
                                                     existing_isnull);
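Since this function now also backs ON CONFLICT, exclusion constraints work as arbiters for DO NOTHING (DO UPDATE is not supported with them). A sketch, assuming the btree_gist extension is available:

    CREATE EXTENSION IF NOT EXISTS btree_gist;

    CREATE TABLE reservations (
        room   text,
        during tsrange,
        EXCLUDE USING gist (room WITH =, during WITH &&)
    );

    -- An overlapping reservation is silently skipped rather than
    -- raising an exclusion violation.
    INSERT INTO reservations
    VALUES ('101', '[2015-05-08 14:00,2015-05-08 15:00)')
    ON CONFLICT DO NOTHING;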
@@ -544,10 +836,10 @@ retry:

     /*
      * Ordinarily, at this point the search should have found the originally
-     * inserted tuple, unless we exited the loop early because of conflict.
-     * However, it is possible to define exclusion constraints for which that
-     * wouldn't be true --- for instance, if the operator is <>. So we no
-     * longer complain if found_self is still false.
+     * inserted tuple (if any), unless we exited the loop early because of
+     * conflict.  However, it is possible to define exclusion constraints for
+     * which that wouldn't be true --- for instance, if the operator is <>.
+     * So we no longer complain if found_self is still false.
      */

     econtext->ecxt_scantuple = save_scantuple;
@@ -558,6 +850,25 @@ retry:
 }

 /*
+ * Check for violation of an exclusion constraint
+ *
+ * This is a dumbed down version of check_exclusion_or_unique_constraint
+ * for external callers.  They don't need all the special modes.
+ */
+void
+check_exclusion_constraint(Relation heap, Relation index,
+                           IndexInfo *indexInfo,
+                           ItemPointer tupleid,
+                           Datum *values, bool *isnull,
+                           EState *estate, bool newIndex)
+{
+    (void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid,
+                                                values, isnull,
+                                                estate, newIndex,
+                                                CEOUC_WAIT, false, NULL);
+}
+
+/*
  * Check existing tuple's index values to see if it really matches the
  * exclusion condition against the new_values.  Returns true if conflict.
  */
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index 4272d9bc155..0dee9491788 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -1813,6 +1813,12 @@ ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo,
                          errmsg("new row violates row level security policy for \"%s\"",
                                 wco->relname)));
                 break;
+            case WCO_RLS_CONFLICT_CHECK:
+                ereport(ERROR,
+                        (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                         errmsg("new row violates row level security policy (USING expression) for \"%s\"",
+                                wco->relname)));
+                break;
             default:
                 elog(ERROR, "unrecognized WCO kind: %u", wco->kind);
                 break;
@@ -1973,6 +1979,31 @@ ExecBuildSlotValueDescription(Oid reloid,

 /*
+ * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a
+ * given ResultRelInfo
+ */
+LockTupleMode
+ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo)
+{
+    Bitmapset  *keyCols;
+    Bitmapset  *updatedCols;
+
+    /*
+     * Compute lock mode to use.  If columns that are part of the key have not
+     * been modified, then we can use a weaker lock, allowing for better
+     * concurrency.
+     */
+    updatedCols = GetUpdatedColumns(relinfo, estate);
+    keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc,
+                                         INDEX_ATTR_BITMAP_KEY);
+
+    if (bms_overlap(keyCols, updatedCols))
+        return LockTupleExclusive;
+
+    return LockTupleNoKeyExclusive;
+}
+
+/*
  * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index
  */
 ExecRowMark *
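The new WCO_RLS_CONFLICT_CHECK kind added above surfaces in ON CONFLICT DO UPDATE when the existing, conflicting row fails the UPDATE policy's USING qual. A sketch (policy is illustrative; note that RLS is not applied to the table owner):

    ALTER TABLE distributors ENABLE ROW LEVEL SECURITY;

    CREATE POLICY dist_policy ON distributors
        USING (dname NOT LIKE 'secret%');

    -- For a non-owner role, if the conflicting existing row has a
    -- dname beginning with 'secret', the DO UPDATE path fails with:
    -- ERROR:  new row violates row level security policy (USING expression) for "distributors"
    INSERT INTO distributors (did, dname)
    VALUES (5, 'Gizmo Transglobal')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;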
@@ -2186,8 +2217,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
              * recycled and reused for an unrelated tuple.  This implies that
              * the latest version of the row was deleted, so we need do
              * nothing.  (Should be safe to examine xmin without getting
-             * buffer's content lock, since xmin never changes in an existing
-             * tuple.)
+             * buffer's content lock.  We assume reading a TransactionId to be
+             * atomic, and Xmin never changes in an existing tuple, except to
+             * invalid or frozen, and neither of those can match priorXmax.)
              */
             if (!TransactionIdEquals(HeapTupleHeaderGetXmin(tuple.t_data),
                                      priorXmax))
@@ -2268,11 +2300,12 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
                      * case, so as to avoid the "Halloween problem" of
                      * repeated update attempts.  In the latter case it might
                      * be sensible to fetch the updated tuple instead, but
-                     * doing so would require changing heap_lock_tuple as well
-                     * as heap_update and heap_delete to not complain about
-                     * updating "invisible" tuples, which seems pretty scary.
-                     * So for now, treat the tuple as deleted and do not
-                     * process.
+                     * doing so would require changing heap_update and
+                     * heap_delete to not complain about updating "invisible"
+                     * tuples, which seems pretty scary (heap_lock_tuple will
+                     * not complain, but few callers expect HeapTupleInvisible,
+                     * and we're not one of them).  So for now, treat the tuple
+                     * as deleted and do not process.
                      */
                     ReleaseBuffer(buffer);
                     return NULL;
@@ -2287,6 +2320,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
                     ereport(ERROR,
                             (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                              errmsg("could not serialize access due to concurrent update")));
+
+                /* Should not encounter speculative tuple on recheck */
+                Assert(!HeapTupleHeaderIsSpeculative(tuple.t_data));
                 if (!ItemPointerEquals(&hufd.ctid, &tuple.t_self))
                 {
                     /* it was updated, so look at the updated version */
@@ -2302,6 +2338,9 @@ EvalPlanQualFetch(EState *estate, Relation relation, int lockmode,
                 ReleaseBuffer(buffer);
                 return NULL;

+            case HeapTupleInvisible:
+                elog(ERROR, "attempted to lock invisible tuple");
+
             default:
                 ReleaseBuffer(buffer);
                 elog(ERROR, "unrecognized heap_lock_tuple status: %u",
diff --git a/src/backend/executor/nodeLockRows.c b/src/backend/executor/nodeLockRows.c
index bb6df47a95d..5ae106c06ad 100644
--- a/src/backend/executor/nodeLockRows.c
+++ b/src/backend/executor/nodeLockRows.c
@@ -152,10 +152,11 @@ lnext:
              * case, so as to avoid the "Halloween problem" of repeated
              * update attempts.  In the latter case it might be sensible
              * to fetch the updated tuple instead, but doing so would
-             * require changing heap_lock_tuple as well as heap_update and
-             * heap_delete to not complain about updating "invisible"
-             * tuples, which seems pretty scary.  So for now, treat the
-             * tuple as deleted and do not process.
+             * require changing heap_update and heap_delete to not complain
+             * about updating "invisible" tuples, which seems pretty scary
+             * (heap_lock_tuple will not complain, but few callers expect
+             * HeapTupleInvisible, and we're not one of them).  So for now,
+             * treat the tuple as deleted and do not process.
              */
             goto lnext;
@@ -228,6 +229,9 @@ lnext:
                 /* Continue loop until we have all target tuples */
                 break;

+            case HeapTupleInvisible:
+                elog(ERROR, "attempted to lock invisible tuple");
+
             default:
                 elog(ERROR, "unrecognized heap_lock_tuple status: %u",
                      test);
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 31666edfa8a..34435c7e50a 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -46,12 +46,22 @@
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "storage/bufmgr.h"
+#include "storage/lmgr.h"
 #include "utils/builtins.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 #include "utils/tqual.h"

+static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
+                     ResultRelInfo *resultRelInfo,
+                     ItemPointer conflictTid,
+                     TupleTableSlot *planSlot,
+                     TupleTableSlot *excludedSlot,
+                     EState *estate,
+                     bool canSetTag,
+                     TupleTableSlot **returning);
+
 /*
  * Verify that the tuples to be produced by INSERT or UPDATE match the
  * target relation's rowtype
@@ -151,6 +161,51 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
     return ExecProject(projectReturning, NULL);
 }

+/*
+ * ExecCheckHeapTupleVisible -- verify heap tuple is visible
+ *
+ * It would not be consistent with guarantees of the higher isolation levels to
+ * proceed with avoiding insertion (taking speculative insertion's alternative
+ * path) on the basis of another tuple that is not visible to MVCC snapshot.
+ * Check for the need to raise a serialization failure, and do so as necessary.
+ */
+static void
+ExecCheckHeapTupleVisible(EState *estate,
+                          HeapTuple tuple,
+                          Buffer buffer)
+{
+    if (!IsolationUsesXactSnapshot())
+        return;
+
+    if (!HeapTupleSatisfiesVisibility(tuple, estate->es_snapshot, buffer))
+        ereport(ERROR,
+                (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                 errmsg("could not serialize access due to concurrent update")));
+}
+
+/*
+ * ExecCheckTIDVisible -- convenience variant of ExecCheckHeapTupleVisible()
+ */
+static void
+ExecCheckTIDVisible(EState *estate,
+                    ResultRelInfo *relinfo,
+                    ItemPointer tid)
+{
+    Relation    rel = relinfo->ri_RelationDesc;
+    Buffer      buffer;
+    HeapTupleData tuple;
+
+    /* Redundantly check isolation level */
+    if (!IsolationUsesXactSnapshot())
+        return;
+
+    tuple.t_self = *tid;
+    if (!heap_fetch(rel, SnapshotAny, &tuple, &buffer, false, NULL))
+        elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
+    ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+    ReleaseBuffer(buffer);
+}
+
 /* ----------------------------------------------------------------
  *      ExecInsert
  *
@@ -161,8 +216,11 @@ ExecProcessReturning(ProjectionInfo *projectReturning,
  * ----------------------------------------------------------------
  */
 static TupleTableSlot *
-ExecInsert(TupleTableSlot *slot,
+ExecInsert(ModifyTableState *mtstate,
+           TupleTableSlot *slot,
            TupleTableSlot *planSlot,
+           List *arbiterIndexes,
+           OnConflictAction onconflict,
            EState *estate,
            bool canSetTag)
 {
@@ -199,7 +257,15 @@ ExecInsert(TupleTableSlot *slot,
     if (resultRelationDesc->rd_rel->relhasoids)
         HeapTupleSetOid(tuple, InvalidOid);

-    /* BEFORE ROW INSERT Triggers */
+    /*
+     * BEFORE ROW INSERT Triggers.
+     *
+     * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
+     * INSERT ... ON CONFLICT statement.  We cannot check for constraint
+     * violations before firing these triggers, because they can change the
+     * values to insert.  Also, they can run arbitrary user-defined code with
+     * side-effects that we can't cancel by just not inserting the tuple.
+     */
     if (resultRelInfo->ri_TrigDesc &&
         resultRelInfo->ri_TrigDesc->trig_insert_before_row)
     {
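A sketch of the behavior described in the comment above: the BEFORE ROW trigger fires even when the conflict path ultimately skips the insertion (trigger and function names illustrative):

    CREATE FUNCTION note_attempt() RETURNS trigger
    LANGUAGE plpgsql AS $$
    BEGIN
        RAISE NOTICE 'attempting insert, did = %', NEW.did;
        RETURN NEW;
    END;
    $$;

    CREATE TRIGGER distributors_before_row
        BEFORE INSERT ON distributors
        FOR EACH ROW EXECUTE PROCEDURE note_attempt();

    -- The NOTICE is raised even though the row conflicts and DO NOTHING
    -- inserts nothing.
    INSERT INTO distributors (did, dname)
    VALUES (5, 'Duplicate Corp')
    ON CONFLICT (did) DO NOTHING;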
@@ -268,21 +334,132 @@ ExecInsert(TupleTableSlot *slot,
         if (resultRelationDesc->rd_att->constr)
             ExecConstraints(resultRelInfo, slot, estate);

-        /*
-         * insert the tuple
-         *
-         * Note: heap_insert returns the tid (location) of the new tuple in
-         * the t_self field.
-         */
-        newId = heap_insert(resultRelationDesc, tuple,
-                            estate->es_output_cid, 0, NULL);
-
-        /*
-         * insert index entries for tuple
-         */
-        if (resultRelInfo->ri_NumIndices > 0)
+        if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
+        {
+            /* Perform a speculative insertion. */
+            uint32      specToken;
+            ItemPointerData conflictTid;
+            bool        specConflict;
+
+            /*
+             * Do a non-conclusive check for conflicts first.
+             *
+             * We're not holding any locks yet, so this doesn't guarantee that
+             * the later insert won't conflict.  But it avoids leaving behind
+             * a lot of canceled speculative insertions, if you run a lot of
+             * INSERT ON CONFLICT statements that do conflict.
+             *
+             * We loop back here if we find a conflict below, either during
+             * the pre-check, or when we re-check after inserting the tuple
+             * speculatively.  See the executor README for a full discussion
+             * of speculative insertion.
+             */
+    vlock:
+            specConflict = false;
+            if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
+                                           arbiterIndexes))
+            {
+                /* committed conflict tuple found */
+                if (onconflict == ONCONFLICT_UPDATE)
+                {
+                    /*
+                     * In case of ON CONFLICT DO UPDATE, execute the UPDATE
+                     * part.  Be prepared to retry if the UPDATE fails because
+                     * of another concurrent UPDATE/DELETE to the conflict
+                     * tuple.
+                     */
+                    TupleTableSlot *returning = NULL;
+
+                    if (ExecOnConflictUpdate(mtstate, resultRelInfo,
+                                             &conflictTid, planSlot, slot,
+                                             estate, canSetTag, &returning))
+                    {
+                        InstrCountFiltered2(&mtstate->ps, 1);
+                        return returning;
+                    }
+                    else
+                        goto vlock;
+                }
+                else
+                {
+                    /*
+                     * In case of ON CONFLICT DO NOTHING, do nothing.
+                     * However, verify that the tuple is visible to the
+                     * executor's MVCC snapshot at higher isolation levels.
+                     */
+                    Assert(onconflict == ONCONFLICT_NOTHING);
+                    ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid);
+                    InstrCountFiltered2(&mtstate->ps, 1);
+                    return NULL;
+                }
+            }
+
+            /*
+             * Before we start insertion proper, acquire our "speculative
+             * insertion lock".  Others can use that to wait for us to decide
+             * if we're going to go ahead with the insertion, instead of
+             * waiting for the whole transaction to complete.
+             */
+            specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
+            HeapTupleHeaderSetSpeculativeToken(tuple->t_data, specToken);
+
+            /* insert the tuple, with the speculative token */
+            newId = heap_insert(resultRelationDesc, tuple,
+                                estate->es_output_cid,
+                                HEAP_INSERT_SPECULATIVE,
+                                NULL);
+
+            /* insert index entries for tuple */
             recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                   estate);
+                                                   estate, true, &specConflict,
+                                                   arbiterIndexes);
+
+            /* adjust the tuple's state accordingly */
+            if (!specConflict)
+                heap_finish_speculative(resultRelationDesc, tuple);
+            else
+                heap_abort_speculative(resultRelationDesc, tuple);
+
+            /*
+             * Wake up anyone waiting for our decision.  They will re-check
+             * the tuple, see that it's no longer speculative, and wait on our
+             * XID as if this was a regularly inserted tuple all along.  Or if
+             * we killed the tuple, they will see it's dead, and proceed as if
+             * the tuple never existed.
+             */
+            SpeculativeInsertionLockRelease(GetCurrentTransactionId());
+
+            /*
+             * If there was a conflict, start from the beginning.  We'll do
+             * the pre-check again, which will now find the conflicting tuple
+             * (unless it aborts before we get there).
+             */
+            if (specConflict)
+            {
+                list_free(recheckIndexes);
+                goto vlock;
+            }
+
+            /* Since there was no insertion conflict, we're done */
+        }
+        else
+        {
+            /*
+             * insert the tuple normally.
+             *
+             * Note: heap_insert returns the tid (location) of the new tuple
+             * in the t_self field.
+             */
+            newId = heap_insert(resultRelationDesc, tuple,
+                                estate->es_output_cid,
+                                0, NULL);
+
+            /* insert index entries for tuple */
+            if (resultRelInfo->ri_NumIndices > 0)
+                recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
+                                                       estate, false, NULL,
+                                                       arbiterIndexes);
+        }
     }

     if (canSetTag)
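The retry loop above is what lets two concurrent upserts of the same new key both complete without a unique_violation; schematically, with two sessions at the default READ COMMITTED level:

    -- Session 1:
    INSERT INTO distributors VALUES (7, 'Redline GmbH')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;

    -- Session 2, at the same time:
    INSERT INTO distributors VALUES (7, 'Redline Ltd')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;

    -- One session inserts the row; the other waits on the speculative
    -- token or XID as described above, then takes the DO UPDATE path.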
@@ -800,7 +977,7 @@ lreplace:;
          */
         if (resultRelInfo->ri_NumIndices > 0 && !HeapTupleIsHeapOnly(tuple))
             recheckIndexes = ExecInsertIndexTuples(slot, &(tuple->t_self),
-                                                   estate);
+                                                   estate, false, NULL, NIL);
     }

     if (canSetTag)
@@ -832,6 +1009,190 @@ lreplace:;
     return NULL;
 }

+/*
+ * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
+ *
+ * Try to lock tuple for update as part of speculative insertion.  If
+ * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
+ * (but still lock row, even though it may not satisfy estate's
+ * snapshot).
+ *
+ * Returns true if we're done (with or without an update), or false if
+ * the caller must retry the INSERT from scratch.
+ */
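The HeapTupleInvisible branch below raises the new cardinality-violation error when one command proposes two rows with the same conflicting key; a sketch:

    INSERT INTO distributors (did, dname)
    VALUES (8, 'Anvil Distribution'),
           (8, 'Anvil Distribution Ltd')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;
    -- ERROR:  ON CONFLICT DO UPDATE command cannot affect row a second time
    -- HINT:  Ensure that no rows proposed for insertion within the same
    --        command have duplicate constrained values.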
+static bool
+ExecOnConflictUpdate(ModifyTableState *mtstate,
+                     ResultRelInfo *resultRelInfo,
+                     ItemPointer conflictTid,
+                     TupleTableSlot *planSlot,
+                     TupleTableSlot *excludedSlot,
+                     EState *estate,
+                     bool canSetTag,
+                     TupleTableSlot **returning)
+{
+    ExprContext *econtext = mtstate->ps.ps_ExprContext;
+    Relation    relation = resultRelInfo->ri_RelationDesc;
+    List       *onConflictSetWhere = resultRelInfo->ri_onConflictSetWhere;
+    HeapTupleData tuple;
+    HeapUpdateFailureData hufd;
+    LockTupleMode lockmode;
+    HTSU_Result test;
+    Buffer      buffer;
+
+    /* Determine lock mode to use */
+    lockmode = ExecUpdateLockMode(estate, resultRelInfo);
+
+    /*
+     * Lock tuple for update.  Don't follow updates when tuple cannot be
+     * locked without doing so.  A row locking conflict here means our
+     * previous conclusion that the tuple is conclusively committed is not
+     * true anymore.
+     */
+    tuple.t_self = *conflictTid;
+    test = heap_lock_tuple(relation, &tuple, estate->es_output_cid,
+                           lockmode, LockWaitBlock, false, &buffer,
+                           &hufd);
+    switch (test)
+    {
+        case HeapTupleMayBeUpdated:
+            /* success! */
+            break;
+
+        case HeapTupleInvisible:
+
+            /*
+             * This can occur when a just inserted tuple is updated again in
+             * the same command.  E.g. because multiple rows with the same
+             * conflicting key values are inserted.
+             *
+             * This is somewhat similar to the ExecUpdate()
+             * HeapTupleSelfUpdated case.  We do not want to proceed because
+             * it would lead to the same row being updated a second time in
+             * some unspecified order, and in contrast to plain UPDATEs
+             * there's no historical behavior to break.
+             *
+             * It is the user's responsibility to prevent this situation from
+             * occurring.  These problems are why SQL-2003 similarly specifies
+             * that for SQL MERGE, an exception must be raised in the event of
+             * an attempt to update the same row twice.
+             */
+            if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmin(tuple.t_data)))
+                ereport(ERROR,
+                        (errcode(ERRCODE_CARDINALITY_VIOLATION),
+                         errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
+                         errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
+
+            /* This shouldn't happen */
+            elog(ERROR, "attempted to lock invisible tuple");
+
+        case HeapTupleSelfUpdated:
+
+            /*
+             * This state should never be reached.  As a dirty snapshot is used
+             * to find conflicting tuples, speculative insertion wouldn't have
+             * seen this row to conflict with.
+             */
+            elog(ERROR, "unexpected self-updated tuple");
+
+        case HeapTupleUpdated:
+            if (IsolationUsesXactSnapshot())
+                ereport(ERROR,
+                        (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+                         errmsg("could not serialize access due to concurrent update")));
+
+            /*
+             * Tell caller to try again from the very start.
+             *
+             * It does not make sense to use the usual EvalPlanQual() style
+             * loop here, as the new version of the row might not conflict
+             * anymore, or the conflicting tuple has actually been deleted.
+             */
+            ReleaseBuffer(buffer);
+            return false;
+
+        default:
+            elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
+    }
+
+    /*
+     * Success, the tuple is locked.
+     *
+     * Reset per-tuple memory context to free any expression evaluation
+     * storage allocated in the previous cycle.
+     */
+    ResetExprContext(econtext);
+
+    /*
+     * Verify that the tuple is visible to our MVCC snapshot if the current
+     * isolation level mandates that.
+     *
+     * It's not sufficient to rely on the check within ExecUpdate() as e.g.
+     * CONFLICT ... WHERE clause may prevent us from reaching that.
+     *
+     * This means we only ever continue when a new command in the current
+     * transaction could see the row, even though in READ COMMITTED mode the
+     * tuple will not be visible according to the current statement's
+     * snapshot.  This is in line with the way UPDATE deals with newer tuple
+     * versions.
+     */
+    ExecCheckHeapTupleVisible(estate, &tuple, buffer);
+
+    /* Store target's existing tuple in the state's dedicated slot */
+    ExecStoreTuple(&tuple, mtstate->mt_existing, buffer, false);
+
+    /*
+     * Make tuple and any needed join variables available to ExecQual and
+     * ExecProject.  The EXCLUDED tuple is installed in ecxt_innertuple, while
+     * the target's existing tuple is installed in the scantuple.  EXCLUDED has
+     * been made to reference INNER_VAR in setrefs.c, but there is no other
+     * redirection.
+     */
+    econtext->ecxt_scantuple = mtstate->mt_existing;
+    econtext->ecxt_innertuple = excludedSlot;
+    econtext->ecxt_outertuple = NULL;
+
+    if (!ExecQual(onConflictSetWhere, econtext, false))
+    {
+        ReleaseBuffer(buffer);
+        InstrCountFiltered1(&mtstate->ps, 1);
+        return true;            /* done with the tuple */
+    }
+
+    if (resultRelInfo->ri_WithCheckOptions != NIL)
+    {
+        /*
+         * Check target's existing tuple against UPDATE-applicable USING
+         * security barrier quals (if any), enforced here as RLS checks/WCOs.
+         *
+         * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
+         * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
+         * but that's almost the extent of its special handling for ON
+         * CONFLICT DO UPDATE.
+         *
+         * The rewriter will also have associated UPDATE applicable straight
+         * RLS checks/WCOs for the benefit of the ExecUpdate() call that
+         * follows.  INSERTs and UPDATEs naturally have mutually exclusive WCO
+         * kinds, so there is no danger of spurious over-enforcement in the
+         * INSERT or UPDATE path.
+         */
+        ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
+                             mtstate->mt_existing,
+                             mtstate->ps.state);
+    }
+
+    /* Project the new tuple version */
+    ExecProject(resultRelInfo->ri_onConflictSetProj, NULL);
+
+    /* Execute UPDATE with projection */
+    *returning = ExecUpdate(&tuple.t_data->t_ctid, NULL,
+                            mtstate->mt_conflproj, planSlot,
+                            &mtstate->mt_epqstate, mtstate->ps.state,
+                            canSetTag);
+
+    ReleaseBuffer(buffer);
+    return true;
+}
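The IsolationUsesXactSnapshot() checks above, together with ExecCheckHeapTupleVisible(), mean that at REPEATABLE READ or SERIALIZABLE an upsert that could only proceed on the basis of a concurrently committed row fails instead of silently acting on it; schematically:

    -- Session 1:
    BEGIN ISOLATION LEVEL REPEATABLE READ;
    SELECT count(*) FROM distributors;   -- snapshot taken here

    -- Session 2 commits a conflicting row:
    INSERT INTO distributors VALUES (11, 'Speedy Hire');

    -- Back in session 1:
    INSERT INTO distributors VALUES (11, 'Speedy Hire')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;
    -- ERROR:  could not serialize access due to concurrent update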

 /*
  * Process BEFORE EACH STATEMENT triggers
@@ -843,6 +1204,9 @@ fireBSTriggers(ModifyTableState *node)
     {
         case CMD_INSERT:
             ExecBSInsertTriggers(node->ps.state, node->resultRelInfo);
+            if (node->mt_onconflict == ONCONFLICT_UPDATE)
+                ExecBSUpdateTriggers(node->ps.state,
+                                     node->resultRelInfo);
             break;
         case CMD_UPDATE:
             ExecBSUpdateTriggers(node->ps.state, node->resultRelInfo);
@@ -865,6 +1229,9 @@ fireASTriggers(ModifyTableState *node)
     switch (node->operation)
     {
         case CMD_INSERT:
+            if (node->mt_onconflict == ONCONFLICT_UPDATE)
+                ExecASUpdateTriggers(node->ps.state,
+                                     node->resultRelInfo);
             ExecASInsertTriggers(node->ps.state, node->resultRelInfo);
             break;
         case CMD_UPDATE:
@@ -1062,7 +1429,9 @@ ExecModifyTable(ModifyTableState *node)
             switch (operation)
             {
                 case CMD_INSERT:
-                    slot = ExecInsert(slot, planSlot, estate, node->canSetTag);
+                    slot = ExecInsert(node, slot, planSlot,
+                                      node->mt_arbiterindexes, node->mt_onconflict,
+                                      estate, node->canSetTag);
                     break;
                 case CMD_UPDATE:
                     slot = ExecUpdate(tupleid, oldtuple, slot, planSlot,
@@ -1137,6 +1506,8 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
     mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
     mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
     mtstate->mt_nplans = nplans;
+    mtstate->mt_onconflict = node->onConflictAction;
+    mtstate->mt_arbiterindexes = node->arbiterIndexes;

     /* set up epqstate with dummy subplan data for the moment */
     EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
@@ -1175,7 +1546,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
         if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
             operation != CMD_DELETE &&
             resultRelInfo->ri_IndexRelationDescs == NULL)
-            ExecOpenIndices(resultRelInfo);
+            ExecOpenIndices(resultRelInfo, mtstate->mt_onconflict != ONCONFLICT_NONE);

         /* Now init the plan for this result rel */
         estate->es_result_relation_info = resultRelInfo;
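Per the two trigger hunks above, ON CONFLICT DO UPDATE fires both INSERT and UPDATE statement-level triggers; a sketch (names illustrative):

    CREATE FUNCTION note_stmt() RETURNS trigger
    LANGUAGE plpgsql AS $$
    BEGIN
        RAISE NOTICE '% statement on distributors', TG_OP;
        RETURN NULL;
    END;
    $$;

    CREATE TRIGGER distributors_stmt
        BEFORE INSERT OR UPDATE ON distributors
        FOR EACH STATEMENT EXECUTE PROCEDURE note_stmt();

    -- Raises NOTICEs for both INSERT and UPDATE, regardless of which
    -- path individual rows take.
    INSERT INTO distributors (did, dname)
    VALUES (12, 'Fresh Produce Co')
    ON CONFLICT (did) DO UPDATE SET dname = EXCLUDED.dname;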
@@ -1280,6 +1651,58 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
     }

     /*
+     * If needed, initialize the target list, projection and qual for ON
+     * CONFLICT DO UPDATE.
+     */
+    resultRelInfo = mtstate->resultRelInfo;
+    if (node->onConflictAction == ONCONFLICT_UPDATE)
+    {
+        ExprContext *econtext;
+        ExprState  *setexpr;
+        TupleDesc   tupDesc;
+
+        /* insert may only have one plan, inheritance is not expanded */
+        Assert(nplans == 1);
+
+        /* already exists if created by RETURNING processing above */
+        if (mtstate->ps.ps_ExprContext == NULL)
+            ExecAssignExprContext(estate, &mtstate->ps);
+
+        econtext = mtstate->ps.ps_ExprContext;
+
+        /* initialize slot for the existing tuple */
+        mtstate->mt_existing = ExecInitExtraTupleSlot(mtstate->ps.state);
+        ExecSetSlotDescriptor(mtstate->mt_existing,
+                              resultRelInfo->ri_RelationDesc->rd_att);
+
+        mtstate->mt_excludedtlist = node->exclRelTlist;
+
+        /* create target slot for UPDATE SET projection */
+        tupDesc = ExecTypeFromTL((List *) node->onConflictSet,
+                                 false);
+        mtstate->mt_conflproj = ExecInitExtraTupleSlot(mtstate->ps.state);
+        ExecSetSlotDescriptor(mtstate->mt_conflproj, tupDesc);
+
+        /* build UPDATE SET expression and projection state */
+        setexpr = ExecInitExpr((Expr *) node->onConflictSet, &mtstate->ps);
+        resultRelInfo->ri_onConflictSetProj =
+            ExecBuildProjectionInfo((List *) setexpr, econtext,
+                                    mtstate->mt_conflproj,
+                                    resultRelInfo->ri_RelationDesc->rd_att);
+
+        /* build DO UPDATE WHERE clause expression */
+        if (node->onConflictWhere)
+        {
+            ExprState  *qualexpr;
+
+            qualexpr = ExecInitExpr((Expr *) node->onConflictWhere,
+                                    mtstate->mt_plans[0]);
+
+            resultRelInfo->ri_onConflictSetWhere = (List *) qualexpr;
+        }
+    }
+
+    /*
      * If we have any secondary relations in an UPDATE or DELETE, they need to
      * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
      * EvalPlanQual mechanism needs to be told about them.  Locate the
      * relevant ExecRowMarks.
      */