aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/trigger.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2022-03-20 18:43:40 +0100
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2022-03-20 18:43:40 +0100
commitba9a7e392171c83eb3332a757279e7088487f9a2 (patch)
treebb30fa47d030733e048c3ff77d21b6803bce941c /src/backend/commands/trigger.c
parent3f513ac7935db86f72511aac24fa6b52ed29bfe7 (diff)
downloadpostgresql-ba9a7e392171c83eb3332a757279e7088487f9a2.tar.gz
postgresql-ba9a7e392171c83eb3332a757279e7088487f9a2.zip
Enforce foreign key correctly during cross-partition updates
When an update on a partitioned table referenced in foreign key constraints causes a row to move from one partition to another, the fact that the move is implemented as a delete followed by an insert on the target partition causes the foreign key triggers to have surprising behavior. For example, a given foreign key's delete trigger which implements the ON DELETE CASCADE clause of that key will delete any referencing rows when triggered for that internal DELETE, although it should not, because the referenced row is simply being moved from one partition of the referenced root partitioned table into another, not being deleted from it. This commit teaches trigger.c to skip queuing such delete trigger events on the leaf partitions in favor of an UPDATE event fired on the root target relation. Doing so is sensible because both the old and the new tuple "logically" belong to the root relation. The after trigger event queuing interface now allows passing the source and the target partitions of a particular cross-partition update when registering the update event for the root partitioned table. Along with the two ctids of the old and the new tuple, the after trigger event now also stores the OIDs of those partitions. The tuples fetched from the source and the target partitions are converted into the root table format, if necessary, before they are passed to the trigger function. The implementation currently has a limitation that only the foreign keys pointing into the query's target relation are considered, not those of its sub-partitioned partitions. That seems like a reasonable limitation, because it sounds rare to have distinct foreign keys pointing to sub-partitioned partitions instead of to the root table. This misbehavior stems from commit f56f8f8da6af (which added support for foreign keys to reference partitioned tables) not paying sufficient attention to commit 2f178441044b (which had introduced cross-partition updates a year earlier). Even though the former commit goes back to Postgres 12, we're not backpatching this fix at this time for fear of destabilizing things too much, and because there are a few ABI breaks in it that we'd have to work around in older branches. It also depends on commit f4566345cf40, which had its own share of backpatchability issues as well. Author: Amit Langote <amitlangote09@gmail.com> Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Álvaro Herrera <alvherre@alvh.no-ip.org> Reported-by: Eduard Català <eduard.catala@gmail.com> Discussion: https://postgr.es/m/CA+HiwqFvkBCmfwkQX_yBqv2Wz8ugUGiBDxum8=WvVbfU1TXaNg@mail.gmail.com Discussion: https://postgr.es/m/CAL54xNZsLwEM1XCk5yW9EqaRzsZYHuWsHQkA2L5MOSKXAwviCQ@mail.gmail.com
Diffstat (limited to 'src/backend/commands/trigger.c')
-rw-r--r--src/backend/commands/trigger.c394
1 files changed, 338 insertions, 56 deletions
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index e08bd9a370f..fce79b02a57 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -95,10 +95,13 @@ static HeapTuple ExecCallTriggerFunc(TriggerData *trigdata,
Instrumentation *instr,
MemoryContext per_tuple_context);
static void AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+ ResultRelInfo *src_partinfo,
+ ResultRelInfo *dst_partinfo,
int event, bool row_trigger,
TupleTableSlot *oldtup, TupleTableSlot *newtup,
List *recheckIndexes, Bitmapset *modifiedCols,
- TransitionCaptureState *transition_capture);
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update);
static void AfterTriggerEnlargeQueryState(void);
static bool before_stmt_triggers_fired(Oid relid, CmdType cmdType);
@@ -2458,8 +2461,10 @@ ExecASInsertTriggers(EState *estate, ResultRelInfo *relinfo,
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_insert_after_statement)
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
- false, NULL, NULL, NIL, NULL, transition_capture);
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_INSERT,
+ false, NULL, NULL, NIL, NULL, transition_capture,
+ false);
}
bool
@@ -2547,10 +2552,12 @@ ExecARInsertTriggers(EState *estate, ResultRelInfo *relinfo,
if ((trigdesc && trigdesc->trig_insert_after_row) ||
(transition_capture && transition_capture->tcs_insert_new_table))
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_INSERT,
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_INSERT,
true, NULL, slot,
recheckIndexes, NULL,
- transition_capture);
+ transition_capture,
+ false);
}
bool
@@ -2672,8 +2679,10 @@ ExecASDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_delete_after_statement)
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
- false, NULL, NULL, NIL, NULL, transition_capture);
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_DELETE,
+ false, NULL, NULL, NIL, NULL, transition_capture,
+ false);
}
/*
@@ -2768,11 +2777,17 @@ ExecBRDeleteTriggers(EState *estate, EPQState *epqstate,
return result;
}
+/*
+ * Note: is_crosspart_update must be true if the DELETE is being performed
+ * as part of a cross-partition update.
+ */
void
-ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
+ExecARDeleteTriggers(EState *estate,
+ ResultRelInfo *relinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
- TransitionCaptureState *transition_capture)
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@@ -2793,9 +2808,11 @@ ExecARDeleteTriggers(EState *estate, ResultRelInfo *relinfo,
else
ExecForceStoreHeapTuple(fdw_trigtuple, slot, false);
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_DELETE,
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_DELETE,
true, slot, NULL, NIL, NULL,
- transition_capture);
+ transition_capture,
+ is_crosspart_update);
}
}
@@ -2914,10 +2931,12 @@ ExecASUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
Assert(relinfo->ri_RootResultRelInfo == NULL);
if (trigdesc && trigdesc->trig_update_after_statement)
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
+ AfterTriggerSaveEvent(estate, relinfo, NULL, NULL,
+ TRIGGER_EVENT_UPDATE,
false, NULL, NULL, NIL,
ExecGetAllUpdatedCols(relinfo, estate),
- transition_capture);
+ transition_capture,
+ false);
}
bool
@@ -3052,13 +3071,26 @@ ExecBRUpdateTriggers(EState *estate, EPQState *epqstate,
return true;
}
+/*
+ * Note: 'src_partinfo' and 'dst_partinfo', when non-NULL, refer to the source
+ * and destination partitions, respectively, of a cross-partition update of
+ * the root partitioned table mentioned in the query, given by 'relinfo'.
+ * 'tupleid' in that case refers to the ctid of the "old" tuple in the source
+ * partition, and 'newslot' contains the "new" tuple in the destination
+ * partition. This interface allows to support the requirements of
+ * ExecCrossPartitionUpdateForeignKey(); is_crosspart_update must be true in
+ * that case.
+ */
void
ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
+ ResultRelInfo *src_partinfo,
+ ResultRelInfo *dst_partinfo,
ItemPointer tupleid,
HeapTuple fdw_trigtuple,
TupleTableSlot *newslot,
List *recheckIndexes,
- TransitionCaptureState *transition_capture)
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update)
{
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@@ -3073,12 +3105,19 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
* separately for DELETE and INSERT to capture transition table rows.
* In such case, either old tuple or new tuple can be NULL.
*/
- TupleTableSlot *oldslot = ExecGetTriggerOldSlot(estate, relinfo);
+ TupleTableSlot *oldslot;
+ ResultRelInfo *tupsrc;
+
+ Assert((src_partinfo != NULL && dst_partinfo != NULL) ||
+ !is_crosspart_update);
+
+ tupsrc = src_partinfo ? src_partinfo : relinfo;
+ oldslot = ExecGetTriggerOldSlot(estate, tupsrc);
if (fdw_trigtuple == NULL && ItemPointerIsValid(tupleid))
GetTupleForTrigger(estate,
NULL,
- relinfo,
+ tupsrc,
tupleid,
LockTupleExclusive,
oldslot,
@@ -3088,10 +3127,14 @@ ExecARUpdateTriggers(EState *estate, ResultRelInfo *relinfo,
else
ExecClearTuple(oldslot);
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_UPDATE,
- true, oldslot, newslot, recheckIndexes,
+ AfterTriggerSaveEvent(estate, relinfo,
+ src_partinfo, dst_partinfo,
+ TRIGGER_EVENT_UPDATE,
+ true,
+ oldslot, newslot, recheckIndexes,
ExecGetAllUpdatedCols(relinfo, estate),
- transition_capture);
+ transition_capture,
+ is_crosspart_update);
}
}
@@ -3214,8 +3257,11 @@ ExecASTruncateTriggers(EState *estate, ResultRelInfo *relinfo)
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
if (trigdesc && trigdesc->trig_truncate_after_statement)
- AfterTriggerSaveEvent(estate, relinfo, TRIGGER_EVENT_TRUNCATE,
- false, NULL, NULL, NIL, NULL, NULL);
+ AfterTriggerSaveEvent(estate, relinfo,
+ NULL, NULL,
+ TRIGGER_EVENT_TRUNCATE,
+ false, NULL, NULL, NIL, NULL, NULL,
+ false);
}
@@ -3496,9 +3542,9 @@ typedef SetConstraintStateData *SetConstraintState;
* Per-trigger-event data
*
* The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
- * status bits and up to two tuple CTIDs. Each event record also has an
- * associated AfterTriggerSharedData that is shared across all instances of
- * similar events within a "chunk".
+ * status bits, up to two tuple CTIDs, and optionally two OIDs of partitions.
+ * Each event record also has an associated AfterTriggerSharedData that is
+ * shared across all instances of similar events within a "chunk".
*
* For row-level triggers, we arrange not to waste storage on unneeded ctid
* fields. Updates of regular tables use two; inserts and deletes of regular
@@ -3509,6 +3555,11 @@ typedef SetConstraintStateData *SetConstraintState;
* tuple(s). This permits storing tuples once regardless of the number of
* row-level triggers on a foreign table.
*
+ * When updates on partitioned tables cause rows to move between partitions,
+ * the OIDs of both partitions are stored too, so that the tuples can be
+ * fetched; such entries are marked AFTER_TRIGGER_CP_UPDATE (for "cross-
+ * partition update").
+ *
* Note that we need triggers on foreign tables to be fired in exactly the
* order they were queued, so that the tuples come out of the tuplestore in
* the right order. To ensure that, we forbid deferrable (constraint)
@@ -3531,16 +3582,16 @@ typedef SetConstraintStateData *SetConstraintState;
*/
typedef uint32 TriggerFlags;
-#define AFTER_TRIGGER_OFFSET 0x0FFFFFFF /* must be low-order bits */
-#define AFTER_TRIGGER_DONE 0x10000000
-#define AFTER_TRIGGER_IN_PROGRESS 0x20000000
+#define AFTER_TRIGGER_OFFSET 0x07FFFFFF /* must be low-order bits */
+#define AFTER_TRIGGER_DONE 0x80000000
+#define AFTER_TRIGGER_IN_PROGRESS 0x40000000
/* bits describing the size and tuple sources of this event */
#define AFTER_TRIGGER_FDW_REUSE 0x00000000
-#define AFTER_TRIGGER_FDW_FETCH 0x80000000
-#define AFTER_TRIGGER_1CTID 0x40000000
-#define AFTER_TRIGGER_2CTID 0xC0000000
-#define AFTER_TRIGGER_TUP_BITS 0xC0000000
-
+#define AFTER_TRIGGER_FDW_FETCH 0x20000000
+#define AFTER_TRIGGER_1CTID 0x10000000
+#define AFTER_TRIGGER_2CTID 0x30000000
+#define AFTER_TRIGGER_CP_UPDATE 0x08000000
+#define AFTER_TRIGGER_TUP_BITS 0x38000000
typedef struct AfterTriggerSharedData *AfterTriggerShared;
typedef struct AfterTriggerSharedData
@@ -3560,27 +3611,45 @@ typedef struct AfterTriggerEventData
TriggerFlags ate_flags; /* status bits and offset to shared data */
ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
ItemPointerData ate_ctid2; /* new updated tuple */
+
+ /*
+ * During a cross-partition update of a partitioned table, we also store
+ * the OIDs of source and destination partitions that are needed to fetch
+ * the old (ctid1) and the new tuple (ctid2) from, respectively.
+ */
+ Oid ate_src_part;
+ Oid ate_dst_part;
} AfterTriggerEventData;
-/* AfterTriggerEventData, minus ate_ctid2 */
+/* AfterTriggerEventData, minus ate_src_part, ate_dst_part */
+typedef struct AfterTriggerEventDataNoOids
+{
+ TriggerFlags ate_flags;
+ ItemPointerData ate_ctid1;
+ ItemPointerData ate_ctid2;
+} AfterTriggerEventDataNoOids;
+
+/* AfterTriggerEventData, minus ate_*_part and ate_ctid2 */
typedef struct AfterTriggerEventDataOneCtid
{
TriggerFlags ate_flags; /* status bits and offset to shared data */
ItemPointerData ate_ctid1; /* inserted, deleted, or old updated tuple */
} AfterTriggerEventDataOneCtid;
-/* AfterTriggerEventData, minus ate_ctid1 and ate_ctid2 */
+/* AfterTriggerEventData, minus ate_*_part, ate_ctid1 and ate_ctid2 */
typedef struct AfterTriggerEventDataZeroCtids
{
TriggerFlags ate_flags; /* status bits and offset to shared data */
} AfterTriggerEventDataZeroCtids;
#define SizeofTriggerEvent(evt) \
- (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_CP_UPDATE ? \
sizeof(AfterTriggerEventData) : \
- ((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
- sizeof(AfterTriggerEventDataOneCtid) : \
- sizeof(AfterTriggerEventDataZeroCtids))
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ? \
+ sizeof(AfterTriggerEventDataNoOids) : \
+ (((evt)->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_1CTID ? \
+ sizeof(AfterTriggerEventDataOneCtid) : \
+ sizeof(AfterTriggerEventDataZeroCtids))))
#define GetTriggerSharedData(evt) \
((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))
@@ -3762,6 +3831,8 @@ static AfterTriggersData afterTriggers;
static void AfterTriggerExecute(EState *estate,
AfterTriggerEvent event,
ResultRelInfo *relInfo,
+ ResultRelInfo *src_relInfo,
+ ResultRelInfo *dst_relInfo,
TriggerDesc *trigdesc,
FmgrInfo *finfo,
Instrumentation *instr,
@@ -4096,8 +4167,16 @@ afterTriggerDeleteHeadEventChunk(AfterTriggersQueryData *qs)
* fmgr lookup cache space at the caller level. (For triggers fired at
* the end of a query, we can even piggyback on the executor's state.)
*
+ * When fired for a cross-partition update of a partitioned table, the old
+ * tuple is fetched using 'src_relInfo' (the source leaf partition) and
+ * the new tuple using 'dst_relInfo' (the destination leaf partition), though
+ * both are converted into the root partitioned table's format before passing
+ * to the trigger function.
+ *
* event: event currently being fired.
- * rel: open relation for event.
+ * relInfo: result relation for event.
+ * src_relInfo: source partition of a cross-partition update
+ * dst_relInfo: its destination partition
* trigdesc: working copy of rel's trigger info.
* finfo: array of fmgr lookup cache entries (one per trigger in trigdesc).
* instr: array of EXPLAIN ANALYZE instrumentation nodes (one per trigger),
@@ -4111,6 +4190,8 @@ static void
AfterTriggerExecute(EState *estate,
AfterTriggerEvent event,
ResultRelInfo *relInfo,
+ ResultRelInfo *src_relInfo,
+ ResultRelInfo *dst_relInfo,
TriggerDesc *trigdesc,
FmgrInfo *finfo, Instrumentation *instr,
MemoryContext per_tuple_context,
@@ -4118,6 +4199,8 @@ AfterTriggerExecute(EState *estate,
TupleTableSlot *trig_tuple_slot2)
{
Relation rel = relInfo->ri_RelationDesc;
+ Relation src_rel = src_relInfo->ri_RelationDesc;
+ Relation dst_rel = dst_relInfo->ri_RelationDesc;
AfterTriggerShared evtshared = GetTriggerSharedData(event);
Oid tgoid = evtshared->ats_tgoid;
TriggerData LocTriggerData = {0};
@@ -4198,12 +4281,35 @@ AfterTriggerExecute(EState *estate,
default:
if (ItemPointerIsValid(&(event->ate_ctid1)))
{
- LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
+ TupleTableSlot *src_slot = ExecGetTriggerOldSlot(estate,
+ src_relInfo);
- if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid1),
+ if (!table_tuple_fetch_row_version(src_rel,
+ &(event->ate_ctid1),
SnapshotAny,
- LocTriggerData.tg_trigslot))
+ src_slot))
elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
+
+ /*
+ * Store the tuple fetched from the source partition into the
+ * target (root partitioned) table slot, converting if needed.
+ */
+ if (src_relInfo != relInfo)
+ {
+ TupleConversionMap *map = ExecGetChildToRootMap(src_relInfo);
+
+ LocTriggerData.tg_trigslot = ExecGetTriggerOldSlot(estate, relInfo);
+ if (map)
+ {
+ execute_attr_map_slot(map->attrMap,
+ src_slot,
+ LocTriggerData.tg_trigslot);
+ }
+ else
+ ExecCopySlot(LocTriggerData.tg_trigslot, src_slot);
+ }
+ else
+ LocTriggerData.tg_trigslot = src_slot;
LocTriggerData.tg_trigtuple =
ExecFetchSlotHeapTuple(LocTriggerData.tg_trigslot, false, &should_free_trig);
}
@@ -4213,16 +4319,40 @@ AfterTriggerExecute(EState *estate,
}
/* don't touch ctid2 if not there */
- if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
- AFTER_TRIGGER_2CTID &&
+ if (((event->ate_flags & AFTER_TRIGGER_TUP_BITS) == AFTER_TRIGGER_2CTID ||
+ (event->ate_flags & AFTER_TRIGGER_CP_UPDATE)) &&
ItemPointerIsValid(&(event->ate_ctid2)))
{
- LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
+ TupleTableSlot *dst_slot = ExecGetTriggerNewSlot(estate,
+ dst_relInfo);
- if (!table_tuple_fetch_row_version(rel, &(event->ate_ctid2),
+ if (!table_tuple_fetch_row_version(dst_rel,
+ &(event->ate_ctid2),
SnapshotAny,
- LocTriggerData.tg_newslot))
+ dst_slot))
elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
+
+ /*
+ * Store the tuple fetched from the destination partition into
+ * the target (root partitioned) table slot, converting if
+ * needed.
+ */
+ if (dst_relInfo != relInfo)
+ {
+ TupleConversionMap *map = ExecGetChildToRootMap(dst_relInfo);
+
+ LocTriggerData.tg_newslot = ExecGetTriggerNewSlot(estate, relInfo);
+ if (map)
+ {
+ execute_attr_map_slot(map->attrMap,
+ dst_slot,
+ LocTriggerData.tg_newslot);
+ }
+ else
+ ExecCopySlot(LocTriggerData.tg_newslot, dst_slot);
+ }
+ else
+ LocTriggerData.tg_newslot = dst_slot;
LocTriggerData.tg_newtuple =
ExecFetchSlotHeapTuple(LocTriggerData.tg_newslot, false, &should_free_new);
}
@@ -4451,13 +4581,17 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
evtshared->ats_firing_id == firing_id)
{
+ ResultRelInfo *src_rInfo,
+ *dst_rInfo;
+
/*
* So let's fire it... but first, find the correct relation if
* this is not the same relation as before.
*/
if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
{
- rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid);
+ rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid,
+ NULL);
rel = rInfo->ri_RelationDesc;
/* Catch calls with insufficient relcache refcounting */
Assert(!RelationHasReferenceCountZero(rel));
@@ -4483,11 +4617,32 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
}
/*
+ * Look up source and destination partition result rels of a
+ * cross-partition update event.
+ */
+ if ((event->ate_flags & AFTER_TRIGGER_TUP_BITS) ==
+ AFTER_TRIGGER_CP_UPDATE)
+ {
+ Assert(OidIsValid(event->ate_src_part) &&
+ OidIsValid(event->ate_dst_part));
+ src_rInfo = ExecGetTriggerResultRel(estate,
+ event->ate_src_part,
+ rInfo);
+ dst_rInfo = ExecGetTriggerResultRel(estate,
+ event->ate_dst_part,
+ rInfo);
+ }
+ else
+ src_rInfo = dst_rInfo = rInfo;
+
+ /*
* Fire it. Note that the AFTER_TRIGGER_IN_PROGRESS flag is
* still set, so recursive examinations of the event list
* won't try to re-fire it.
*/
- AfterTriggerExecute(estate, event, rInfo, trigdesc, finfo, instr,
+ AfterTriggerExecute(estate, event, rInfo,
+ src_rInfo, dst_rInfo,
+ trigdesc, finfo, instr,
per_tuple_context, slot1, slot2);
/*
@@ -5767,14 +5922,35 @@ AfterTriggerPendingOnRel(Oid relid)
* Transition tuplestores are built now, rather than when events are pulled
* off of the queue because AFTER ROW triggers are allowed to select from the
* transition tables for the statement.
+ *
+ * This contains special support to queue the update events for the case where
+ * a partitioned table undergoing a cross-partition update may have foreign
+ * keys pointing into it. Normally, a partitioned table's row triggers are
+ * not fired because the leaf partition(s) which are modified as a result of
+ * the operation on the partitioned table contain the same triggers which are
+ * fired instead. But that general scheme can cause problematic behavior with
+ * foreign key triggers during cross-partition updates, which are implemented
+ * as DELETE on the source partition followed by INSERT into the destination
+ * partition. Specifically, firing DELETE triggers would lead to the wrong
+ * foreign key action to be enforced considering that the original command is
+ * UPDATE; in this case, this function is called with relinfo as the
+ * partitioned table, and src_partinfo and dst_partinfo referring to the
+ * source and target leaf partitions, respectively.
+ *
+ * is_crosspart_update is true either when a DELETE event is fired on the
+ * source partition (which is to be ignored) or an UPDATE event is fired on
+ * the root partitioned table.
* ----------
*/
static void
AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
+ ResultRelInfo *src_partinfo,
+ ResultRelInfo *dst_partinfo,
int event, bool row_trigger,
TupleTableSlot *oldslot, TupleTableSlot *newslot,
List *recheckIndexes, Bitmapset *modifiedCols,
- TransitionCaptureState *transition_capture)
+ TransitionCaptureState *transition_capture,
+ bool is_crosspart_update)
{
Relation rel = relinfo->ri_RelationDesc;
TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
@@ -5855,6 +6031,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
}
/*
+ * We normally don't see partitioned tables here for row level triggers
+ * except in the special case of a cross-partition update. In that case,
+ * nodeModifyTable.c:ExecCrossPartitionUpdateForeignKey() calls here to
+ * queue an update event on the root target partitioned table, also
+ * passing the source and destination partitions and their tuples.
+ */
+ Assert(!row_trigger ||
+ rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
+ (is_crosspart_update &&
+ TRIGGER_FIRED_BY_UPDATE(event) &&
+ src_partinfo != NULL && dst_partinfo != NULL));
+
+ /*
* Validate the event code and collect the associated tuple CTIDs.
*
* The event code will be used both as a bitmask and an array offset, so
@@ -5914,6 +6103,19 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
Assert(newslot != NULL);
ItemPointerCopy(&(oldslot->tts_tid), &(new_event.ate_ctid1));
ItemPointerCopy(&(newslot->tts_tid), &(new_event.ate_ctid2));
+
+ /*
+ * Also remember the OIDs of partitions to fetch these tuples
+ * out of later in AfterTriggerExecute().
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ Assert(src_partinfo != NULL && dst_partinfo != NULL);
+ new_event.ate_src_part =
+ RelationGetRelid(src_partinfo->ri_RelationDesc);
+ new_event.ate_dst_part =
+ RelationGetRelid(dst_partinfo->ri_RelationDesc);
+ }
}
else
{
@@ -5938,13 +6140,53 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
}
+ /* Determine flags */
if (!(relkind == RELKIND_FOREIGN_TABLE && row_trigger))
- new_event.ate_flags = (row_trigger && event == TRIGGER_EVENT_UPDATE) ?
- AFTER_TRIGGER_2CTID : AFTER_TRIGGER_1CTID;
+ {
+ if (row_trigger && event == TRIGGER_EVENT_UPDATE)
+ {
+ if (relkind == RELKIND_PARTITIONED_TABLE)
+ new_event.ate_flags = AFTER_TRIGGER_CP_UPDATE;
+ else
+ new_event.ate_flags = AFTER_TRIGGER_2CTID;
+ }
+ else
+ new_event.ate_flags = AFTER_TRIGGER_1CTID;
+ }
+
/* else, we'll initialize ate_flags for each trigger */
tgtype_level = (row_trigger ? TRIGGER_TYPE_ROW : TRIGGER_TYPE_STATEMENT);
+ /*
+ * Must convert/copy the source and destination partition tuples into the
+ * root partitioned table's format/slot, because the processing in the
+ * loop below expects both oldslot and newslot tuples to be in that form.
+ */
+ if (row_trigger && rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ {
+ TupleTableSlot *rootslot;
+ TupleConversionMap *map;
+
+ rootslot = ExecGetTriggerOldSlot(estate, relinfo);
+ map = ExecGetChildToRootMap(src_partinfo);
+ if (map)
+ oldslot = execute_attr_map_slot(map->attrMap,
+ oldslot,
+ rootslot);
+ else
+ oldslot = ExecCopySlot(rootslot, oldslot);
+
+ rootslot = ExecGetTriggerNewSlot(estate, relinfo);
+ map = ExecGetChildToRootMap(dst_partinfo);
+ if (map)
+ newslot = execute_attr_map_slot(map->attrMap,
+ newslot,
+ rootslot);
+ else
+ newslot = ExecCopySlot(rootslot, newslot);
+ }
+
for (i = 0; i < trigdesc->numtriggers; i++)
{
Trigger *trigger = &trigdesc->triggers[i];
@@ -5973,13 +6215,30 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
/*
* If the trigger is a foreign key enforcement trigger, there are
* certain cases where we can skip queueing the event because we can
- * tell by inspection that the FK constraint will still pass.
+ * tell by inspection that the FK constraint will still pass. There
+ * are also some cases during cross-partition updates of a partitioned
+ * table where queuing the event can be skipped.
*/
if (TRIGGER_FIRED_BY_UPDATE(event) || TRIGGER_FIRED_BY_DELETE(event))
{
switch (RI_FKey_trigger_type(trigger->tgfoid))
{
case RI_TRIGGER_PK:
+
+ /*
+ * For cross-partitioned updates of partitioned PK table,
+ * skip the event fired by the component delete on the
+ * source leaf partition unless the constraint originates
+ * in the partition itself (!tgisclone), because the
+ * update event that will be fired on the root
+ * (partitioned) target table will be used to perform the
+ * necessary foreign key enforcement action.
+ */
+ if (is_crosspart_update &&
+ TRIGGER_FIRED_BY_DELETE(event) &&
+ trigger->tgisclone)
+ continue;
+
/* Update or delete on trigger's PK table */
if (!RI_FKey_pk_upd_check_required(trigger, rel,
oldslot, newslot))
@@ -5990,8 +6249,20 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
case RI_TRIGGER_FK:
- /* Update on trigger's FK table */
- if (!RI_FKey_fk_upd_check_required(trigger, rel,
+
+ /*
+ * Update on trigger's FK table. We can skip the update
+ * event fired on a partitioned table during a
+ * cross-partition of that table, because the insert event
+ * that is fired on the destination leaf partition would
+ * suffice to perform the necessary foreign key check.
+ * Moreover, RI_FKey_fk_upd_check_required() expects to be
+ * passed a tuple that contains system attributes, most of
+ * which are not present in the virtual slot belonging to
+ * a partitioned table.
+ */
+ if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ||
+ !RI_FKey_fk_upd_check_required(trigger, rel,
oldslot, newslot))
{
/* skip queuing this event */
@@ -6000,7 +6271,18 @@ AfterTriggerSaveEvent(EState *estate, ResultRelInfo *relinfo,
break;
case RI_TRIGGER_NONE:
- /* Not an FK trigger */
+
+ /*
+ * Not an FK trigger. No need to queue the update event
+ * fired during a cross-partitioned update of a
+ * partitioned table, because the same row trigger must be
+ * present in the leaf partition(s) that are affected as
+ * part of this update and the events fired on them are
+ * queued instead.
+ */
+ if (row_trigger &&
+ rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
+ continue;
break;
}
}