diff options
author | Amit Langote <amitlan@postgresql.org> | 2025-03-19 12:14:24 +0900 |
---|---|---|
committer | Amit Langote <amitlan@postgresql.org> | 2025-03-19 12:14:24 +0900 |
commit | 28317de723b60b61c40e7de4341a3029f698ddaf (patch) | |
tree | 8825f1161b48eaedf1efb6daaf32adfc72ab02c6 | |
parent | 06fb5612c970b3af95aca3db5a955669b07537ca (diff) | |
download | postgresql-28317de723b60b61c40e7de4341a3029f698ddaf.tar.gz postgresql-28317de723b60b61c40e7de4341a3029f698ddaf.zip |
Ensure first ModifyTable rel initialized if all are pruned
Commit cbc127917e introduced tracking of unpruned relids to avoid
processing pruned relations, and changed ExecInitModifyTable() to
initialize only unpruned result relations. As a result, MERGE
statements that prune all target partitions can now lead to crashes
or incorrect behavior during execution.
The crash occurs because some executor code paths rely on
ModifyTableState.resultRelInfo[0] being present and initialized,
even when no result relations remain after pruning. For example,
ExecMerge() and ExecMergeNotMatched() use the first resultRelInfo
to determine the appropriate action. Similarly,
ExecInitPartitionInfo() assumes that at least one result relation
exists.
To preserve these assumptions, ExecInitModifyTable() now includes the
first result relation in the initialized result relation list if all
result relations for that ModifyTable were pruned. To enable that,
ExecDoInitialPruning() ensures the first relation is locked if it was
pruned and locking is necessary.
To support this exception to the pruning logic, PlannedStmt now
includes a list of RT indexes identifying the first result relation
of each ModifyTable node in the plan. This allows
ExecDoInitialPruning() to check whether each such relation was
pruned and, if so, lock it if necessary.
Bug: #18830
Reported-by: Robins Tharakan <tharakan@gmail.com>
Diagnozed-by: Tender Wang <tndrwang@gmail.com>
Diagnozed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Co-authored-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Reviewed-by: Tender Wang <tndrwang@gmail.com>
Reviewed-by: Dean Rasheed <dean.a.rasheed@gmail.com>
Discussion: https://postgr.es/m/18830-1f31ea1dc930d444%40postgresql.org
-rw-r--r-- | src/backend/commands/explain.c | 14 | ||||
-rw-r--r-- | src/backend/executor/execMain.c | 2 | ||||
-rw-r--r-- | src/backend/executor/execPartition.c | 31 | ||||
-rw-r--r-- | src/backend/executor/execUtils.c | 18 | ||||
-rw-r--r-- | src/backend/executor/nodeModifyTable.c | 32 | ||||
-rw-r--r-- | src/backend/optimizer/plan/planner.c | 1 | ||||
-rw-r--r-- | src/backend/optimizer/plan/setrefs.c | 3 | ||||
-rw-r--r-- | src/include/executor/execPartition.h | 4 | ||||
-rw-r--r-- | src/include/executor/executor.h | 3 | ||||
-rw-r--r-- | src/include/nodes/pathnodes.h | 3 | ||||
-rw-r--r-- | src/include/nodes/plannodes.h | 7 | ||||
-rw-r--r-- | src/test/regress/expected/partition_prune.out | 82 | ||||
-rw-r--r-- | src/test/regress/sql/partition_prune.sql | 32 |
13 files changed, 214 insertions, 18 deletions
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 22616cf7add..33a16d2d8e2 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -4575,10 +4575,20 @@ show_modifytable_info(ModifyTableState *mtstate, List *ancestors, break; } - /* Should we explicitly label target relations? */ + /* + * Should we explicitly label target relations? + * + * If there's only one target relation, do not list it if it's the + * relation named in the query, or if it has been pruned. (Normally + * mtstate->resultRelInfo doesn't include pruned relations, but a single + * pruned target relation may be present, if all other target relations + * have been pruned. See ExecInitModifyTable().) + */ labeltargets = (mtstate->mt_nrels > 1 || (mtstate->mt_nrels == 1 && - mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation)); + mtstate->resultRelInfo[0].ri_RangeTableIndex != node->nominalRelation && + bms_is_member(mtstate->resultRelInfo[0].ri_RangeTableIndex, + mtstate->ps.state->es_unpruned_relids))); if (labeltargets) ExplainOpenGroup("Target Tables", "Target Tables", false, es); diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index 0493b7d5365..e9bd98c7738 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -1006,7 +1006,7 @@ InitPlan(QueryDesc *queryDesc, int eflags) case ROW_MARK_SHARE: case ROW_MARK_KEYSHARE: case ROW_MARK_REFERENCE: - relation = ExecGetRangeTableRelation(estate, rc->rti); + relation = ExecGetRangeTableRelation(estate, rc->rti, false); break; case ROW_MARK_COPY: /* no physical table access is required */ diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c index 5cd5e2eeb80..84ccd7d457d 100644 --- a/src/backend/executor/execPartition.c +++ b/src/backend/executor/execPartition.c @@ -1819,6 +1819,7 @@ adjust_partition_colnos_using_map(List *colnos, AttrMap *attrMap) void ExecDoInitialPruning(EState *estate) { + PlannedStmt *stmt = estate->es_plannedstmt; ListCell *lc; List *locked_relids = NIL; @@ -1868,6 +1869,34 @@ ExecDoInitialPruning(EState *estate) } /* + * Lock the first result relation of each ModifyTable node, even if it was + * pruned. This is required for ExecInitModifyTable(), which keeps its + * first result relation if all other result relations have been pruned, + * because some executor paths (e.g., in nodeModifyTable.c and + * execPartition.c) rely on there being at least one result relation. + * + * There's room for improvement here --- we actually only need to do this + * if all other result relations of the ModifyTable node were pruned, but + * we don't have an easy way to tell that here. + */ + if (stmt->resultRelations && ExecShouldLockRelations(estate)) + { + foreach(lc, stmt->firstResultRels) + { + Index firstResultRel = lfirst_int(lc); + + if (!bms_is_member(firstResultRel, estate->es_unpruned_relids)) + { + RangeTblEntry *rte = exec_rt_fetch(firstResultRel, estate); + + Assert(rte->rtekind == RTE_RELATION && rte->rellockmode != NoLock); + LockRelationOid(rte->relid, rte->rellockmode); + locked_relids = lappend_int(locked_relids, firstResultRel); + } + } + } + + /* * Release the useless locks if the plan won't be executed. This is the * same as what CheckCachedPlan() in plancache.c does. */ @@ -2076,7 +2105,7 @@ CreatePartitionPruneState(EState *estate, PartitionPruneInfo *pruneinfo, * because that entry will be held open and locked for the * duration of this executor run. */ - partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex); + partrel = ExecGetRangeTableRelation(estate, pinfo->rtindex, false); /* Remember for InitExecPartitionPruneContext(). */ pprune->partrel = partrel; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 39d6f4d819e..55ab18fb826 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -746,7 +746,7 @@ ExecOpenScanRelation(EState *estate, Index scanrelid, int eflags) Relation rel; /* Open the relation. */ - rel = ExecGetRangeTableRelation(estate, scanrelid); + rel = ExecGetRangeTableRelation(estate, scanrelid, false); /* * Complain if we're attempting a scan of an unscannable relation, except @@ -815,18 +815,22 @@ ExecInitRangeTable(EState *estate, List *rangeTable, List *permInfos, * * The Relations will be closed in ExecEndPlan(). * - * Note: The caller must ensure that 'rti' refers to an unpruned relation - * (i.e., it is a member of estate->es_unpruned_relids) before calling this - * function. Attempting to open a pruned relation will result in an error. + * If isResultRel is true, the relation is being used as a result relation. + * Such a relation might have been pruned, which is OK for result relations, + * but not for scan relations; see the details in ExecInitModifyTable(). If + * isResultRel is false, the caller must ensure that 'rti' refers to an + * unpruned relation (i.e., it is a member of estate->es_unpruned_relids) + * before calling this function. Attempting to open a pruned relation for + * scanning will result in an error. */ Relation -ExecGetRangeTableRelation(EState *estate, Index rti) +ExecGetRangeTableRelation(EState *estate, Index rti, bool isResultRel) { Relation rel; Assert(rti > 0 && rti <= estate->es_range_table_size); - if (!bms_is_member(rti, estate->es_unpruned_relids)) + if (!isResultRel && !bms_is_member(rti, estate->es_unpruned_relids)) elog(ERROR, "trying to open a pruned relation"); rel = estate->es_relations[rti - 1]; @@ -880,7 +884,7 @@ ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo, { Relation resultRelationDesc; - resultRelationDesc = ExecGetRangeTableRelation(estate, rti); + resultRelationDesc = ExecGetRangeTableRelation(estate, rti, true); InitResultRelInfo(resultRelInfo, resultRelationDesc, rti, diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index b0fe50075ad..87c820276a8 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -4471,6 +4471,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) ModifyTableState *mtstate; Plan *subplan = outerPlan(node); CmdType operation = node->operation; + int total_nrels = list_length(node->resultRelations); int nrels; List *resultRelations = NIL; List *withCheckOptionLists = NIL; @@ -4490,13 +4491,35 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) /* * Only consider unpruned relations for initializing their ResultRelInfo * struct and other fields such as withCheckOptions, etc. + * + * Note: We must avoid pruning every result relation. This is important + * for MERGE, since even if every result relation is pruned from the + * subplan, there might still be NOT MATCHED rows, for which there may be + * INSERT actions to perform. To allow these actions to be found, at + * least one result relation must be kept. Also, when inserting into a + * partitioned table, ExecInitPartitionInfo() needs a ResultRelInfo struct + * as a reference for building the ResultRelInfo of the target partition. + * In either case, it doesn't matter which result relation is kept, so we + * just keep the first one, if all others have been pruned. See also, + * ExecDoInitialPruning(), which ensures that this first result relation + * has been locked. */ i = 0; foreach(l, node->resultRelations) { Index rti = lfirst_int(l); + bool keep_rel; + + keep_rel = bms_is_member(rti, estate->es_unpruned_relids); + if (!keep_rel && i == total_nrels - 1 && resultRelations == NIL) + { + /* all result relations pruned; keep the first one */ + keep_rel = true; + rti = linitial_int(node->resultRelations); + i = 0; + } - if (bms_is_member(rti, estate->es_unpruned_relids)) + if (keep_rel) { resultRelations = lappend_int(resultRelations, rti); if (node->withCheckOptionLists) @@ -4537,6 +4560,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) i++; } nrels = list_length(resultRelations); + Assert(nrels > 0); /* * create state structure @@ -4735,7 +4759,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) */ mtstate->mt_resultOidAttno = ExecFindJunkAttributeInTlist(subplan->targetlist, "tableoid"); - Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || nrels == 1); + Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || total_nrels == 1); mtstate->mt_lastResultOid = InvalidOid; /* force lookup at first tuple */ mtstate->mt_lastResultIndex = 0; /* must be zero if no such attr */ @@ -4832,7 +4856,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (node->onConflictAction != ONCONFLICT_NONE) { /* insert may only have one relation, inheritance is not expanded */ - Assert(nrels == 1); + Assert(total_nrels == 1); resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes; } @@ -4979,7 +5003,7 @@ ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags) if (operation == CMD_INSERT) { /* insert may only have one relation, inheritance is not expanded */ - Assert(nrels == 1); + Assert(total_nrels == 1); resultRelInfo = mtstate->resultRelInfo; if (!resultRelInfo->ri_usesFdwDirectModify && resultRelInfo->ri_FdwRoutine != NULL && diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index a4d523dcb0f..141177e7413 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -562,6 +562,7 @@ standard_planner(Query *parse, const char *query_string, int cursorOptions, glob->prunableRelids); result->permInfos = glob->finalrteperminfos; result->resultRelations = glob->resultRelations; + result->firstResultRels = glob->firstResultRels; result->appendRelations = glob->appendRelations; result->subplans = glob->subplans; result->rewindPlanIDs = glob->rewindPlanIDs; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 999a5a8ab5a..150e9f060ee 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -1248,6 +1248,9 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) lappend_int(root->glob->resultRelations, splan->rootRelation); } + root->glob->firstResultRels = + lappend_int(root->glob->firstResultRels, + linitial_int(splan->resultRelations)); } break; case T_Append: diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h index 626613012f9..3b3f46aced0 100644 --- a/src/include/executor/execPartition.h +++ b/src/include/executor/execPartition.h @@ -43,8 +43,8 @@ extern void ExecCleanupTupleRouting(ModifyTableState *mtstate, * subpart_map contains indexes into PartitionPruningData.partrelprunedata[]. * * partrel Partitioned table Relation; obtained by - * ExecGetRangeTableRelation(estate, rti), where - * rti is PartitionedRelPruneInfo.rtindex. + * ExecGetRangeTableRelation(estate, rti, false), + * where rti is PartitionedRelPruneInfo.rtindex. * nparts Length of subplan_map[] and subpart_map[]. * subplan_map Subplan index by partition index, or -1. * subpart_map Subpart index by partition index, or -1. diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h index 0d2ffabda68..0db5d18ba22 100644 --- a/src/include/executor/executor.h +++ b/src/include/executor/executor.h @@ -680,7 +680,8 @@ exec_rt_fetch(Index rti, EState *estate) return (RangeTblEntry *) list_nth(estate->es_range_table, rti - 1); } -extern Relation ExecGetRangeTableRelation(EState *estate, Index rti); +extern Relation ExecGetRangeTableRelation(EState *estate, Index rti, + bool isResultRel); extern void ExecInitResultRelation(EState *estate, ResultRelInfo *resultRelInfo, Index rti); diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index fbf05322c75..c24a1fc8514 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -138,6 +138,9 @@ typedef struct PlannerGlobal /* "flat" list of integer RT indexes */ List *resultRelations; + /* "flat" list of integer RT indexes (one per ModifyTable node) */ + List *firstResultRels; + /* "flat" list of AppendRelInfos */ List *appendRelations; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 22841211f48..f78bffd90cf 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -102,6 +102,13 @@ typedef struct PlannedStmt /* integer list of RT indexes, or NIL */ List *resultRelations; + /* + * rtable indexes of first target relation in each ModifyTable node in the + * plan for INSERT/UPDATE/DELETE/MERGE + */ + /* integer list of RT indexes, or NIL */ + List *firstResultRels; + /* list of AppendRelInfo nodes */ List *appendRelations; diff --git a/src/test/regress/expected/partition_prune.out b/src/test/regress/expected/partition_prune.out index 8097f4e9282..0bf35260b46 100644 --- a/src/test/regress/expected/partition_prune.out +++ b/src/test/regress/expected/partition_prune.out @@ -4662,6 +4662,88 @@ table part_abc_view; 2 | c | t (1 row) +-- MERGE ... INSERT when all pruned from MERGE source. +begin; +explain (costs off) +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; + QUERY PLAN +------------------------------------------------ + Merge on part_abc + -> Nested Loop Left Join + -> Seq Scan on part_abc_2 pt2 + Filter: ((stable_one() + 1) = a) + -> Materialize + -> Append + Subplans Removed: 2 +(7 rows) + +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; + a +--- + 1 +(1 row) + +table part_abc_view; + a | b | c +---+---+--- + 1 | d | f + 2 | c | t +(2 rows) + +rollback; +-- A case with multiple ModifyTable nodes. +begin; +create table part_abc_log (action text, a int, b text, c bool); +explain (costs off) +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; + QUERY PLAN +-------------------------------------------------------- + Insert on part_abc_log + CTE t + -> Merge on part_abc + -> Nested Loop Left Join + -> Seq Scan on part_abc_2 pt2 + Filter: ((stable_one() + 1) = a) + -> Materialize + -> Append + Subplans Removed: 2 + -> CTE Scan on t +(10 rows) + +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; + action | a | b | c +--------+---+---+--- + INSERT | 1 | d | f +(1 row) + +table part_abc_view; + a | b | c +---+---+--- + 1 | d | f + 2 | c | t +(2 rows) + +table part_abc_log; + action | a | b | c +--------+---+---+--- + INSERT | 1 | d | f +(1 row) + +rollback; -- A case with nested MergeAppend with its own PartitionPruneInfo. create index on part_abc (a); alter table part_abc add d int; diff --git a/src/test/regress/sql/partition_prune.sql b/src/test/regress/sql/partition_prune.sql index 4a2c74b0899..f6db9479f54 100644 --- a/src/test/regress/sql/partition_prune.sql +++ b/src/test/regress/sql/partition_prune.sql @@ -1401,6 +1401,38 @@ using (select stable_one() + 2 as pid) as q join part_abc_1 pt1 on (q.pid = pt1. when matched then delete returning pt.a; table part_abc_view; +-- MERGE ... INSERT when all pruned from MERGE source. +begin; +explain (costs off) +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; +merge into part_abc_view pt +using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 +when not matched then insert values (1, 'd', false) returning pt.a; +table part_abc_view; +rollback; + +-- A case with multiple ModifyTable nodes. +begin; +create table part_abc_log (action text, a int, b text, c bool); +explain (costs off) +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; +with t as ( + merge into part_abc_view pt + using (select stable_one() + 1 as pid) as q join part_abc_2 pt2 on (q.pid = pt2.a) on pt.a = stable_one() + 2 + when not matched then insert values (1, 'd', false) returning merge_action(), pt.* +) +insert into part_abc_log select * from t returning *; +table part_abc_view; +table part_abc_log; +rollback; + -- A case with nested MergeAppend with its own PartitionPruneInfo. create index on part_abc (a); alter table part_abc add d int; |