aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/execPartition.c
diff options
context:
space:
mode:
authorAlvaro Herrera <alvherre@alvh.no-ip.org>2021-04-22 15:13:25 -0400
committerAlvaro Herrera <alvherre@alvh.no-ip.org>2021-04-22 15:13:25 -0400
commit8aba9322511f718f12b618470d8c07f0ee5f0700 (patch)
tree89d4a6f405b4088e7c8767410e3e7c9c97c598c5 /src/backend/executor/execPartition.c
parent82b13dbc4d4b46f71ca95ce1cc15c425deff5957 (diff)
downloadpostgresql-8aba9322511f718f12b618470d8c07f0ee5f0700.tar.gz
postgresql-8aba9322511f718f12b618470d8c07f0ee5f0700.zip
Fix relcache inconsistency hazard in partition detach
During queries coming from ri_triggers.c, we need to omit partitions that are marked pending detach -- otherwise, the RI query is tricked into allowing a row into the referencing table whose corresponding row is in the detached partition. Which is bogus: once the detach operation completes, the row becomes an orphan. However, the code was not doing that in repeatable-read transactions, because relcache kept a copy of the partition descriptor that included the partition, and used it in the RI query. This commit changes the partdesc cache code to only keep descriptors that aren't dependent on a snapshot (namely: those where no detached partitions exist, and those where detached partitions are included). When a partdesc-without-detached-partitions is requested, we create one afresh each time; also, those partdescs are stored in PortalContext instead of CacheMemoryContext. find_inheritance_children gets a new output *detached_exist boolean, which indicates whether any partition marked pending-detach is found. Its "include_detached" input flag is changed to "omit_detached", because that name captures the desired semantics more naturally. CreatePartitionDirectory() and RelationGetPartitionDesc() arguments are identically renamed. This was noticed because a buildfarm member that runs with relcache clobbering, which would not keep the improperly cached partdesc, broke one test, which led us to realize that the expected output of that test was bogus. This commit also corrects that expected output. Author: Amit Langote <amitlangote09@gmail.com> Author: Álvaro Herrera <alvherre@alvh.no-ip.org> Discussion: https://postgr.es/m/3269784.1617215412@sss.pgh.pa.us
Diffstat (limited to 'src/backend/executor/execPartition.c')
-rw-r--r--src/backend/executor/execPartition.c19
1 files changed, 8 insertions, 11 deletions
diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index 99780ebb961..8afddca73a0 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -991,19 +991,16 @@ ExecInitPartitionDispatchInfo(EState *estate,
/*
* For data modification, it is better that executor does not include
- * partitions being detached, except in snapshot-isolation mode. This
- * means that a read-committed transaction immediately gets a "no
- * partition for tuple" error when a tuple is inserted into a partition
- * that's being detached concurrently, but a transaction in repeatable-
- * read mode can still use the partition. Note that because partition
- * detach uses ShareLock on the partition (which conflicts with DML),
- * we're certain that the detach won't be able to complete until any
- * inserting transaction is done.
+ * partitions being detached, except when running in snapshot-isolation
+ * mode. This means that a read-committed transaction immediately gets a
+ * "no partition for tuple" error when a tuple is inserted into a
+ * partition that's being detached concurrently, but a transaction in
+ * repeatable-read mode can still use such a partition.
*/
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
CreatePartitionDirectory(estate->es_query_cxt,
- IsolationUsesXactSnapshot());
+ !IsolationUsesXactSnapshot());
oldcxt = MemoryContextSwitchTo(proute->memcxt);
@@ -1571,10 +1568,10 @@ ExecCreatePartitionPruneState(PlanState *planstate,
ListCell *lc;
int i;
- /* Executor must always include detached partitions */
+ /* For data reading, executor always includes detached partitions */
if (estate->es_partition_directory == NULL)
estate->es_partition_directory =
- CreatePartitionDirectory(estate->es_query_cxt, true);
+ CreatePartitionDirectory(estate->es_query_cxt, false);
n_part_hierarchies = list_length(partitionpruneinfo->prune_infos);
Assert(n_part_hierarchies > 0);