Revert "Fix race in Parallel Hash Join batch cleanup."

This reverts commit 4e0f0995e923948631c4114ab353b256b51b58ad. Discussion: https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com
author: Thomas Munro <tmunro@postgresql.org> 2021-03-18 01:00:56 +1300
committer: Thomas Munro <tmunro@postgresql.org> 2021-03-18 01:00:56 +1300
commit: 69739ec317aef5df0ce9258142b3124b6c58b202 (patch)
tree: 96393b9d2685f81ca8585951a65c88ec38a623df /src/backend/executor/nodeHashjoin.c
parent: 4e0f0995e923948631c4114ab353b256b51b58ad (diff)
download: postgresql-69739ec317aef5df0ce9258142b3124b6c58b202.tar.gz
postgresql-69739ec317aef5df0ce9258142b3124b6c58b202.zip
1 files changed, 16 insertions, 24 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 0ca6656a24c..5532b91a71d 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -45,8 +45,7 @@
  *   PHJ_BUILD_ALLOCATING            -- one sets up the batches and table 0
  *   PHJ_BUILD_HASHING_INNER         -- all hash the inner rel
  *   PHJ_BUILD_HASHING_OUTER         -- (multi-batch only) all hash the outer
- *   PHJ_BUILD_RUNNING               -- building done, probing can begin
- *   PHJ_BUILD_DONE                  -- all work complete, one frees batches
+ *   PHJ_BUILD_DONE                  -- building done, probing can begin
  *
  * While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may
  * be used repeatedly as required to coordinate expansions in the number of
@@ -74,7 +73,7 @@
  * batches whenever it encounters them while scanning and probing, which it
  * can do because it processes batches in serial order.
  *
- * Once PHJ_BUILD_RUNNING is reached, backends then split up and process
+ * Once PHJ_BUILD_DONE is reached, backends then split up and process
  * different batches, or gang up and work together on probing batches if there
  * aren't enough to go around.  For each batch there is a separate barrier
  * with the following phases:
@@ -96,16 +95,11 @@
  *
  * To avoid deadlocks, we never wait for any barrier unless it is known that
  * all other backends attached to it are actively executing the node or have
- * finished.  Practically, that means that we never emit a tuple while attached
- * to a barrier, unless the barrier has reached a phase that means that no
- * process will wait on it again.  We emit tuples while attached to the build
- * barrier in phase PHJ_BUILD_RUNNING, and to a per-batch barrier in phase
- * PHJ_BATCH_PROBING.  These are advanced to PHJ_BUILD_DONE and PHJ_BATCH_DONE
- * respectively without waiting, using BarrierArriveAndDetach().  The last to
- * detach receives a different return value so that it knows that it's safe to
- * clean up.  Any straggler process that attaches after that phase is reached
- * will see that it's too late to participate or access the relevant shared
- * memory objects.
+ * already arrived.  Practically, that means that we never return a tuple
+ * while attached to a barrier, unless the barrier has reached its final
+ * state.  In the slightly special case of the per-batch barrier, we return
+ * tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use
+ * BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting.
  *
  *-------------------------------------------------------------------------
  */
@@ -323,7 +317,6 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 					build_barrier = &parallel_state->build_barrier;
 					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
-						   BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING ||
 						   BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 					if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
 					{
@@ -336,18 +329,9 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 						BarrierArriveAndWait(build_barrier,
 											 WAIT_EVENT_HASH_BUILD_HASH_OUTER);
 					}
-					else if (BarrierPhase(build_barrier) == PHJ_BUILD_DONE)
-					{
-						/*
-						 * If we attached so late that the job is finished and
-						 * the batch state has been freed, we can return
-						 * immediately.
-						 */
-						return NULL;
-					}
+					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 
 					/* Each backend should now select a batch to work on. */
-					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING);
 					hashtable->curbatch = -1;
 					node->hj_JoinState = HJ_NEED_NEW_BATCH;
 
@@ -1107,6 +1091,14 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
 	int			batchno;
 
 	/*
+	 * If we started up so late that the batch tracking array has been freed
+	 * already by ExecHashTableDetach(), then we are finished.  See also
+	 * ExecParallelHashEnsureBatchAccessors().
+	 */
+	if (hashtable->batches == NULL)
+		return false;
+
+	/*
 	 * If we were already attached to a batch, remember not to bother checking
 	 * it again, and detach from it (possibly freeing the hash table if we are
 	 * last to detach).
author	Thomas Munro <tmunro@postgresql.org>	2021-03-18 01:00:56 +1300
committer	Thomas Munro <tmunro@postgresql.org>	2021-03-18 01:00:56 +1300
commit	69739ec317aef5df0ce9258142b3124b6c58b202 (patch)
tree	96393b9d2685f81ca8585951a65c88ec38a623df /src/backend/executor/nodeHashjoin.c
parent	4e0f0995e923948631c4114ab353b256b51b58ad (diff)
download	postgresql-69739ec317aef5df0ce9258142b3124b6c58b202.tar.gz postgresql-69739ec317aef5df0ce9258142b3124b6c58b202.zip