author | Thomas Munro <tmunro@postgresql.org> | 2021-03-18 01:00:56 +1300 |
---|---|---|
committer | Thomas Munro <tmunro@postgresql.org> | 2021-03-18 01:00:56 +1300 |
commit | 69739ec317aef5df0ce9258142b3124b6c58b202 (patch) | |
tree | 96393b9d2685f81ca8585951a65c88ec38a623df /src/backend/executor/nodeHashjoin.c | |
parent | 4e0f0995e923948631c4114ab353b256b51b58ad (diff) | |
Revert "Fix race in Parallel Hash Join batch cleanup."
This reverts commit 4e0f0995e923948631c4114ab353b256b51b58ad.
Discussion: https://postgr.es/m/CA%2BhUKGJmcqAE3MZeDCLLXa62cWM0AJbKmp2JrJYaJ86bz36LFA%40mail.gmail.com
Diffstat (limited to 'src/backend/executor/nodeHashjoin.c')
-rw-r--r-- | src/backend/executor/nodeHashjoin.c | 40 |
1 file changed, 16 insertions, 24 deletions
```diff
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 0ca6656a24c..5532b91a71d 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -45,8 +45,7 @@
  *   PHJ_BUILD_ALLOCATING     -- one sets up the batches and table 0
  *   PHJ_BUILD_HASHING_INNER  -- all hash the inner rel
  *   PHJ_BUILD_HASHING_OUTER  -- (multi-batch only) all hash the outer
- *   PHJ_BUILD_RUNNING        -- building done, probing can begin
- *   PHJ_BUILD_DONE           -- all work complete, one frees batches
+ *   PHJ_BUILD_DONE           -- building done, probing can begin
  *
  * While in the phase PHJ_BUILD_HASHING_INNER a separate pair of barriers may
  * be used repeatedly as required to coordinate expansions in the number of
@@ -74,7 +73,7 @@
  * batches whenever it encounters them while scanning and probing, which it
  * can do because it processes batches in serial order.
  *
- * Once PHJ_BUILD_RUNNING is reached, backends then split up and process
+ * Once PHJ_BUILD_DONE is reached, backends then split up and process
  * different batches, or gang up and work together on probing batches if there
  * aren't enough to go around.  For each batch there is a separate barrier
  * with the following phases:
@@ -96,16 +95,11 @@
  *
  * To avoid deadlocks, we never wait for any barrier unless it is known that
  * all other backends attached to it are actively executing the node or have
- * finished.  Practically, that means that we never emit a tuple while attached
- * to a barrier, unless the barrier has reached a phase that means that no
- * process will wait on it again.  We emit tuples while attached to the build
- * barrier in phase PHJ_BUILD_RUNNING, and to a per-batch barrier in phase
- * PHJ_BATCH_PROBING.  These are advanced to PHJ_BUILD_DONE and PHJ_BATCH_DONE
- * respectively without waiting, using BarrierArriveAndDetach().  The last to
- * detach receives a different return value so that it knows that it's safe to
- * clean up.  Any straggler process that attaches after that phase is reached
- * will see that it's too late to participate or access the relevant shared
- * memory objects.
+ * already arrived.  Practically, that means that we never return a tuple
+ * while attached to a barrier, unless the barrier has reached its final
+ * state.  In the slightly special case of the per-batch barrier, we return
+ * tuples while in PHJ_BATCH_PROBING phase, but that's OK because we use
+ * BarrierArriveAndDetach() to advance it to PHJ_BATCH_DONE without waiting.
  *
  *-------------------------------------------------------------------------
  */
@@ -323,7 +317,6 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 
 					build_barrier = &parallel_state->build_barrier;
 					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER ||
-						   BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING ||
 						   BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 					if (BarrierPhase(build_barrier) == PHJ_BUILD_HASHING_OUTER)
 					{
@@ -336,18 +329,9 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 						BarrierArriveAndWait(build_barrier,
 											 WAIT_EVENT_HASH_BUILD_HASH_OUTER);
 					}
-					else if (BarrierPhase(build_barrier) == PHJ_BUILD_DONE)
-					{
-						/*
-						 * If we attached so late that the job is finished and
-						 * the batch state has been freed, we can return
-						 * immediately.
-						 */
-						return NULL;
-					}
+					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_DONE);
 
 					/* Each backend should now select a batch to work on. */
-					Assert(BarrierPhase(build_barrier) == PHJ_BUILD_RUNNING);
 					hashtable->curbatch = -1;
 					node->hj_JoinState = HJ_NEED_NEW_BATCH;
 
@@ -1107,6 +1091,14 @@ ExecParallelHashJoinNewBatch(HashJoinState *hjstate)
 	int			batchno;
 
 	/*
+	 * If we started up so late that the batch tracking array has been freed
+	 * already by ExecHashTableDetach(), then we are finished.  See also
+	 * ExecParallelHashEnsureBatchAccessors().
+	 */
+	if (hashtable->batches == NULL)
+		return false;
+
+	/*
 	 * If we were already attached to a batch, remember not to bother checking
 	 * it again, and detach from it (possibly freeing the hash table if we are
 	 * last to detach).
```
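The detach-without-waiting pattern that this revert returns to is easiest to see in a small sketch. The code below is not PostgreSQL source: `SharedBatchState`, `leave_batch`, and `free_shared_batch_state` are hypothetical names standing in for what `ExecHashTableDetachBatch()` actually does. `Barrier` and `BarrierArriveAndDetach()` are, however, the real primitives from `storage/barrier.h`, and the return-value convention (true only for the last backend to detach) is how the real code decides who cleans up.

```c
#include "postgres.h"

#include "storage/barrier.h"

/* Hypothetical stand-in for the shared per-batch state (not a real struct). */
typedef struct SharedBatchState SharedBatchState;

/*
 * Assumed cleanup helper; in PostgreSQL the equivalent work is done inside
 * ExecHashTableDetachBatch() in nodeHash.c.
 */
extern void free_shared_batch_state(SharedBatchState *batch);

/*
 * Leave a per-batch barrier without ever waiting on it.  Because we arrive
 * and detach in a single step, no other backend can end up blocked on us
 * while we are off returning tuples, which is the deadlock-avoidance rule
 * the restored header comment describes.
 */
static void
leave_batch(Barrier *batch_barrier, SharedBatchState *batch)
{
	/*
	 * BarrierArriveAndDetach() advances the phase (here, PHJ_BATCH_PROBING
	 * to PHJ_BATCH_DONE) and returns true only for the last participant to
	 * detach, so exactly one backend frees the shared state.
	 */
	if (BarrierArriveAndDetach(batch_barrier))
		free_shared_batch_state(batch);
}
```

The guard restored at the top of ExecParallelHashJoinNewBatch() is the other side of the same protocol: a backend that attaches only after the batch tracking array has already been freed by ExecHashTableDetach() sees `hashtable->batches == NULL` and gives up immediately instead of touching freed shared memory.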