aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2017-12-05 10:55:56 -0800
committerAndres Freund <andres@anarazel.de>2017-12-05 10:55:56 -0800
commit5bcf389ecfd40daf92238e1abbff4fc4d3f18b33 (patch)
tree085fae86ad5cb6cd155e4a9e2612d56fcae19f97 /src/backend/executor
parent82c5c533d1f55dfbf3eacc61aff64fedc8915d78 (diff)
downloadpostgresql-5bcf389ecfd40daf92238e1abbff4fc4d3f18b33.tar.gz
postgresql-5bcf389ecfd40daf92238e1abbff4fc4d3f18b33.zip
Fix EXPLAIN ANALYZE of hash join when the leader doesn't participate.
If a hash join appears in a parallel query, there may be no hash table available for explain.c to inspect even though a hash table may have been built in other processes. This could happen either because parallel_leader_participation was set to off or because the leader happened to hit the end of the outer relation immediately (even though the complete relation is not empty) and decided not to build the hash table. Commit bf11e7ee introduced a way for workers to exchange instrumentation via the DSM segment for Sort nodes even though they are not parallel-aware. This commit does the same for Hash nodes, so that explain.c has a way to find instrumentation data from an arbitrary participant that actually built the hash table. Author: Thomas Munro Reviewed-By: Andres Freund Discussion: https://postgr.es/m/CAEepm%3D3DUQC2-z252N55eOcZBer6DPdM%3DFzrxH9dZc5vYLsjaA%40mail.gmail.com
Diffstat (limited to 'src/backend/executor')
-rw-r--r--src/backend/executor/execParallel.c43
-rw-r--r--src/backend/executor/execProcnode.c3
-rw-r--r--src/backend/executor/nodeHash.c104
3 files changed, 140 insertions, 10 deletions
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index 53c5254be13..0aca00b0e68 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -29,6 +29,7 @@
#include "executor/nodeBitmapHeapscan.h"
#include "executor/nodeCustom.h"
#include "executor/nodeForeignscan.h"
+#include "executor/nodeHash.h"
#include "executor/nodeIndexscan.h"
#include "executor/nodeIndexonlyscan.h"
#include "executor/nodeSeqscan.h"
@@ -259,8 +260,12 @@ ExecParallelEstimate(PlanState *planstate, ExecParallelEstimateContext *e)
ExecBitmapHeapEstimate((BitmapHeapScanState *) planstate,
e->pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashEstimate((HashState *) planstate, e->pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortEstimate((SortState *) planstate, e->pcxt);
break;
@@ -458,8 +463,12 @@ ExecParallelInitializeDSM(PlanState *planstate,
ExecBitmapHeapInitializeDSM((BitmapHeapScanState *) planstate,
d->pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashInitializeDSM((HashState *) planstate, d->pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortInitializeDSM((SortState *) planstate, d->pcxt);
break;
@@ -872,8 +881,12 @@ ExecParallelReInitializeDSM(PlanState *planstate,
ExecBitmapHeapReInitializeDSM((BitmapHeapScanState *) planstate,
pcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashReInitializeDSM((HashState *) planstate, pcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortReInitializeDSM((SortState *) planstate, pcxt);
break;
@@ -928,12 +941,18 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate,
planstate->worker_instrument->num_workers = instrumentation->num_workers;
memcpy(&planstate->worker_instrument->instrument, instrument, ibytes);
- /*
- * Perform any node-type-specific work that needs to be done. Currently,
- * only Sort nodes need to do anything here.
- */
- if (IsA(planstate, SortState))
- ExecSortRetrieveInstrumentation((SortState *) planstate);
+ /* Perform any node-type-specific work that needs to be done. */
+ switch (nodeTag(planstate))
+ {
+ case T_SortState:
+ ExecSortRetrieveInstrumentation((SortState *) planstate);
+ break;
+ case T_HashState:
+ ExecHashRetrieveInstrumentation((HashState *) planstate);
+ break;
+ default:
+ break;
+ }
return planstate_tree_walker(planstate, ExecParallelRetrieveInstrumentation,
instrumentation);
@@ -1160,8 +1179,12 @@ ExecParallelInitializeWorker(PlanState *planstate, ParallelWorkerContext *pwcxt)
ExecBitmapHeapInitializeWorker((BitmapHeapScanState *) planstate,
pwcxt);
break;
+ case T_HashState:
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
+ ExecHashInitializeWorker((HashState *) planstate, pwcxt);
+ break;
case T_SortState:
- /* even when not parallel-aware */
+ /* even when not parallel-aware, for EXPLAIN ANALYZE */
ExecSortInitializeWorker((SortState *) planstate, pwcxt);
break;
diff --git a/src/backend/executor/execProcnode.c b/src/backend/executor/execProcnode.c
index c1aa5064c90..9befca90161 100644
--- a/src/backend/executor/execProcnode.c
+++ b/src/backend/executor/execProcnode.c
@@ -751,6 +751,9 @@ ExecShutdownNode(PlanState *node)
case T_GatherMergeState:
ExecShutdownGatherMerge((GatherMergeState *) node);
break;
+ case T_HashState:
+ ExecShutdownHash((HashState *) node);
+ break;
default:
break;
}
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index f7cd8fb3472..6fe5d69d558 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -1638,6 +1638,110 @@ ExecHashRemoveNextSkewBucket(HashJoinTable hashtable)
}
/*
+ * Reserve space in the DSM segment for instrumentation data.
+ */
+void
+ExecHashEstimate(HashState *node, ParallelContext *pcxt)
+{
+ size_t size;
+
+ size = mul_size(pcxt->nworkers, sizeof(HashInstrumentation));
+ size = add_size(size, offsetof(SharedHashInfo, hinstrument));
+ shm_toc_estimate_chunk(&pcxt->estimator, size);
+ shm_toc_estimate_keys(&pcxt->estimator, 1);
+}
+
+/*
+ * Set up a space in the DSM for all workers to record instrumentation data
+ * about their hash table.
+ */
+void
+ExecHashInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+ size_t size;
+
+ size = offsetof(SharedHashInfo, hinstrument) +
+ pcxt->nworkers * sizeof(HashInstrumentation);
+ node->shared_info = (SharedHashInfo *) shm_toc_allocate(pcxt->toc, size);
+ memset(node->shared_info, 0, size);
+ node->shared_info->num_workers = pcxt->nworkers;
+ shm_toc_insert(pcxt->toc, node->ps.plan->plan_node_id,
+ node->shared_info);
+}
+
+/*
+ * Reset shared state before beginning a fresh scan.
+ */
+void
+ExecHashReInitializeDSM(HashState *node, ParallelContext *pcxt)
+{
+ if (node->shared_info != NULL)
+ {
+ memset(node->shared_info->hinstrument, 0,
+ node->shared_info->num_workers * sizeof(HashInstrumentation));
+ }
+}
+
+/*
+ * Locate the DSM space for hash table instrumentation data that we'll write
+ * to at shutdown time.
+ */
+void
+ExecHashInitializeWorker(HashState *node, ParallelWorkerContext *pwcxt)
+{
+ SharedHashInfo *shared_info;
+
+ shared_info = (SharedHashInfo *)
+ shm_toc_lookup(pwcxt->toc, node->ps.plan->plan_node_id, true);
+ node->hinstrument = &shared_info->hinstrument[ParallelWorkerNumber];
+}
+
+/*
+ * Copy instrumentation data from this worker's hash table (if it built one)
+ * to DSM memory so the leader can retrieve it. This must be done in an
+ * ExecShutdownHash() rather than ExecEndHash() because the latter runs after
+ * we've detached from the DSM segment.
+ */
+void
+ExecShutdownHash(HashState *node)
+{
+ if (node->hinstrument && node->hashtable)
+ ExecHashGetInstrumentation(node->hinstrument, node->hashtable);
+}
+
+/*
+ * Retrieve instrumentation data from workers before the DSM segment is
+ * detached, so that EXPLAIN can access it.
+ */
+void
+ExecHashRetrieveInstrumentation(HashState *node)
+{
+ SharedHashInfo *shared_info = node->shared_info;
+ size_t size;
+
+ /* Replace node->shared_info with a copy in backend-local memory. */
+ size = offsetof(SharedHashInfo, hinstrument) +
+ shared_info->num_workers * sizeof(HashInstrumentation);
+ node->shared_info = palloc(size);
+ memcpy(node->shared_info, shared_info, size);
+}
+
+/*
+ * Copy the instrumentation data from 'hashtable' into a HashInstrumentation
+ * struct.
+ */
+void
+ExecHashGetInstrumentation(HashInstrumentation *instrument,
+ HashJoinTable hashtable)
+{
+ instrument->nbuckets = hashtable->nbuckets;
+ instrument->nbuckets_original = hashtable->nbuckets_original;
+ instrument->nbatch = hashtable->nbatch;
+ instrument->nbatch_original = hashtable->nbatch_original;
+ instrument->space_peak = hashtable->spacePeak;
+}
+
+/*
* Allocate 'size' bytes from the currently active HashMemoryChunk
*/
static void *