aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeHashjoin.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2003-05-30 20:23:10 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2003-05-30 20:23:10 +0000
commitd24d75ff194e292fe49c6c84d0124cc61b182d3c (patch)
treec302d2f87a1ddd74ceddef180bee54d7715424f3 /src/backend/executor/nodeHashjoin.c
parent776d5309240bdadc5f905cacba5f237e532c6139 (diff)
downloadpostgresql-d24d75ff194e292fe49c6c84d0124cc61b182d3c.tar.gz
postgresql-d24d75ff194e292fe49c6c84d0124cc61b182d3c.zip
Small performance improvement for hash joins and hash aggregation:
when the plan is ReScanned, we don't have to rebuild the hash table if there is no parameter change for its child node. This idea has been used for a long time in Sort and Material nodes, but was not in the hash code till now.
Diffstat (limited to 'src/backend/executor/nodeHashjoin.c')
-rw-r--r--src/backend/executor/nodeHashjoin.c83
1 files changed, 45 insertions, 38 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 000063a8b7f..17585b2f0fc 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.50 2003/05/05 17:57:47 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/executor/nodeHashjoin.c,v 1.51 2003/05/30 20:23:10 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -56,9 +56,7 @@ ExecHashJoin(HashJoinState *node)
HashJoinTable hashtable;
HeapTuple curtuple;
TupleTableSlot *outerTupleSlot;
- TupleTableSlot *innerTupleSlot;
int i;
- bool hashPhaseDone;
/*
* get information from HashJoin node
@@ -69,7 +67,6 @@ ExecHashJoin(HashJoinState *node)
otherqual = node->js.ps.qual;
hashNode = (HashState *) innerPlanState(node);
outerNode = outerPlanState(node);
- hashPhaseDone = node->hj_hashdone;
dir = estate->es_direction;
/*
@@ -114,24 +111,20 @@ ExecHashJoin(HashJoinState *node)
/*
* if this is the first call, build the hash table for inner relation
*/
- if (!hashPhaseDone)
- { /* if the hash phase not completed */
- if (hashtable == NULL)
- { /* if the hash table has not been created */
-
- /*
- * create the hash table
- */
- hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
- node->hj_HashTable = hashtable;
+ if (!node->hj_hashdone)
+ {
+ /*
+ * create the hash table
+ */
+ Assert(hashtable == NULL);
+ hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan);
+ node->hj_HashTable = hashtable;
- /*
- * execute the Hash node, to build the hash table
- */
- hashNode->hashtable = hashtable;
- innerTupleSlot = ExecProcNode((PlanState *) hashNode);
- }
- node->hj_hashdone = true;
+ /*
+ * execute the Hash node, to build the hash table
+ */
+ hashNode->hashtable = hashtable;
+ (void) ExecProcNode((PlanState *) hashNode);
/*
* Open temp files for outer batches, if needed. Note that file
@@ -139,9 +132,9 @@ ExecHashJoin(HashJoinState *node)
*/
for (i = 0; i < hashtable->nbatch; i++)
hashtable->outerBatchFile[i] = BufFileCreateTemp(false);
+
+ node->hj_hashdone = true;
}
- else if (hashtable == NULL)
- return NULL;
/*
* Now get an outer tuple and probe into the hash table for matches
@@ -159,11 +152,7 @@ ExecHashJoin(HashJoinState *node)
node);
if (TupIsNull(outerTupleSlot))
{
- /*
- * when the last batch runs out, clean up and exit
- */
- ExecHashTableDestroy(hashtable);
- node->hj_HashTable = NULL;
+ /* end of join */
return NULL;
}
@@ -410,8 +399,8 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
*/
hjstate->hj_hashdone = false;
-
hjstate->hj_HashTable = (HashJoinTable) NULL;
+
hjstate->hj_CurBucketNo = 0;
hjstate->hj_CurTuple = (HashJoinTuple) NULL;
@@ -461,7 +450,7 @@ void
ExecEndHashJoin(HashJoinState *node)
{
/*
- * free hash table in case we end plan before all tuples are retrieved
+ * Free hash table
*/
if (node->hj_HashTable)
{
@@ -682,21 +671,41 @@ ExecHashJoinSaveTuple(HeapTuple heapTuple,
void
ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
{
+ /*
+ * If we haven't yet built the hash table then we can just return;
+ * nothing done yet, so nothing to undo.
+ */
if (!node->hj_hashdone)
return;
-
- node->hj_hashdone = false;
+ Assert(node->hj_HashTable != NULL);
/*
- * Unfortunately, currently we have to destroy hashtable in all
- * cases...
+ * In a multi-batch join, we currently have to do rescans the hard way,
+ * primarily because batch temp files may have already been released.
+ * But if it's a single-batch join, and there is no parameter change
+ * for the inner subnode, then we can just re-use the existing hash
+ * table without rebuilding it.
*/
- if (node->hj_HashTable)
+ if (node->hj_HashTable->nbatch == 0 &&
+ ((PlanState *) node)->righttree->chgParam == NULL)
+ {
+ /* okay to reuse the hash table; needn't rescan inner, either */
+ }
+ else
{
+ /* must destroy and rebuild hash table */
+ node->hj_hashdone = false;
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
+ /*
+ * if chgParam of subnode is not null then plan will be re-scanned
+ * by first ExecProcNode.
+ */
+ if (((PlanState *) node)->righttree->chgParam == NULL)
+ ExecReScan(((PlanState *) node)->righttree, exprCtxt);
}
+ /* Always reset intra-tuple state */
node->hj_CurBucketNo = 0;
node->hj_CurTuple = (HashJoinTuple) NULL;
@@ -706,11 +715,9 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
node->hj_MatchedOuter = false;
/*
- * if chgParam of subnodes is not null then plans will be re-scanned
+ * if chgParam of subnode is not null then plan will be re-scanned
* by first ExecProcNode.
*/
if (((PlanState *) node)->lefttree->chgParam == NULL)
ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
- if (((PlanState *) node)->righttree->chgParam == NULL)
- ExecReScan(((PlanState *) node)->righttree, exprCtxt);
}