aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor/nodeHashjoin.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/executor/nodeHashjoin.c')
-rw-r--r--src/backend/executor/nodeHashjoin.c200
1 files changed, 83 insertions, 117 deletions
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 2a44a18adc1..4b0f9377ba8 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.72 2005/06/15 07:27:44 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.73 2005/09/25 19:37:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,7 +31,7 @@ static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
uint32 *hashvalue,
TupleTableSlot *tupleSlot);
static int ExecHashJoinNewBatch(HashJoinState *hjstate);
-static TupleTableSlot *ExecHashJoinReadOuterPlan(HashJoinState *hjstate);
+
/* ----------------------------------------------------------------
* ExecHashJoin
@@ -57,6 +57,8 @@ ExecHashJoin(HashJoinState *node)
HashJoinTable hashtable;
HeapTuple curtuple;
TupleTableSlot *outerTupleSlot;
+ uint32 hashvalue;
+ int batchno;
/*
* get information from HashJoin node
@@ -105,69 +107,57 @@ ExecHashJoin(HashJoinState *node)
*/
ResetExprContext(econtext);
+ /*
+ * if this is the first call, build the hash table for inner relation
+ */
if (hashtable == NULL)
{
/*
- * This is the first call to the node. When _either_ of the
- * hash join inputs are empty, we want to avoid doing
- * unnecessary work (e.g. building the hash table for the
- * inner join relation). We therefore read a single tuple from
- * both inputs before proceeding further. We choose which
- * input to probe first based on the startup cost of the plan
- * node.
+ * If the outer relation is completely empty, we can quit without
+ * building the hash table. However, for an inner join it is only
+ * a win to check this when the outer relation's startup cost is less
+ * than the projected cost of building the hash table. Otherwise
+ * it's best to build the hash table first and see if the inner
+ * relation is empty. (When it's an outer join, we should always
+ * make this check, since we aren't going to be able to skip the
+ * join on the strength of an empty inner relation anyway.)
*
- * Note that if we're executing an outer join and the inner
- * relation is empty, we still have work to do.
+ * The only way to make the check is to try to fetch a tuple from
+ * the outer plan node. If we succeed, we have to stash it away
+ * for later consumption by ExecHashJoinOuterGetTuple.
*/
-
- /* Consider probing the inner relation first */
- if (hashNode->ps.plan->startup_cost <= outerNode->plan->startup_cost &&
- node->js.jointype != JOIN_LEFT)
+ if (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost ||
+ node->js.jointype == JOIN_LEFT)
{
- /*
- * ExecHash() lets us get a single tuple from the inner
- * relation without building the entire hash table
- */
- TupleTableSlot *tup = ExecProcNode(&hashNode->ps);
- if (TupIsNull(tup))
+ node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
+ if (TupIsNull(node->hj_FirstOuterTupleSlot))
return NULL;
}
+ else
+ node->hj_FirstOuterTupleSlot = NULL;
/*
- * Before we can check the outer relation, we need to build
- * the hash table. This is somewhat a waste of time if the
- * outer relation is empty, but it would be awkward to avoid.
+ * create the hash table
*/
hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
node->hj_HashOperators);
node->hj_HashTable = hashtable;
- hashNode->hashtable = hashtable;
-
- /* Now check the outer relation */
- outerTupleSlot = ExecHashJoinReadOuterPlan(node);
-
- if (TupIsNull(outerTupleSlot))
- {
- ExecHashTableDestroy(node->hj_HashTable);
- node->hj_HashTable = NULL;
- return NULL;
- }
/*
- * Okay, we can't avoid it, so execute the Hash node to build
- * the hash table
+ * execute the Hash node, to build the hash table
*/
+ hashNode->hashtable = hashtable;
(void) MultiExecProcNode((PlanState *) hashNode);
/*
- * If the inner relation is empty but its startup cost was
- * less than the outer relation's startup cost, we can arrive
- * here -- we're done unless this is an outer join
+ * If the inner relation is completely empty, and we're not doing
+ * an outer join, we can quit without scanning the outer relation.
*/
if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
{
- ExecHashTableDestroy(node->hj_HashTable);
+ ExecHashTableDestroy(hashtable);
node->hj_HashTable = NULL;
+ node->hj_FirstOuterTupleSlot = NULL;
return NULL;
}
@@ -188,9 +178,46 @@ ExecHashJoin(HashJoinState *node)
*/
if (node->hj_NeedNewOuter)
{
- outerTupleSlot = ExecHashJoinReadOuterPlan(node);
+ outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
+ node,
+ &hashvalue);
if (TupIsNull(outerTupleSlot))
- return NULL; /* end of join */
+ {
+ /* end of join */
+ return NULL;
+ }
+
+ node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
+ econtext->ecxt_outertuple = outerTupleSlot;
+ node->hj_NeedNewOuter = false;
+ node->hj_MatchedOuter = false;
+
+ /*
+ * now we have an outer tuple, find the corresponding bucket
+ * for this tuple from the hash table
+ */
+ node->hj_CurHashValue = hashvalue;
+ ExecHashGetBucketAndBatch(hashtable, hashvalue,
+ &node->hj_CurBucketNo, &batchno);
+ node->hj_CurTuple = NULL;
+
+ /*
+ * Now we've got an outer tuple and the corresponding hash
+ * bucket, but this tuple may not belong to the current batch.
+ */
+ if (batchno != hashtable->curbatch)
+ {
+ /*
+ * Need to postpone this outer tuple to a later batch.
+ * Save it in the corresponding outer-batch file.
+ */
+ Assert(batchno > hashtable->curbatch);
+ ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
+ hashvalue,
+ &hashtable->outerBatchFile[batchno]);
+ node->hj_NeedNewOuter = true;
+ continue; /* loop around for a new outer tuple */
+ }
}
/*
@@ -400,6 +427,7 @@ ExecInitHashJoin(HashJoin *node, EState *estate)
* initialize hash-specific info
*/
hjstate->hj_HashTable = NULL;
+ hjstate->hj_FirstOuterTupleSlot = NULL;
hjstate->hj_CurHashValue = 0;
hjstate->hj_CurBucketNo = 0;
@@ -464,6 +492,7 @@ ExecEndHashJoin(HashJoinState *node)
{
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
+ node->hj_FirstOuterTupleSlot = NULL;
}
/*
@@ -486,79 +515,6 @@ ExecEndHashJoin(HashJoinState *node)
}
/*
- * ExecHashJoinReadOuterPlan
- *
- * do all the work necessary to produce the next tuple from the
- * outer hash join relation that is in the current batch. Returns
- * NULL if there are no more tuples in the outer relation.
- */
-static TupleTableSlot *
-ExecHashJoinReadOuterPlan(HashJoinState *hjstate)
-{
- PlanState *outerNode;
- ExprContext *econtext;
- HashJoinTable hashtable;
-
- outerNode = outerPlanState(hjstate);
- econtext = hjstate->js.ps.ps_ExprContext;
- hashtable = hjstate->hj_HashTable;
-
- for (;;)
- {
- TupleTableSlot *result;
- uint32 hashvalue;
- int batchno;
-
- result = ExecHashJoinOuterGetTuple(outerNode,
- hjstate,
- &hashvalue);
- if (TupIsNull(result))
- {
- /* end of join */
- return NULL;
- }
-
- hjstate->js.ps.ps_OuterTupleSlot = result;
- econtext->ecxt_outertuple = result;
- hjstate->hj_NeedNewOuter = false;
- hjstate->hj_MatchedOuter = false;
-
- /*
- * now we have an outer tuple, find the corresponding bucket
- * for this tuple from the hash table
- */
- hjstate->hj_CurHashValue = hashvalue;
- ExecHashGetBucketAndBatch(hashtable, hashvalue,
- &hjstate->hj_CurBucketNo, &batchno);
- hjstate->hj_CurTuple = NULL;
-
- /*
- * Now we've got an outer tuple and the corresponding hash
- * bucket, but this tuple may not belong to the current batch.
- */
- if (batchno != hashtable->curbatch)
- {
- /*
- * Need to postpone this outer tuple to a later batch.
- * Save it in the corresponding outer-batch file.
- */
- Assert(batchno > hashtable->curbatch);
- ExecHashJoinSaveTuple(ExecFetchSlotTuple(result),
- hashvalue,
- &hashtable->outerBatchFile[batchno]);
- hjstate->hj_NeedNewOuter = true;
- continue; /* Get the next outer tuple */
- }
-
- /*
- * Otherwise, we have a tuple in the current batch, so we're
- * done
- */
- return result;
- }
-}
-
-/*
* ExecHashJoinOuterGetTuple
*
* get the next outer tuple for hashjoin: either by
@@ -580,7 +536,15 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
if (curbatch == 0)
{ /* if it is the first pass */
- slot = ExecProcNode(outerNode);
+ /*
+ * Check to see if first outer tuple was already fetched by
+ * ExecHashJoin() and not used yet.
+ */
+ slot = hjstate->hj_FirstOuterTupleSlot;
+ if (!TupIsNull(slot))
+ hjstate->hj_FirstOuterTupleSlot = NULL;
+ else
+ slot = ExecProcNode(outerNode);
if (!TupIsNull(slot))
{
/*
@@ -840,6 +804,7 @@ ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
}
+
void
ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
{
@@ -867,6 +832,7 @@ ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
/* must destroy and rebuild hash table */
ExecHashTableDestroy(node->hj_HashTable);
node->hj_HashTable = NULL;
+ node->hj_FirstOuterTupleSlot = NULL;
/*
* if chgParam of subnode is not null then plan will be re-scanned