diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-12-30 20:24:55 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-12-30 20:26:08 -0500 |
commit | f4e4b3274317d9ce30de7e7e5b04dece7c4e1791 (patch) | |
tree | 6e3b700d25cb841749b4313e69bb4c30583e666c /src/backend/executor/nodeHash.c | |
parent | 17cb9e8c984746d3bbdf0d94367a0c5a6e2b6aee (diff) | |
download | postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.tar.gz postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.zip |
Support RIGHT and FULL OUTER JOIN in hash joins.
This is advantageous first because it allows us to hash the smaller table
regardless of the outer-join type, and second because hash join can be more
flexible than merge join in dealing with arbitrary join quals in a FULL
join. For merge join all the join quals have to be mergejoinable, but hash
join will work so long as there's at least one hashjoinable qual --- the
others can be any condition. (This is true essentially because we don't
keep per-inner-tuple match flags in merge join, while hash join can do so.)
To do this, we need a has-it-been-matched flag for each tuple in the
hashtable, not just one for the current outer tuple. The key idea that
makes this practical is that we can store the match flag in the tuple's
infomask, since there are lots of bits there that are of no interest for a
MinimalTuple. So we aren't increasing the size of the hashtable at all for
the feature.
To write this without turning the hash code into even more of a pile of
spaghetti than it already was, I rewrote ExecHashJoin in a state-machine
style, similar to ExecMergeJoin. Other than that decision, it was pretty
straightforward.
Diffstat (limited to 'src/backend/executor/nodeHash.c')
-rw-r--r-- | src/backend/executor/nodeHash.c | 151 |
1 files changed, 146 insertions, 5 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 5cd7332237b..934a283b8d2 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -105,7 +105,8 @@ MultiExecHash(HashState *node) break; /* We have to compute the hash value */ econtext->ecxt_innertuple = slot; - if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false, + if (ExecHashGetHashValue(hashtable, econtext, hashkeys, + false, hashtable->keepNulls, &hashvalue)) { int bucketNumber; @@ -231,7 +232,7 @@ ExecEndHash(HashState *node) * ---------------------------------------------------------------- */ HashJoinTable -ExecHashTableCreate(Hash *node, List *hashOperators) +ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls) { HashJoinTable hashtable; Plan *outerNode; @@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) hashtable->nbuckets = nbuckets; hashtable->log2_nbuckets = log2_nbuckets; hashtable->buckets = NULL; + hashtable->keepNulls = keepNulls; hashtable->skewEnabled = false; hashtable->skewBucket = NULL; hashtable->skewBucketLen = 0; @@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable, HashJoinTuple hashTuple; int hashTupleSize; + /* Create the HashJoinTuple */ hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len; hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt, hashTupleSize); hashTuple->hashvalue = hashvalue; memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); + + /* + * We always reset the tuple-matched flag on insertion. This is okay + * even when reloading a tuple from a batch file, since the tuple + * could not possibly have been matched to an outer tuple before it + * went into the batch file. + */ + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple)); + + /* Push it onto the front of the bucket's list */ hashTuple->next = hashtable->buckets[bucketno]; hashtable->buckets[bucketno] = hashTuple; + + /* Account for space used, and back off if we've used too much */ hashtable->spaceUsed += hashTupleSize; if (hashtable->spaceUsed > hashtable->spacePeak) hashtable->spacePeak = hashtable->spaceUsed; @@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, * scan a hash bucket for matches to the current outer tuple * * The current outer tuple must be stored in econtext->ecxt_outertuple. + * + * On success, the inner tuple is stored into hjstate->hj_CurTuple and + * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot + * for the latter. */ -HashJoinTuple +bool ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext) { @@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate, if (ExecQual(hjclauses, econtext, false)) { hjstate->hj_CurTuple = hashTuple; - return hashTuple; + return true; } } @@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate, /* * no match */ - return NULL; + return false; +} + +/* + * ExecPrepHashTableForUnmatched + * set up for a series of ExecScanHashTableForUnmatched calls + */ +void +ExecPrepHashTableForUnmatched(HashJoinState *hjstate) +{ + /* + *---------- + * During this scan we use the HashJoinState fields as follows: + * + * hj_CurBucketNo: next regular bucket to scan + * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums) + * hj_CurTuple: last tuple returned, or NULL to start next bucket + *---------- + */ + hjstate->hj_CurBucketNo = 0; + hjstate->hj_CurSkewBucketNo = 0; + hjstate->hj_CurTuple = NULL; +} + +/* + * ExecScanHashTableForUnmatched + * scan the hash table for unmatched inner tuples + * + * On success, the inner tuple is stored into hjstate->hj_CurTuple and + * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot + * for the latter. + */ +bool +ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext) +{ + HashJoinTable hashtable = hjstate->hj_HashTable; + HashJoinTuple hashTuple = hjstate->hj_CurTuple; + + for (;;) + { + /* + * hj_CurTuple is the address of the tuple last returned from the + * current bucket, or NULL if it's time to start scanning a new + * bucket. + */ + if (hashTuple != NULL) + hashTuple = hashTuple->next; + else if (hjstate->hj_CurBucketNo < hashtable->nbuckets) + { + hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo]; + hjstate->hj_CurBucketNo++; + } + else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets) + { + int j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo]; + + hashTuple = hashtable->skewBucket[j]->tuples; + hjstate->hj_CurSkewBucketNo++; + } + else + break; /* finished all buckets */ + + while (hashTuple != NULL) + { + if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple))) + { + TupleTableSlot *inntuple; + + /* insert hashtable's tuple into exec slot */ + inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple), + hjstate->hj_HashTupleSlot, + false); /* do not pfree */ + econtext->ecxt_innertuple = inntuple; + + /* + * Reset temp memory each time; although this function doesn't + * do any qual eval, the caller will, so let's keep it + * parallel to ExecScanHashBucket. + */ + ResetExprContext(econtext); + + hjstate->hj_CurTuple = hashTuple; + return true; + } + + hashTuple = hashTuple->next; + } + } + + /* + * no more unmatched tuples + */ + return false; } /* @@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable) MemoryContextSwitchTo(oldcxt); } +/* + * ExecHashTableResetMatchFlags + * Clear all the HeapTupleHeaderHasMatch flags in the table + */ +void +ExecHashTableResetMatchFlags(HashJoinTable hashtable) +{ + HashJoinTuple tuple; + int i; + + /* Reset all flags in the main table ... */ + for (i = 0; i < hashtable->nbuckets; i++) + { + for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next) + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple)); + } + + /* ... and the same for the skew buckets, if any */ + for (i = 0; i < hashtable->nSkewBuckets; i++) + { + int j = hashtable->skewBucketNums[i]; + HashSkewBucket *skewBucket = hashtable->skewBucket[j]; + + for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next) + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple)); + } +} + + void ExecReScanHash(HashState *node) { @@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable, hashTupleSize); hashTuple->hashvalue = hashvalue; memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len); + HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple)); /* Push it onto the front of the skew bucket's list */ hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples; |