Support RIGHT and FULL OUTER JOIN in hash joins.

This is advantageous first because it allows us to hash the smaller table regardless of the outer-join type, and second because hash join can be more flexible than merge join in dealing with arbitrary join quals in a FULL join. For merge join all the join quals have to be mergejoinable, but hash join will work so long as there's at least one hashjoinable qual --- the others can be any condition. (This is true essentially because we don't keep per-inner-tuple match flags in merge join, while hash join can do so.) To do this, we need a has-it-been-matched flag for each tuple in the hashtable, not just one for the current outer tuple. The key idea that makes this practical is that we can store the match flag in the tuple's infomask, since there are lots of bits there that are of no interest for a MinimalTuple. So we aren't increasing the size of the hashtable at all for the feature. To write this without turning the hash code into even more of a pile of spaghetti than it already was, I rewrote ExecHashJoin in a state-machine style, similar to ExecMergeJoin. Other than that decision, it was pretty straightforward.
author: Tom Lane <tgl@sss.pgh.pa.us> 2010-12-30 20:24:55 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2010-12-30 20:26:08 -0500
commit: f4e4b3274317d9ce30de7e7e5b04dece7c4e1791 (patch)
tree: 6e3b700d25cb841749b4313e69bb4c30583e666c /src/backend/executor/nodeHash.c
parent: 17cb9e8c984746d3bbdf0d94367a0c5a6e2b6aee (diff)
download: postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.tar.gz
postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.zip
1 files changed, 146 insertions, 5 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 5cd7332237b..934a283b8d2 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -105,7 +105,8 @@ MultiExecHash(HashState *node)
 			break;
 		/* We have to compute the hash value */
 		econtext->ecxt_innertuple = slot;
-		if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false, false,
+		if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
+								 false, hashtable->keepNulls,
 								 &hashvalue))
 		{
 			int			bucketNumber;
@@ -231,7 +232,7 @@ ExecEndHash(HashState *node)
  * ----------------------------------------------------------------
  */
 HashJoinTable
-ExecHashTableCreate(Hash *node, List *hashOperators)
+ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 {
 	HashJoinTable hashtable;
 	Plan	   *outerNode;
@@ -273,6 +274,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
 	hashtable->nbuckets = nbuckets;
 	hashtable->log2_nbuckets = log2_nbuckets;
 	hashtable->buckets = NULL;
+	hashtable->keepNulls = keepNulls;
 	hashtable->skewEnabled = false;
 	hashtable->skewBucket = NULL;
 	hashtable->skewBucketLen = 0;
@@ -712,13 +714,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		HashJoinTuple hashTuple;
 		int			hashTupleSize;
 
+		/* Create the HashJoinTuple */
 		hashTupleSize = HJTUPLE_OVERHEAD + tuple->t_len;
 		hashTuple = (HashJoinTuple) MemoryContextAlloc(hashtable->batchCxt,
 													   hashTupleSize);
 		hashTuple->hashvalue = hashvalue;
 		memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+
+		/*
+		 * We always reset the tuple-matched flag on insertion.  This is okay
+		 * even when reloading a tuple from a batch file, since the tuple
+		 * could not possibly have been matched to an outer tuple before it
+		 * went into the batch file.
+		 */
+		HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
+
+		/* Push it onto the front of the bucket's list */
 		hashTuple->next = hashtable->buckets[bucketno];
 		hashtable->buckets[bucketno] = hashTuple;
+
+		/* Account for space used, and back off if we've used too much */
 		hashtable->spaceUsed += hashTupleSize;
 		if (hashtable->spaceUsed > hashtable->spacePeak)
 			hashtable->spacePeak = hashtable->spaceUsed;
@@ -878,8 +893,12 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
  *		scan a hash bucket for matches to the current outer tuple
  *
  * The current outer tuple must be stored in econtext->ecxt_outertuple.
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
  */
-HashJoinTuple
+bool
 ExecScanHashBucket(HashJoinState *hjstate,
 				   ExprContext *econtext)
 {
@@ -920,7 +939,7 @@ ExecScanHashBucket(HashJoinState *hjstate,
 			if (ExecQual(hjclauses, econtext, false))
 			{
 				hjstate->hj_CurTuple = hashTuple;
-				return hashTuple;
+				return true;
 			}
 		}
 
@@ -930,7 +949,99 @@ ExecScanHashBucket(HashJoinState *hjstate,
 	/*
 	 * no match
 	 */
-	return NULL;
+	return false;
+}
+
+/*
+ * ExecPrepHashTableForUnmatched
+ *		set up for a series of ExecScanHashTableForUnmatched calls
+ */
+void
+ExecPrepHashTableForUnmatched(HashJoinState *hjstate)
+{
+	/*
+	 *----------
+	 * During this scan we use the HashJoinState fields as follows:
+	 *
+	 * hj_CurBucketNo: next regular bucket to scan
+	 * hj_CurSkewBucketNo: next skew bucket (an index into skewBucketNums)
+	 * hj_CurTuple: last tuple returned, or NULL to start next bucket
+	 *----------
+	 */
+	hjstate->hj_CurBucketNo = 0;
+	hjstate->hj_CurSkewBucketNo = 0;
+	hjstate->hj_CurTuple = NULL;
+}
+
+/*
+ * ExecScanHashTableForUnmatched
+ *		scan the hash table for unmatched inner tuples
+ *
+ * On success, the inner tuple is stored into hjstate->hj_CurTuple and
+ * econtext->ecxt_innertuple, using hjstate->hj_HashTupleSlot as the slot
+ * for the latter.
+ */
+bool
+ExecScanHashTableForUnmatched(HashJoinState *hjstate, ExprContext *econtext)
+{
+	HashJoinTable hashtable = hjstate->hj_HashTable;
+	HashJoinTuple hashTuple = hjstate->hj_CurTuple;
+
+	for (;;)
+	{
+		/*
+		 * hj_CurTuple is the address of the tuple last returned from the
+		 * current bucket, or NULL if it's time to start scanning a new
+		 * bucket.
+		 */
+		if (hashTuple != NULL)
+			hashTuple = hashTuple->next;
+		else if (hjstate->hj_CurBucketNo < hashtable->nbuckets)
+		{
+			hashTuple = hashtable->buckets[hjstate->hj_CurBucketNo];
+			hjstate->hj_CurBucketNo++;
+		}
+		else if (hjstate->hj_CurSkewBucketNo < hashtable->nSkewBuckets)
+		{
+			int		j = hashtable->skewBucketNums[hjstate->hj_CurSkewBucketNo];
+
+			hashTuple = hashtable->skewBucket[j]->tuples;
+			hjstate->hj_CurSkewBucketNo++;
+		}
+		else
+			break;				/* finished all buckets */
+
+		while (hashTuple != NULL)
+		{
+			if (!HeapTupleHeaderHasMatch(HJTUPLE_MINTUPLE(hashTuple)))
+			{
+				TupleTableSlot *inntuple;
+
+				/* insert hashtable's tuple into exec slot */
+				inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
+												 hjstate->hj_HashTupleSlot,
+												 false);	/* do not pfree */
+				econtext->ecxt_innertuple = inntuple;
+
+				/*
+				 * Reset temp memory each time; although this function doesn't
+				 * do any qual eval, the caller will, so let's keep it
+				 * parallel to ExecScanHashBucket.
+				 */
+				ResetExprContext(econtext);
+
+				hjstate->hj_CurTuple = hashTuple;
+				return true;
+			}
+
+			hashTuple = hashTuple->next;
+		}
+	}
+
+	/*
+	 * no more unmatched tuples
+	 */
+	return false;
 }
 
 /*
@@ -960,6 +1071,35 @@ ExecHashTableReset(HashJoinTable hashtable)
 	MemoryContextSwitchTo(oldcxt);
 }
 
+/*
+ * ExecHashTableResetMatchFlags
+ *		Clear all the HeapTupleHeaderHasMatch flags in the table
+ */
+void
+ExecHashTableResetMatchFlags(HashJoinTable hashtable)
+{
+	HashJoinTuple tuple;
+	int			i;
+
+	/* Reset all flags in the main table ... */
+	for (i = 0; i < hashtable->nbuckets; i++)
+	{
+		for (tuple = hashtable->buckets[i]; tuple != NULL; tuple = tuple->next)
+			HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+	}
+
+	/* ... and the same for the skew buckets, if any */
+	for (i = 0; i < hashtable->nSkewBuckets; i++)
+	{
+		int		j = hashtable->skewBucketNums[i];
+		HashSkewBucket *skewBucket = hashtable->skewBucket[j];
+
+		for (tuple = skewBucket->tuples; tuple != NULL; tuple = tuple->next)
+			HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(tuple));
+	}
+}
+
+
 void
 ExecReScanHash(HashState *node)
 {
@@ -1203,6 +1343,7 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
 												   hashTupleSize);
 	hashTuple->hashvalue = hashvalue;
 	memcpy(HJTUPLE_MINTUPLE(hashTuple), tuple, tuple->t_len);
+	HeapTupleHeaderClearMatch(HJTUPLE_MINTUPLE(hashTuple));
 
 	/* Push it onto the front of the skew bucket's list */
 	hashTuple->next = hashtable->skewBucket[bucketNumber]->tuples;
author	Tom Lane <tgl@sss.pgh.pa.us>	2010-12-30 20:24:55 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2010-12-30 20:26:08 -0500
commit	f4e4b3274317d9ce30de7e7e5b04dece7c4e1791 (patch)
tree	6e3b700d25cb841749b4313e69bb4c30583e666c /src/backend/executor/nodeHash.c
parent	17cb9e8c984746d3bbdf0d94367a0c5a6e2b6aee (diff)
download	postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.tar.gz postgresql-f4e4b3274317d9ce30de7e7e5b04dece7c4e1791.zip