Add support for cross-type hashing in hashed subplans (hashed IN/NOT IN cases that aren't turned into true joins). Since this is the last missing bit of infrastructure, go ahead and fill out the hash integer_ops and float_ops opfamilies with cross-type operators. The operator family project is now DONE ... er, except for documentation ...
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-02-06 02:59:15 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-02-06 02:59:15 +0000
commit: ab05eedecc5c5b3a07ff101d29c9fef612f42996 (patch)
tree: 39005eb2a059051dc7458a567802315c06d5d201 /src/backend
parent: b70e536e4d82d72745a7bd71556ff7bbecb568a2 (diff)
download: postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.tar.gz
postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.zip
4 files changed, 114 insertions, 43 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 08391bcc459..e6c9cf2a7d4 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.24 2007/01/30 01:33:36 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.25 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -178,7 +178,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
 /*
  * execTuplesMatchPrepare
  *		Look up the equality functions needed for execTuplesMatch or
- *		execTuplesUnequal.
+ *		execTuplesUnequal, given an array of equality operator OIDs.
  *
  * The result is a palloc'd array.
  */
@@ -208,6 +208,8 @@ execTuplesMatchPrepare(int numCols,
  * This is similar to execTuplesMatchPrepare, but we also need to find the
  * hash functions associated with the equality operators.  *eqFunctions and
  * *hashFunctions receive the palloc'd result arrays.
+ *
+ * Note: we expect that the given operators are not cross-type comparisons.
  */
 void
 execTuplesHashPrepare(int numCols,
@@ -232,7 +234,7 @@ execTuplesHashPrepare(int numCols,
 								   &left_hash_function, &right_hash_function))
 			elog(ERROR, "could not find hash function for hash operator %u",
 				 eq_opr);
-		/* For the moment, we're not supporting cross-type cases here */
+		/* We're not supporting cross-type cases here */
 		Assert(left_hash_function == right_hash_function);
 		fmgr_info(eq_function, &(*eqFunctions)[i]);
 		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
@@ -259,7 +261,9 @@ execTuplesHashPrepare(int numCols,
  *	tablecxt: memory context in which to store table and table entries
  *	tempcxt: short-lived context for evaluation hash and comparison functions
  *
- * The function arrays may be made with execTuplesHashPrepare().
+ * The function arrays may be made with execTuplesHashPrepare().  Note they
+ * are not cross-type functions, but expect to see the table datatype(s)
+ * on both sides.
  *
  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
  * storage that will live as long as the hashtable does.
@@ -282,13 +286,15 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 
 	hashtable->numCols = numCols;
 	hashtable->keyColIdx = keyColIdx;
-	hashtable->eqfunctions = eqfunctions;
-	hashtable->hashfunctions = hashfunctions;
+	hashtable->tab_hash_funcs = hashfunctions;
+	hashtable->tab_eq_funcs = eqfunctions;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
 	hashtable->entrysize = entrysize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
+	hashtable->in_hash_funcs = NULL;
+	hashtable->cur_eq_funcs = NULL;
 
 	MemSet(&hash_ctl, 0, sizeof(hash_ctl));
 	hash_ctl.keysize = sizeof(TupleHashEntryData);
@@ -305,7 +311,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
 
 /*
  * Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * given tuple.  The tuple must be the same type as the hashtable entries.
  *
  * If isnew is NULL, we do not create new entries; we return NULL if no
  * match is found.
@@ -351,6 +357,9 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 	 * invoke this code re-entrantly.
 	 */
 	hashtable->inputslot = slot;
+	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+	hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
+
 	saveCurHT = CurTupleHashTable;
 	CurTupleHashTable = hashtable;
 
@@ -395,6 +404,55 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
 }
 
 /*
+ * Search for a hashtable entry matching the given tuple.  No entry is
+ * created if there's not a match.  This is similar to the non-creating
+ * case of LookupTupleHashEntry, except that it supports cross-type
+ * comparisons, in which the given tuple is not of the same type as the
+ * table entries.  The caller must provide the hash functions to use for
+ * the input tuple, as well as the equality functions, since these may be
+ * different from the table's internal functions.
+ */
+TupleHashEntry
+FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+				   FmgrInfo *eqfunctions,
+				   FmgrInfo *hashfunctions)
+{
+	TupleHashEntry entry;
+	MemoryContext oldContext;
+	TupleHashTable saveCurHT;
+	TupleHashEntryData dummy;
+
+	/* Need to run the hash functions in short-lived context */
+	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+	/*
+	 * Set up data needed by hash and match functions
+	 *
+	 * We save and restore CurTupleHashTable just in case someone manages to
+	 * invoke this code re-entrantly.
+	 */
+	hashtable->inputslot = slot;
+	hashtable->in_hash_funcs = hashfunctions;
+	hashtable->cur_eq_funcs = eqfunctions;
+
+	saveCurHT = CurTupleHashTable;
+	CurTupleHashTable = hashtable;
+
+	/* Search the hash table */
+	dummy.firstTuple = NULL;	/* flag to reference inputslot */
+	entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+										 &dummy,
+										 HASH_FIND,
+										 NULL);
+
+	CurTupleHashTable = saveCurHT;
+
+	MemoryContextSwitchTo(oldContext);
+
+	return entry;
+}
+
+/*
  * Compute the hash value for a tuple
  *
  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
@@ -418,6 +476,7 @@ TupleHashTableHash(const void *key, Size keysize)
 	TupleHashTable hashtable = CurTupleHashTable;
 	int			numCols = hashtable->numCols;
 	AttrNumber *keyColIdx = hashtable->keyColIdx;
+	FmgrInfo   *hashfunctions;
 	uint32		hashkey = 0;
 	int			i;
 
@@ -425,6 +484,7 @@ TupleHashTableHash(const void *key, Size keysize)
 	{
 		/* Process the current input tuple for the table */
 		slot = hashtable->inputslot;
+		hashfunctions = hashtable->in_hash_funcs;
 	}
 	else
 	{
@@ -432,6 +492,7 @@ TupleHashTableHash(const void *key, Size keysize)
 		/* (this case never actually occurs in current dynahash.c code) */
 		slot = hashtable->tableslot;
 		ExecStoreMinimalTuple(tuple, slot, false);
+		hashfunctions = hashtable->tab_hash_funcs;
 	}
 
 	for (i = 0; i < numCols; i++)
@@ -449,7 +510,7 @@ TupleHashTableHash(const void *key, Size keysize)
 		{
 			uint32		hkey;
 
-			hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
+			hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
 												attr));
 			hashkey ^= hkey;
 		}
@@ -493,11 +554,12 @@ TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
 	Assert(tuple2 == NULL);
 	slot2 = hashtable->inputslot;
 
-	if (execTuplesMatch(slot1,
-						slot2,
+	/* For crosstype comparisons, the inputslot must be first */
+	if (execTuplesMatch(slot2,
+						slot1,
 						hashtable->numCols,
 						hashtable->keyColIdx,
-						hashtable->eqfunctions,
+						hashtable->cur_eq_funcs,
 						hashtable->tempcxt))
 		return 0;
 	else
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 0e840802eb0..32167a94efb 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.84 2007/02/02 00:07:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.85 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -139,7 +139,10 @@ ExecHashSubPlan(SubPlanState *node,
 	if (slotNoNulls(slot))
 	{
 		if (node->havehashrows &&
-			LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+			FindTupleHashEntry(node->hashtable,
+							   slot,
+							   node->cur_eq_funcs,
+							   node->lhs_hash_funcs) != NULL)
 		{
 			ExecClearTuple(slot);
 			return BoolGetDatum(true);
@@ -453,8 +456,8 @@ buildSubPlanHash(SubPlanState *node)
 
 	node->hashtable = BuildTupleHashTable(ncols,
 										  node->keyColIdx,
-										  node->eqfunctions,
-										  node->hashfunctions,
+										  node->tab_eq_funcs,
+										  node->tab_hash_funcs,
 										  nbuckets,
 										  sizeof(TupleHashEntryData),
 										  node->tablecxt,
@@ -472,8 +475,8 @@ buildSubPlanHash(SubPlanState *node)
 		}
 		node->hashnulls = BuildTupleHashTable(ncols,
 											  node->keyColIdx,
-											  node->eqfunctions,
-											  node->hashfunctions,
+											  node->tab_eq_funcs,
+											  node->tab_hash_funcs,
 											  nbuckets,
 											  sizeof(TupleHashEntryData),
 											  node->tablecxt,
@@ -573,9 +576,9 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
 	while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
 	{
 		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
-		if (!execTuplesUnequal(hashtable->tableslot, slot,
+		if (!execTuplesUnequal(slot, hashtable->tableslot,
 							   numCols, keyColIdx,
-							   hashtable->eqfunctions,
+							   hashtable->cur_eq_funcs,
 							   hashtable->tempcxt))
 			return true;
 	}
@@ -653,8 +656,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 	node->tablecxt = NULL;
 	node->innerecontext = NULL;
 	node->keyColIdx = NULL;
-	node->eqfunctions = NULL;
-	node->hashfunctions = NULL;
+	node->tab_hash_funcs = NULL;
+	node->tab_eq_funcs = NULL;
+	node->lhs_hash_funcs = NULL;
+	node->cur_eq_funcs = NULL;
 
 	/*
 	 * create an EState for the subplan
@@ -781,8 +786,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 
 		lefttlist = righttlist = NIL;
 		leftptlist = rightptlist = NIL;
-		node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
-		node->hashfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+		node->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
 		i = 1;
 		foreach(l, oplist)
 		{
@@ -792,6 +799,7 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 			Expr	   *expr;
 			TargetEntry *tle;
 			GenericExprState *tlestate;
+			Oid			rhs_eq_oper;
 			Oid			left_hashfn;
 			Oid			right_hashfn;
 
@@ -827,18 +835,24 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
 			righttlist = lappend(righttlist, tlestate);
 			rightptlist = lappend(rightptlist, tle);
 
-			/* Lookup the combining function */
-			fmgr_info(opexpr->opfuncid, &node->eqfunctions[i - 1]);
-			node->eqfunctions[i - 1].fn_expr = (Node *) opexpr;
+			/* Lookup the equality function (potentially cross-type) */
+			fmgr_info(opexpr->opfuncid, &node->cur_eq_funcs[i - 1]);
+			node->cur_eq_funcs[i - 1].fn_expr = (Node *) opexpr;
+
+			/* Look up the equality function for the RHS type */
+			if (!get_compatible_hash_operators(opexpr->opno,
+											   NULL, &rhs_eq_oper))
+				elog(ERROR, "could not find compatible hash operator for operator %u",
+					 opexpr->opno);
+			fmgr_info(get_opcode(rhs_eq_oper), &node->tab_eq_funcs[i - 1]);
 
 			/* Lookup the associated hash functions */
 			if (!get_op_hash_functions(opexpr->opno,
 									   &left_hashfn, &right_hashfn))
 				elog(ERROR, "could not find hash function for hash operator %u",
 					 opexpr->opno);
-			/* For the moment, not supporting cross-type cases */
-			Assert(left_hashfn == right_hashfn);
-			fmgr_info(right_hashfn, &node->hashfunctions[i - 1]);
+			fmgr_info(left_hashfn, &node->lhs_hash_funcs[i - 1]);
+			fmgr_info(right_hashfn, &node->tab_hash_funcs[i - 1]);
 
 			i++;
 		}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7339445e046..e79991a0f60 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.118 2007/02/06 02:59:11 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -598,17 +598,13 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
 		return false;
 
 	/*
-	 * The combining operators must be hashable, strict, and self-commutative.
+	 * The combining operators must be hashable and strict.
 	 * The need for hashability is obvious, since we want to use hashing.
 	 * Without strictness, behavior in the presence of nulls is too
-	 * unpredictable.  (We actually must assume even more than plain
-	 * strictness, see nodeSubplan.c for details.)	And commutativity ensures
-	 * that the left and right datatypes are the same; this allows us to
-	 * assume that the combining operators are equality for the righthand
-	 * datatype, so that they can be used to compare righthand tuples as well
-	 * as comparing lefthand to righthand tuples.  (This last restriction
-	 * could be relaxed by using two different sets of operators with the hash
-	 * table, but there is no obvious usefulness to that at present.)
+	 * unpredictable.  We actually must assume even more than plain
+	 * strictness: they can't yield NULL for non-null inputs, either
+	 * (see nodeSubplan.c).  However, hash indexes and hash joins assume
+	 * that too.
 	 */
 	if (IsA(slink->testexpr, OpExpr))
 	{
@@ -644,8 +640,7 @@ hash_ok_operator(OpExpr *expr)
 	if (!HeapTupleIsValid(tup))
 		elog(ERROR, "cache lookup failed for operator %u", opid);
 	optup = (Form_pg_operator) GETSTRUCT(tup);
-	if (!optup->oprcanhash || optup->oprcom != opid ||
-		!func_strict(optup->oprcode))
+	if (!optup->oprcanhash || !func_strict(optup->oprcode))
 	{
 		ReleaseSysCache(tup);
 		return false;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5832d145ef0..81f7c99e963 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.138 2007/02/06 02:59:12 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1070,7 +1070,7 @@ distinct_col_search(int colno, List *colnos, List *opids)
  * We assume hashed aggregation will work if each IN operator is marked
  * hashjoinable.  If the IN operators are cross-type, this could conceivably
  * fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * datatype --- but it's pretty hard to conceive of a hash opfamily that has
  * cross-type hashing without support for hashing the individual types, so
  * we don't expend cycles here to support the case.  We could check
  * get_compatible_hash_operator() instead of just op_hashjoinable(), but the
author	Tom Lane <tgl@sss.pgh.pa.us>	2007-02-06 02:59:15 +0000
committer	Tom Lane <tgl@sss.pgh.pa.us>	2007-02-06 02:59:15 +0000
commit	ab05eedecc5c5b3a07ff101d29c9fef612f42996 (patch)
tree	39005eb2a059051dc7458a567802315c06d5d201 /src/backend
parent	b70e536e4d82d72745a7bd71556ff7bbecb568a2 (diff)
download	postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.tar.gz postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.zip