about | summary | refs | log | tree | commit | diff
path: root/src/backend
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2007-02-06 02:59:15 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2007-02-06 02:59:15 +0000
commit ab05eedecc5c5b3a07ff101d29c9fef612f42996 (patch)
tree 39005eb2a059051dc7458a567802315c06d5d201 /src/backend
parent b70e536e4d82d72745a7bd71556ff7bbecb568a2 (diff)
download postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.tar.gz
postgresql-ab05eedecc5c5b3a07ff101d29c9fef612f42996.zip
Add support for cross-type hashing in hashed subplans (hashed IN/NOT IN cases
that aren't turned into true joins). Since this is the last missing bit of infrastructure, go ahead and fill out the hash integer_ops and float_ops opfamilies with cross-type operators. The operator family project is now DONE ... er, except for documentation ...
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/executor/execGrouping.c 84
-rw-r--r-- src/backend/executor/nodeSubplan.c 50
-rw-r--r-- src/backend/optimizer/plan/subselect.c 19
-rw-r--r-- src/backend/optimizer/util/pathnode.c 4
4 files changed, 114 insertions, 43 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
index 08391bcc459..e6c9cf2a7d4 100644
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.24 2007/01/30 01:33:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/execGrouping.c,v 1.25 2007/02/06 02:59:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -178,7 +178,7 @@ execTuplesUnequal(TupleTableSlot *slot1,
/*
* execTuplesMatchPrepare
* Look up the equality functions needed for execTuplesMatch or
- * execTuplesUnequal.
+ * execTuplesUnequal, given an array of equality operator OIDs.
*
* The result is a palloc'd array.
*/
@@ -208,6 +208,8 @@ execTuplesMatchPrepare(int numCols,
* This is similar to execTuplesMatchPrepare, but we also need to find the
* hash functions associated with the equality operators. *eqFunctions and
* *hashFunctions receive the palloc'd result arrays.
+ *
+ * Note: we expect that the given operators are not cross-type comparisons.
*/
void
execTuplesHashPrepare(int numCols,
@@ -232,7 +234,7 @@ execTuplesHashPrepare(int numCols,
&left_hash_function, &right_hash_function))
elog(ERROR, "could not find hash function for hash operator %u",
eq_opr);
- /* For the moment, we're not supporting cross-type cases here */
+ /* We're not supporting cross-type cases here */
Assert(left_hash_function == right_hash_function);
fmgr_info(eq_function, &(*eqFunctions)[i]);
fmgr_info(right_hash_function, &(*hashFunctions)[i]);
@@ -259,7 +261,9 @@ execTuplesHashPrepare(int numCols,
* tablecxt: memory context in which to store table and table entries
* tempcxt: short-lived context for evaluation hash and comparison functions
*
- * The function arrays may be made with execTuplesHashPrepare().
+ * The function arrays may be made with execTuplesHashPrepare(). Note they
+ * are not cross-type functions, but expect to see the table datatype(s)
+ * on both sides.
*
* Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
* storage that will live as long as the hashtable does.
@@ -282,13 +286,15 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
- hashtable->eqfunctions = eqfunctions;
- hashtable->hashfunctions = hashfunctions;
+ hashtable->tab_hash_funcs = hashfunctions;
+ hashtable->tab_eq_funcs = eqfunctions;
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
hashtable->tableslot = NULL; /* will be made on first lookup */
hashtable->inputslot = NULL;
+ hashtable->in_hash_funcs = NULL;
+ hashtable->cur_eq_funcs = NULL;
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(TupleHashEntryData);
@@ -305,7 +311,7 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
/*
* Find or create a hashtable entry for the tuple group containing the
- * given tuple.
+ * given tuple. The tuple must be the same type as the hashtable entries.
*
* If isnew is NULL, we do not create new entries; we return NULL if no
* match is found.
@@ -351,6 +357,9 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
* invoke this code re-entrantly.
*/
hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
+
saveCurHT = CurTupleHashTable;
CurTupleHashTable = hashtable;
@@ -395,6 +404,55 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
}
/*
+ * Search for a hashtable entry matching the given tuple. No entry is
+ * created if there's not a match. This is similar to the non-creating
+ * case of LookupTupleHashEntry, except that it supports cross-type
+ * comparisons, in which the given tuple is not of the same type as the
+ * table entries. The caller must provide the hash functions to use for
+ * the input tuple, as well as the equality functions, since these may be
+ * different from the table's internal functions.
+ */
+TupleHashEntry
+FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+ FmgrInfo *eqfunctions,
+ FmgrInfo *hashfunctions)
+{
+ TupleHashEntry entry;
+ MemoryContext oldContext;
+ TupleHashTable saveCurHT;
+ TupleHashEntryData dummy;
+
+ /* Need to run the hash functions in short-lived context */
+ oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+ /*
+ * Set up data needed by hash and match functions
+ *
+ * We save and restore CurTupleHashTable just in case someone manages to
+ * invoke this code re-entrantly.
+ */
+ hashtable->inputslot = slot;
+ hashtable->in_hash_funcs = hashfunctions;
+ hashtable->cur_eq_funcs = eqfunctions;
+
+ saveCurHT = CurTupleHashTable;
+ CurTupleHashTable = hashtable;
+
+ /* Search the hash table */
+ dummy.firstTuple = NULL; /* flag to reference inputslot */
+ entry = (TupleHashEntry) hash_search(hashtable->hashtab,
+ &dummy,
+ HASH_FIND,
+ NULL);
+
+ CurTupleHashTable = saveCurHT;
+
+ MemoryContextSwitchTo(oldContext);
+
+ return entry;
+}
+
+/*
* Compute the hash value for a tuple
*
* The passed-in key is a pointer to TupleHashEntryData. In an actual hash
@@ -418,6 +476,7 @@ TupleHashTableHash(const void *key, Size keysize)
TupleHashTable hashtable = CurTupleHashTable;
int numCols = hashtable->numCols;
AttrNumber *keyColIdx = hashtable->keyColIdx;
+ FmgrInfo *hashfunctions;
uint32 hashkey = 0;
int i;
@@ -425,6 +484,7 @@ TupleHashTableHash(const void *key, Size keysize)
{
/* Process the current input tuple for the table */
slot = hashtable->inputslot;
+ hashfunctions = hashtable->in_hash_funcs;
}
else
{
@@ -432,6 +492,7 @@ TupleHashTableHash(const void *key, Size keysize)
/* (this case never actually occurs in current dynahash.c code) */
slot = hashtable->tableslot;
ExecStoreMinimalTuple(tuple, slot, false);
+ hashfunctions = hashtable->tab_hash_funcs;
}
for (i = 0; i < numCols; i++)
@@ -449,7 +510,7 @@ TupleHashTableHash(const void *key, Size keysize)
{
uint32 hkey;
- hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
+ hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
attr));
hashkey ^= hkey;
}
@@ -493,11 +554,12 @@ TupleHashTableMatch(const void *key1, const void *key2, Size keysize)
Assert(tuple2 == NULL);
slot2 = hashtable->inputslot;
- if (execTuplesMatch(slot1,
- slot2,
+ /* For crosstype comparisons, the inputslot must be first */
+ if (execTuplesMatch(slot2,
+ slot1,
hashtable->numCols,
hashtable->keyColIdx,
- hashtable->eqfunctions,
+ hashtable->cur_eq_funcs,
hashtable->tempcxt))
return 0;
else
diff --git a/src/backend/executor/nodeSubplan.c b/src/backend/executor/nodeSubplan.c
index 0e840802eb0..32167a94efb 100644
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.84 2007/02/02 00:07:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeSubplan.c,v 1.85 2007/02/06 02:59:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -139,7 +139,10 @@ ExecHashSubPlan(SubPlanState *node,
if (slotNoNulls(slot))
{
if (node->havehashrows &&
- LookupTupleHashEntry(node->hashtable, slot, NULL) != NULL)
+ FindTupleHashEntry(node->hashtable,
+ slot,
+ node->cur_eq_funcs,
+ node->lhs_hash_funcs) != NULL)
{
ExecClearTuple(slot);
return BoolGetDatum(true);
@@ -453,8 +456,8 @@ buildSubPlanHash(SubPlanState *node)
node->hashtable = BuildTupleHashTable(ncols,
node->keyColIdx,
- node->eqfunctions,
- node->hashfunctions,
+ node->tab_eq_funcs,
+ node->tab_hash_funcs,
nbuckets,
sizeof(TupleHashEntryData),
node->tablecxt,
@@ -472,8 +475,8 @@ buildSubPlanHash(SubPlanState *node)
}
node->hashnulls = BuildTupleHashTable(ncols,
node->keyColIdx,
- node->eqfunctions,
- node->hashfunctions,
+ node->tab_eq_funcs,
+ node->tab_hash_funcs,
nbuckets,
sizeof(TupleHashEntryData),
node->tablecxt,
@@ -573,9 +576,9 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot)
while ((entry = ScanTupleHashTable(&hashiter)) != NULL)
{
ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
- if (!execTuplesUnequal(hashtable->tableslot, slot,
+ if (!execTuplesUnequal(slot, hashtable->tableslot,
numCols, keyColIdx,
- hashtable->eqfunctions,
+ hashtable->cur_eq_funcs,
hashtable->tempcxt))
return true;
}
@@ -653,8 +656,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
node->tablecxt = NULL;
node->innerecontext = NULL;
node->keyColIdx = NULL;
- node->eqfunctions = NULL;
- node->hashfunctions = NULL;
+ node->tab_hash_funcs = NULL;
+ node->tab_eq_funcs = NULL;
+ node->lhs_hash_funcs = NULL;
+ node->cur_eq_funcs = NULL;
/*
* create an EState for the subplan
@@ -781,8 +786,10 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
lefttlist = righttlist = NIL;
leftptlist = rightptlist = NIL;
- node->eqfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
- node->hashfunctions = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ node->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ node->tab_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ node->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ node->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
i = 1;
foreach(l, oplist)
{
@@ -792,6 +799,7 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
Expr *expr;
TargetEntry *tle;
GenericExprState *tlestate;
+ Oid rhs_eq_oper;
Oid left_hashfn;
Oid right_hashfn;
@@ -827,18 +835,24 @@ ExecInitSubPlan(SubPlanState *node, EState *estate, int eflags)
righttlist = lappend(righttlist, tlestate);
rightptlist = lappend(rightptlist, tle);
- /* Lookup the combining function */
- fmgr_info(opexpr->opfuncid, &node->eqfunctions[i - 1]);
- node->eqfunctions[i - 1].fn_expr = (Node *) opexpr;
+ /* Lookup the equality function (potentially cross-type) */
+ fmgr_info(opexpr->opfuncid, &node->cur_eq_funcs[i - 1]);
+ node->cur_eq_funcs[i - 1].fn_expr = (Node *) opexpr;
+
+ /* Look up the equality function for the RHS type */
+ if (!get_compatible_hash_operators(opexpr->opno,
+ NULL, &rhs_eq_oper))
+ elog(ERROR, "could not find compatible hash operator for operator %u",
+ opexpr->opno);
+ fmgr_info(get_opcode(rhs_eq_oper), &node->tab_eq_funcs[i - 1]);
/* Lookup the associated hash functions */
if (!get_op_hash_functions(opexpr->opno,
&left_hashfn, &right_hashfn))
elog(ERROR, "could not find hash function for hash operator %u",
opexpr->opno);
- /* For the moment, not supporting cross-type cases */
- Assert(left_hashfn == right_hashfn);
- fmgr_info(right_hashfn, &node->hashfunctions[i - 1]);
+ fmgr_info(left_hashfn, &node->lhs_hash_funcs[i - 1]);
+ fmgr_info(right_hashfn, &node->tab_hash_funcs[i - 1]);
i++;
}
diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c
index 7339445e046..e79991a0f60 100644
--- a/src/backend/optimizer/plan/subselect.c
+++ b/src/backend/optimizer/plan/subselect.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.117 2007/01/10 18:06:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.118 2007/02/06 02:59:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -598,17 +598,13 @@ subplan_is_hashable(SubLink *slink, SubPlan *node)
return false;
/*
- * The combining operators must be hashable, strict, and self-commutative.
+ * The combining operators must be hashable and strict.
* The need for hashability is obvious, since we want to use hashing.
* Without strictness, behavior in the presence of nulls is too
- * unpredictable. (We actually must assume even more than plain
- * strictness, see nodeSubplan.c for details.) And commutativity ensures
- * that the left and right datatypes are the same; this allows us to
- * assume that the combining operators are equality for the righthand
- * datatype, so that they can be used to compare righthand tuples as well
- * as comparing lefthand to righthand tuples. (This last restriction
- * could be relaxed by using two different sets of operators with the hash
- * table, but there is no obvious usefulness to that at present.)
+ * unpredictable. We actually must assume even more than plain
+ * strictness: they can't yield NULL for non-null inputs, either
+ * (see nodeSubplan.c). However, hash indexes and hash joins assume
+ * that too.
*/
if (IsA(slink->testexpr, OpExpr))
{
@@ -644,8 +640,7 @@ hash_ok_operator(OpExpr *expr)
if (!HeapTupleIsValid(tup))
elog(ERROR, "cache lookup failed for operator %u", opid);
optup = (Form_pg_operator) GETSTRUCT(tup);
- if (!optup->oprcanhash || optup->oprcom != opid ||
- !func_strict(optup->oprcode))
+ if (!optup->oprcanhash || !func_strict(optup->oprcode))
{
ReleaseSysCache(tup);
return false;
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 5832d145ef0..81f7c99e963 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.137 2007/01/20 20:45:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.138 2007/02/06 02:59:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1070,7 +1070,7 @@ distinct_col_search(int colno, List *colnos, List *opids)
* We assume hashed aggregation will work if each IN operator is marked
* hashjoinable. If the IN operators are cross-type, this could conceivably
* fail: the aggregation will need a hashable equality operator for the RHS
- * datatype --- but it's pretty hard to conceive of a hash opclass that has
+ * datatype --- but it's pretty hard to conceive of a hash opfamily that has
* cross-type hashing without support for hashing the individual types, so
* we don't expend cycles here to support the case. We could check
* get_compatible_hash_operator() instead of just op_hashjoinable(), but the