aboutsummaryrefslogtreecommitdiff
path: root/src/backend/executor
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2007-01-28 23:21:26 +0000
committer: Tom Lane <tgl@sss.pgh.pa.us> 2007-01-28 23:21:26 +0000
commit: b39e91501c101d67b92f3e6965da5dc111195f52 (patch)
tree: 1820d8ba0289c5e32a025b9f62bdd98ca494a972 /src/backend/executor
parent: 28c480e9ae64fc239fa1ebe32a981312e65ae1e7 (diff)
download: postgresql-b39e91501c101d67b92f3e6965da5dc111195f52.tar.gz
download: postgresql-b39e91501c101d67b92f3e6965da5dc111195f52.zip
Improve hash join to discard input tuples immediately if they can't match because they contain a null join key (and the join operator is known strict).
Improves performance significantly when the inner relation contains a lot of nulls, as per bug #2930.
Diffstat (limited to 'src/backend/executor')
-rw-r--r-- src/backend/executor/nodeHash.c | 54
-rw-r--r-- src/backend/executor/nodeHashjoin.c | 29
2 files changed, 62 insertions, 21 deletions
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index de64e28293d..dffe8cb0d30 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.108 2007/01/05 22:19:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.109 2007/01/28 23:21:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -92,11 +92,14 @@ MultiExecHash(HashState *node)
slot = ExecProcNode(outerNode);
if (TupIsNull(slot))
break;
- hashtable->totalTuples += 1;
/* We have to compute the hash value */
econtext->ecxt_innertuple = slot;
- hashvalue = ExecHashGetHashValue(hashtable, econtext, hashkeys);
- ExecHashTableInsert(hashtable, slot, hashvalue);
+ if (ExecHashGetHashValue(hashtable, econtext, hashkeys, false,
+ &hashvalue))
+ {
+ ExecHashTableInsert(hashtable, slot, hashvalue);
+ hashtable->totalTuples += 1;
+ }
}
/* must provide our own instrumentation support */
@@ -261,19 +264,23 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
/*
* Get info about the hash functions to be used for each hash key.
+ * Also remember whether the join operators are strict.
*/
nkeys = list_length(hashOperators);
hashtable->hashfunctions = (FmgrInfo *) palloc(nkeys * sizeof(FmgrInfo));
+ hashtable->hashStrict = (bool *) palloc(nkeys * sizeof(bool));
i = 0;
foreach(ho, hashOperators)
{
+ Oid hashop = lfirst_oid(ho);
Oid hashfn;
- hashfn = get_op_hash_function(lfirst_oid(ho));
+ hashfn = get_op_hash_function(hashop);
if (!OidIsValid(hashfn))
elog(ERROR, "could not find hash function for hash operator %u",
- lfirst_oid(ho));
+ hashop);
fmgr_info(hashfn, &hashtable->hashfunctions[i]);
+ hashtable->hashStrict[i] = op_strict(hashop);
i++;
}
@@ -657,11 +664,18 @@ ExecHashTableInsert(HashJoinTable hashtable,
* The tuple to be tested must be in either econtext->ecxt_outertuple or
* econtext->ecxt_innertuple. Vars in the hashkeys expressions reference
* either OUTER or INNER.
+ *
+ * A TRUE result means the tuple's hash value has been successfully computed
+ * and stored at *hashvalue. A FALSE result means the tuple cannot match
+ * because it contains a null attribute, and hence it should be discarded
+ * immediately. (If keep_nulls is true then FALSE is never returned.)
*/
-uint32
+bool
ExecHashGetHashValue(HashJoinTable hashtable,
ExprContext *econtext,
- List *hashkeys)
+ List *hashkeys,
+ bool keep_nulls,
+ uint32 *hashvalue)
{
uint32 hashkey = 0;
ListCell *hk;
@@ -691,10 +705,27 @@ ExecHashGetHashValue(HashJoinTable hashtable,
keyval = ExecEvalExpr(keyexpr, econtext, &isNull, NULL);
/*
- * Compute the hash function
+ * If the attribute is NULL, and the join operator is strict, then
+ * this tuple cannot pass the join qual so we can reject it
+ * immediately (unless we're scanning the outside of an outer join,
+ * in which case we must not reject it). Otherwise we act like the
+ * hashcode of NULL is zero (this will support operators that act like
+ * IS NOT DISTINCT, though not any more-random behavior). We treat
+ * the hash support function as strict even if the operator is not.
+ *
+ * Note: currently, all hashjoinable operators must be strict since
+ * the hash index AM assumes that. However, it takes so little
+ * extra code here to allow non-strict that we may as well do it.
*/
- if (!isNull) /* treat nulls as having hash key 0 */
+ if (isNull)
+ {
+ if (hashtable->hashStrict[i] && !keep_nulls)
+ return false; /* cannot match */
+ /* else, leave hashkey unmodified, equivalent to hashcode 0 */
+ }
+ else
{
+ /* Compute the hash function */
uint32 hkey;
hkey = DatumGetUInt32(FunctionCall1(&hashtable->hashfunctions[i],
@@ -707,7 +738,8 @@ ExecHashGetHashValue(HashJoinTable hashtable,
MemoryContextSwitchTo(oldContext);
- return hashkey;
+ *hashvalue = hashkey;
+ return true;
}
/*
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 7f0801c69bd..b03086fb364 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.86 2007/01/05 22:19:28 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.87 2007/01/28 23:21:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -547,9 +547,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
int curbatch = hashtable->curbatch;
TupleTableSlot *slot;
- if (curbatch == 0)
- { /* if it is the first pass */
-
+ if (curbatch == 0) /* if it is the first pass */
+ {
/*
* Check to see if first outer tuple was already fetched by
* ExecHashJoin() and not used yet.
@@ -559,7 +558,8 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
hjstate->hj_FirstOuterTupleSlot = NULL;
else
slot = ExecProcNode(outerNode);
- if (!TupIsNull(slot))
+
+ while (!TupIsNull(slot))
{
/*
* We have to compute the tuple's hash value.
@@ -567,13 +567,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
econtext->ecxt_outertuple = slot;
- *hashvalue = ExecHashGetHashValue(hashtable, econtext,
- hjstate->hj_OuterHashKeys);
+ if (ExecHashGetHashValue(hashtable, econtext,
+ hjstate->hj_OuterHashKeys,
+ (hjstate->js.jointype == JOIN_LEFT),
+ hashvalue))
+ {
+ /* remember outer relation is not empty for possible rescan */
+ hjstate->hj_OuterNotEmpty = true;
- /* remember outer relation is not empty for possible rescan */
- hjstate->hj_OuterNotEmpty = true;
+ return slot;
+ }
- return slot;
+ /*
+ * That tuple couldn't match because of a NULL, so discard it
+ * and continue with the next one.
+ */
+ slot = ExecProcNode(outerNode);
}
/*