author     David Rowley <drowley@postgresql.org>    2024-08-20 13:38:22 +1200
committer  David Rowley <drowley@postgresql.org>    2024-08-20 13:38:22 +1200
commit     adf97c1562380e02acd60dc859c289ed3a8352ee (patch)
tree       bbc199a61078c00d997903c4d5ce0c2fdccc7224 /src/backend/executor
parent     9380e5f129d2a160ecc2444f61bb7cb97fd51fbb (diff)
Speed up Hash Join by making ExprStates support hashing
Here we add ExprState support for obtaining a 32-bit hash value from a
list of expressions. This allows both faster hashing and JIT compilation
of these expressions. It is especially useful when a hash join has
multiple join keys: the previous code called ExecEvalExpr on each hash
join key individually, which was inefficient because tuple deformation
only accounted for one key at a time and could therefore walk the tuple
once per join key. With the new code, we determine the maximum attribute
required and deform the tuple to that point only once.
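As a rough stand-alone illustration (not part of this commit), the per-key
combining rule implemented by the new EEOP_HASHDATUM_FIRST and
EEOP_HASHDATUM_NEXT32 expression steps works like this: the first key's hash
is used as-is, and each later key's hash is folded in by rotating the running
value left one bit and XORing.

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the rotate-and-XOR combining used by the new
     * EEOP_HASHDATUM_* expression steps; plain C, not PostgreSQL code. */
    static uint32_t
    rotl1(uint32_t v)
    {
        return (v << 1) | (v >> 31);
    }

    static uint32_t
    combine_key_hashes(const uint32_t *key_hashes, int nkeys)
    {
        uint32_t    hash = 0;

        for (int i = 0; i < nkeys; i++)
        {
            if (i == 0)
                hash = key_hashes[i];               /* EEOP_HASHDATUM_FIRST */
            else
                hash = rotl1(hash) ^ key_hashes[i]; /* EEOP_HASHDATUM_NEXT32 */
        }
        return hash;
    }

    int
    main(void)
    {
        uint32_t    keys[] = {0x12345678, 0x9abcdef0, 0x0f0f0f0f};

        printf("combined hash: %08x\n", combine_key_hashes(keys, 3));
        return 0;
    }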
Some performance tests of this change have shown up to a 20% performance
increase for a query containing a Hash Join without JIT compilation, and
up to a 26% increase when JIT is enabled with optimization and inlining
performed by the JIT compiler. With a single join column the increase was
smaller, at 14% both with and without JIT. These tests used a fairly
small hash table and a large number of hash probes. The increase will
likely be smaller with large tables, especially ones larger than L3
cache, where memory pressure is more likely to be the limiting factor.
This commit only addresses Hash Joins, but it lays the expression
evaluation and JIT compilation infrastructure for other hashing needs
such as Hash Aggregate.
Author: David Rowley
Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net>
Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com>
Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
Diffstat (limited to 'src/backend/executor')
-rw-r--r--  src/backend/executor/execExpr.c        141
-rw-r--r--  src/backend/executor/execExprInterp.c  110
-rw-r--r--  src/backend/executor/nodeHash.c        190
-rw-r--r--  src/backend/executor/nodeHashjoin.c    143
4 files changed, 408 insertions, 176 deletions
diff --git a/src/backend/executor/execExpr.c b/src/backend/executor/execExpr.c
index 66dda8e5e69..63289ee35ee 100644
--- a/src/backend/executor/execExpr.c
+++ b/src/backend/executor/execExpr.c
@@ -3970,6 +3970,147 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
 }
 
 /*
+ * Build an ExprState that calls the given hash function(s) on the given
+ * 'hash_exprs'.  When multiple expressions are present, the hash values
+ * returned by each hash function are combined to produce a single hash value.
+ *
+ * desc: tuple descriptor for the to-be-hashed expressions
+ * ops: TupleTableSlotOps for the TupleDesc
+ * hashfunc_oids: Oid for each hash function to call, one for each 'hash_expr'
+ * collations: collation to use when calling the hash function.
+ * hash_expr: list of expressions to hash the value of
+ * opstrict: array corresponding to the 'hashfunc_oids' to store op_strict()
+ * parent: PlanState node that the 'hash_exprs' will be evaluated at
+ * init_value: Normally 0, but can be set to other values to seed the hash
+ * with some other value.  Using non-zero is slightly less efficient but can
+ * be useful.
+ * keep_nulls: if true, evaluation of the returned ExprState will abort early
+ * returning NULL if the given hash function is strict and the Datum to hash
+ * is null.  When set to false, any NULL input Datums are skipped.
+ */
+ExprState *
+ExecBuildHash32Expr(TupleDesc desc, const TupleTableSlotOps *ops,
+					const Oid *hashfunc_oids, const List *collations,
+					const List *hash_exprs, const bool *opstrict,
+					PlanState *parent, uint32 init_value, bool keep_nulls)
+{
+	ExprState  *state = makeNode(ExprState);
+	ExprEvalStep scratch = {0};
+	List	   *adjust_jumps = NIL;
+	ListCell   *lc;
+	ListCell   *lc2;
+	intptr_t	strict_opcode;
+	intptr_t	opcode;
+
+	Assert(list_length(hash_exprs) == list_length(collations));
+
+	state->parent = parent;
+
+	/* Insert setup steps as needed. */
+	ExecCreateExprSetupSteps(state, (Node *) hash_exprs);
+
+	if (init_value == 0)
+	{
+		/*
+		 * No initial value, so we can assign the result of the hash function
+		 * for the first hash_expr without having to concern ourselves with
+		 * combining the result with any initial value.
+		 */
+		strict_opcode = EEOP_HASHDATUM_FIRST_STRICT;
+		opcode = EEOP_HASHDATUM_FIRST;
+	}
+	else
+	{
+		/* Set up operation to set the initial value. */
+		scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
+		scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
+		scratch.resvalue = &state->resvalue;
+		scratch.resnull = &state->resnull;
+
+		ExprEvalPushStep(state, &scratch);
+
+		/*
+		 * When using an initial value use the NEXT32/NEXT32_STRICT ops as the
+		 * FIRST/FIRST_STRICT ops would overwrite the stored initial value.
+		 */
+		strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
+		opcode = EEOP_HASHDATUM_NEXT32;
+	}
+
+	forboth(lc, hash_exprs, lc2, collations)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+		FmgrInfo   *finfo;
+		FunctionCallInfo fcinfo;
+		int			i = foreach_current_index(lc);
+		Oid			funcid;
+		Oid			inputcollid = lfirst_oid(lc2);
+
+		funcid = hashfunc_oids[i];
+
+		/* Allocate hash function lookup data. */
+		finfo = palloc0(sizeof(FmgrInfo));
+		fcinfo = palloc0(SizeForFunctionCallInfo(1));
+
+		fmgr_info(funcid, finfo);
+
+		/*
+		 * Build the steps to evaluate the hash function's argument have it so
+		 * the value of that is stored in the 0th argument of the hash func.
+		 */
+		ExecInitExprRec(expr,
+						state,
+						&fcinfo->args[0].value,
+						&fcinfo->args[0].isnull);
+
+		scratch.resvalue = &state->resvalue;
+		scratch.resnull = &state->resnull;
+
+		/* Initialize function call parameter structure too */
+		InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
+
+		scratch.d.hashdatum.finfo = finfo;
+		scratch.d.hashdatum.fcinfo_data = fcinfo;
+		scratch.d.hashdatum.fn_addr = finfo->fn_addr;
+
+		scratch.opcode = opstrict[i] && !keep_nulls ? strict_opcode : opcode;
+		scratch.d.hashdatum.jumpdone = -1;
+
+		ExprEvalPushStep(state, &scratch);
+		adjust_jumps = lappend_int(adjust_jumps, state->steps_len - 1);
+
+		/*
+		 * For subsequent keys we must combine the hash value with the
+		 * previous hashes.
+		 */
+		strict_opcode = EEOP_HASHDATUM_NEXT32_STRICT;
+		opcode = EEOP_HASHDATUM_NEXT32;
+	}
+
+	/* adjust jump targets */
+	foreach(lc, adjust_jumps)
+	{
+		ExprEvalStep *as = &state->steps[lfirst_int(lc)];
+
+		Assert(as->opcode == EEOP_HASHDATUM_FIRST ||
+			   as->opcode == EEOP_HASHDATUM_FIRST_STRICT ||
+			   as->opcode == EEOP_HASHDATUM_NEXT32 ||
+			   as->opcode == EEOP_HASHDATUM_NEXT32_STRICT);
+		Assert(as->d.hashdatum.jumpdone == -1);
+		as->d.hashdatum.jumpdone = state->steps_len;
+	}
+
+	scratch.resvalue = NULL;
+	scratch.resnull = NULL;
+	scratch.opcode = EEOP_DONE;
+	ExprEvalPushStep(state, &scratch);
+
+	ExecReadyExpr(state);
+
+	return state;
+}
+
+/*
  * Build equality expression that can be evaluated using ExecQual(), returning
  * true if the expression context's inner/outer tuple are NOT DISTINCT.  I.e
  * two nulls match, a null and a not-null don't match.
diff --git a/src/backend/executor/execExprInterp.c b/src/backend/executor/execExprInterp.c
index ea47c4d6f9c..77394e76c37 100644
--- a/src/backend/executor/execExprInterp.c
+++ b/src/backend/executor/execExprInterp.c
@@ -477,6 +477,11 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 		&&CASE_EEOP_DOMAIN_TESTVAL,
 		&&CASE_EEOP_DOMAIN_NOTNULL,
 		&&CASE_EEOP_DOMAIN_CHECK,
+		&&CASE_EEOP_HASHDATUM_SET_INITVAL,
+		&&CASE_EEOP_HASHDATUM_FIRST,
+		&&CASE_EEOP_HASHDATUM_FIRST_STRICT,
+		&&CASE_EEOP_HASHDATUM_NEXT32,
+		&&CASE_EEOP_HASHDATUM_NEXT32_STRICT,
 		&&CASE_EEOP_CONVERT_ROWTYPE,
 		&&CASE_EEOP_SCALARARRAYOP,
 		&&CASE_EEOP_HASHED_SCALARARRAYOP,
@@ -1543,6 +1548,111 @@ ExecInterpExpr(ExprState *state, ExprContext *econtext, bool *isnull)
 			EEO_NEXT();
 		}
 
+		EEO_CASE(EEOP_HASHDATUM_SET_INITVAL)
+		{
+			*op->resvalue = op->d.hashdatum_initvalue.init_value;
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_FIRST)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			/*
+			 * Save the Datum on non-null inputs, otherwise store 0 so that
+			 * subsequent NEXT32 operations combine with an initialized value.
+			 */
+			if (!fcinfo->args[0].isnull)
+				*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
+			else
+				*op->resvalue = (Datum) 0;
+
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_FIRST_STRICT)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			if (fcinfo->args[0].isnull)
+			{
+				/*
+				 * With strict we have the expression return NULL instead of
+				 * ignoring NULL input values.  We've nothing more to do after
+				 * finding a NULL.
+				 */
+				*op->resnull = true;
+				*op->resvalue = (Datum) 0;
+				EEO_JUMP(op->d.hashdatum.jumpdone);
+			}
+
+			/* execute the hash function and save the resulting value */
+			*op->resvalue = op->d.hashdatum.fn_addr(fcinfo);
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_NEXT32)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+			uint32		existing_hash = DatumGetUInt32(*op->resvalue);
+
+			/* combine successive hash values by rotating */
+			existing_hash = pg_rotate_left32(existing_hash, 1);
+
+			/* leave the hash value alone on NULL inputs */
+			if (!fcinfo->args[0].isnull)
+			{
+				uint32		hashvalue;
+
+				/* execute hash func and combine with previous hash value */
+				hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
+				existing_hash = existing_hash ^ hashvalue;
+			}
+
+			*op->resvalue = UInt32GetDatum(existing_hash);
+			*op->resnull = false;
+
+			EEO_NEXT();
+		}
+
+		EEO_CASE(EEOP_HASHDATUM_NEXT32_STRICT)
+		{
+			FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data;
+
+			if (fcinfo->args[0].isnull)
+			{
+				/*
+				 * With strict we have the expression return NULL instead of
+				 * ignoring NULL input values.  We've nothing more to do after
+				 * finding a NULL.
+				 */
+				*op->resnull = true;
+				*op->resvalue = (Datum) 0;
+				EEO_JUMP(op->d.hashdatum.jumpdone);
+			}
+			else
+			{
+				uint32		existing_hash = DatumGetUInt32(*op->resvalue);
+				uint32		hashvalue;
+
+				/* combine successive hash values by rotating */
+				existing_hash = pg_rotate_left32(existing_hash, 1);
+
+				/* execute hash func and combine with previous hash value */
+				hashvalue = DatumGetUInt32(op->d.hashdatum.fn_addr(fcinfo));
+				*op->resvalue = UInt32GetDatum(existing_hash ^ hashvalue);
+				*op->resnull = false;
+			}
+
+			EEO_NEXT();
+		}
+
 		EEO_CASE(EEOP_XMLEXPR)
 		{
 			/* too complex for an inline implementation */
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 61480733a12..570a90ebe15 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -47,7 +47,8 @@ static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
 static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable);
 static void ExecParallelHashIncreaseNumBatches(HashJoinTable hashtable);
 static void ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable);
-static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node,
+static void ExecHashBuildSkewHash(HashState *hashstate,
+								  HashJoinTable hashtable, Hash *node,
 								  int mcvsToUse);
 static void ExecHashSkewTableInsert(HashJoinTable hashtable,
 									TupleTableSlot *slot,
@@ -138,11 +139,9 @@ static void
 MultiExecPrivateHash(HashState *node)
 {
 	PlanState  *outerNode;
-	List	   *hashkeys;
 	HashJoinTable hashtable;
 	TupleTableSlot *slot;
 	ExprContext *econtext;
-	uint32		hashvalue;
 
 	/*
 	 * get state info from node
@@ -153,7 +152,6 @@ MultiExecPrivateHash(HashState *node)
 	/*
 	 * set expression context
 	 */
-	hashkeys = node->hashkeys;
 	econtext = node->ps.ps_ExprContext;
 
 	/*
@@ -162,15 +160,23 @@ MultiExecPrivateHash(HashState *node)
 	 */
 	for (;;)
 	{
+		bool		isnull;
+		Datum		hashdatum;
+
 		slot = ExecProcNode(outerNode);
 		if (TupIsNull(slot))
 			break;
 		/* We have to compute the hash value */
 		econtext->ecxt_outertuple = slot;
-		if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
-								 false, hashtable->keepNulls,
-								 &hashvalue))
+
+		ResetExprContext(econtext);
+
+		hashdatum = ExecEvalExprSwitchContext(node->hash_expr, econtext,
+											  &isnull);
+
+		if (!isnull)
 		{
+			uint32		hashvalue = DatumGetUInt32(hashdatum);
 			int			bucketNumber;
 
 			bucketNumber = ExecHashGetSkewBucket(hashtable, hashvalue);
@@ -215,7 +221,6 @@ MultiExecParallelHash(HashState *node)
 {
 	ParallelHashJoinState *pstate;
 	PlanState  *outerNode;
-	List	   *hashkeys;
 	HashJoinTable hashtable;
 	TupleTableSlot *slot;
 	ExprContext *econtext;
@@ -232,7 +237,6 @@ MultiExecParallelHash(HashState *node)
 	/*
 	 * set expression context
 	 */
-	hashkeys = node->hashkeys;
 	econtext = node->ps.ps_ExprContext;
 
 	/*
@@ -279,13 +283,20 @@ MultiExecParallelHash(HashState *node)
 			ExecParallelHashTableSetCurrentBatch(hashtable, 0);
 			for (;;)
 			{
+				bool		isnull;
+
 				slot = ExecProcNode(outerNode);
 				if (TupIsNull(slot))
 					break;
 				econtext->ecxt_outertuple = slot;
-				if (ExecHashGetHashValue(hashtable, econtext, hashkeys,
-										 false, hashtable->keepNulls,
-										 &hashvalue))
+
+				ResetExprContext(econtext);
+
+				hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(node->hash_expr,
+																	 econtext,
+																	 &isnull));
+
+				if (!isnull)
 					ExecParallelHashTableInsert(hashtable, slot, hashvalue);
 				hashtable->partialTuples++;
 			}
@@ -371,8 +382,8 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
 	hashstate->ps.plan = (Plan *) node;
 	hashstate->ps.state = estate;
 	hashstate->ps.ExecProcNode = ExecHash;
+	/* delay building hashtable until ExecHashTableCreate() in executor run */
 	hashstate->hashtable = NULL;
-	hashstate->hashkeys = NIL;	/* will be set by parent HashJoin */
 
 	/*
 	 * Miscellaneous initialization
@@ -393,12 +404,16 @@ ExecInitHash(Hash *node, EState *estate, int eflags)
 	ExecInitResultTupleSlotTL(&hashstate->ps, &TTSOpsMinimalTuple);
 	hashstate->ps.ps_ProjInfo = NULL;
 
+	Assert(node->plan.qual == NIL);
+
 	/*
-	 * initialize child expressions
+	 * Delay initialization of hash_expr until ExecInitHashJoin().  We cannot
+	 * build the ExprState here as we don't yet know the join type we're going
+	 * to be hashing values for and we need to know that before calling
+	 * ExecBuildHash32Expr as the keep_nulls parameter depends on the join
+	 * type.
 	 */
-	Assert(node->plan.qual == NIL);
-	hashstate->hashkeys =
-		ExecInitExprList(node->hashkeys, (PlanState *) hashstate);
+	hashstate->hash_expr = NULL;
 
 	return hashstate;
 }
@@ -429,7 +444,7 @@ ExecEndHash(HashState *node)
  * ----------------------------------------------------------------
  */
 HashJoinTable
-ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations, bool keepNulls)
+ExecHashTableCreate(HashState *state)
 {
 	Hash	   *node;
 	HashJoinTable hashtable;
@@ -440,10 +455,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 	double		rows;
 	int			num_skew_mcvs;
 	int			log2_nbuckets;
-	int			nkeys;
-	int			i;
-	ListCell   *ho;
-	ListCell   *hc;
 	MemoryContext oldcxt;
 
 	/*
@@ -487,7 +498,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 	hashtable->log2_nbuckets = log2_nbuckets;
 	hashtable->log2_nbuckets_optimal = log2_nbuckets;
 	hashtable->buckets.unshared = NULL;
-	hashtable->keepNulls = keepNulls;
 	hashtable->skewEnabled = false;
 	hashtable->skewBucket = NULL;
 	hashtable->skewBucketLen = 0;
@@ -540,32 +550,6 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 
 	oldcxt = MemoryContextSwitchTo(hashtable->hashCxt);
 
-	/*
-	 * Get info about the hash functions to be used for each hash key.  Also
-	 * remember whether the join operators are strict.
-	 */
-	nkeys = list_length(hashOperators);
-	hashtable->outer_hashfunctions = palloc_array(FmgrInfo, nkeys);
-	hashtable->inner_hashfunctions = palloc_array(FmgrInfo, nkeys);
-	hashtable->hashStrict = palloc_array(bool, nkeys);
-	hashtable->collations = palloc_array(Oid, nkeys);
-	i = 0;
-	forboth(ho, hashOperators, hc, hashCollations)
-	{
-		Oid			hashop = lfirst_oid(ho);
-		Oid			left_hashfn;
-		Oid			right_hashfn;
-
-		if (!get_op_hash_functions(hashop, &left_hashfn, &right_hashfn))
-			elog(ERROR, "could not find hash function for hash operator %u",
-				 hashop);
-		fmgr_info(left_hashfn, &hashtable->outer_hashfunctions[i]);
-		fmgr_info(right_hashfn, &hashtable->inner_hashfunctions[i]);
-		hashtable->hashStrict[i] = op_strict(hashop);
-		hashtable->collations[i] = lfirst_oid(hc);
-		i++;
-	}
-
 	if (nbatch > 1 && hashtable->parallel_state == NULL)
 	{
 		MemoryContext oldctx;
@@ -652,7 +636,7 @@ ExecHashTableCreate(HashState *state, List *hashOperators, List *hashCollations,
 	 * it.)
 	 */
 	if (nbatch > 1)
-		ExecHashBuildSkewHash(hashtable, node, num_skew_mcvs);
+		ExecHashBuildSkewHash(state, hashtable, node, num_skew_mcvs);
 
 	MemoryContextSwitchTo(oldcxt);
 }
@@ -1803,103 +1787,6 @@ ExecParallelHashTableInsertCurrentBatch(HashJoinTable hashtable,
 	heap_free_minimal_tuple(tuple);
 }
 
-/*
- * ExecHashGetHashValue
- *		Compute the hash value for a tuple
- *
- * The tuple to be tested must be in econtext->ecxt_outertuple (thus Vars in
- * the hashkeys expressions need to have OUTER_VAR as varno).  If outer_tuple
- * is false (meaning it's the HashJoin's inner node, Hash), econtext,
- * hashkeys, and slot need to be from Hash, with hashkeys/slot referencing and
- * being suitable for tuples from the node below the Hash.  Conversely, if
- * outer_tuple is true, econtext is from HashJoin, and hashkeys/slot need to
- * be appropriate for tuples from HashJoin's outer node.
- *
- * A true result means the tuple's hash value has been successfully computed
- * and stored at *hashvalue.  A false result means the tuple cannot match
- * because it contains a null attribute, and hence it should be discarded
- * immediately.  (If keep_nulls is true then false is never returned.)
- */
-bool
-ExecHashGetHashValue(HashJoinTable hashtable,
-					 ExprContext *econtext,
-					 List *hashkeys,
-					 bool outer_tuple,
-					 bool keep_nulls,
-					 uint32 *hashvalue)
-{
-	uint32		hashkey = 0;
-	FmgrInfo   *hashfunctions;
-	ListCell   *hk;
-	int			i = 0;
-	MemoryContext oldContext;
-
-	/*
-	 * We reset the eval context each time to reclaim any memory leaked in the
-	 * hashkey expressions.
-	 */
-	ResetExprContext(econtext);
-
-	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
-
-	if (outer_tuple)
-		hashfunctions = hashtable->outer_hashfunctions;
-	else
-		hashfunctions = hashtable->inner_hashfunctions;
-
-	foreach(hk, hashkeys)
-	{
-		ExprState  *keyexpr = (ExprState *) lfirst(hk);
-		Datum		keyval;
-		bool		isNull;
-
-		/* combine successive hashkeys by rotating */
-		hashkey = pg_rotate_left32(hashkey, 1);
-
-		/*
-		 * Get the join attribute value of the tuple
-		 */
-		keyval = ExecEvalExpr(keyexpr, econtext, &isNull);
-
-		/*
-		 * If the attribute is NULL, and the join operator is strict, then
-		 * this tuple cannot pass the join qual so we can reject it
-		 * immediately (unless we're scanning the outside of an outer join, in
-		 * which case we must not reject it).  Otherwise we act like the
-		 * hashcode of NULL is zero (this will support operators that act like
-		 * IS NOT DISTINCT, though not any more-random behavior).  We treat
-		 * the hash support function as strict even if the operator is not.
-		 *
-		 * Note: currently, all hashjoinable operators must be strict since
-		 * the hash index AM assumes that.  However, it takes so little extra
-		 * code here to allow non-strict that we may as well do it.
-		 */
-		if (isNull)
-		{
-			if (hashtable->hashStrict[i] && !keep_nulls)
-			{
-				MemoryContextSwitchTo(oldContext);
-				return false;	/* cannot match */
-			}
-			/* else, leave hashkey unmodified, equivalent to hashcode 0 */
-		}
-		else
-		{
-			/* Compute the hash function */
-			uint32		hkey;
-
-			hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], hashtable->collations[i], keyval));
-			hashkey ^= hkey;
-		}
-
-		i++;
-	}
-
-	MemoryContextSwitchTo(oldContext);
-
-	*hashvalue = hashkey;
-	return true;
-}
-
 /*
  * ExecHashGetBucketAndBatch
@@ -2372,7 +2259,8 @@ ExecReScanHash(HashState *node)
  * based on available memory.
  */
 static void
-ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
+ExecHashBuildSkewHash(HashState *hashstate, HashJoinTable hashtable,
+					  Hash *node, int mcvsToUse)
 {
 	HeapTupleData *statsTuple;
 	AttStatsSlot sslot;
@@ -2400,7 +2288,6 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
 	{
 		double		frac;
 		int			nbuckets;
-		FmgrInfo   *hashfunctions;
 		int			i;
 
 		if (mcvsToUse > sslot.nvalues)
@@ -2468,15 +2355,14 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
 		 * ExecHashRemoveNextSkewBucket) and we want the least common MCVs to
 		 * be removed first.
 		 */
-		hashfunctions = hashtable->outer_hashfunctions;
 		for (i = 0; i < mcvsToUse; i++)
 		{
 			uint32		hashvalue;
 			int			bucket;
 
-			hashvalue = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[0],
-														 hashtable->collations[0],
+			hashvalue = DatumGetUInt32(FunctionCall1Coll(hashstate->skew_hashfunction,
+														 hashstate->skew_collation,
 														 sslot.values[i]));
 
 			/*
diff --git a/src/backend/executor/nodeHashjoin.c b/src/backend/executor/nodeHashjoin.c
index 5429e687342..2f7170604d6 100644
--- a/src/backend/executor/nodeHashjoin.c
+++ b/src/backend/executor/nodeHashjoin.c
@@ -169,6 +169,7 @@
 #include "executor/nodeHash.h"
 #include "executor/nodeHashjoin.h"
 #include "miscadmin.h"
+#include "utils/lsyscache.h"
 #include "utils/sharedtuplestore.h"
 #include "utils/wait_event.h"
 
@@ -331,10 +332,7 @@ ExecHashJoinImpl(PlanState *pstate, bool parallel)
 				 * whoever gets here first will create the hash table and any
 				 * later arrivals will merely attach to it.
 				 */
-				hashtable = ExecHashTableCreate(hashNode,
-												node->hj_HashOperators,
-												node->hj_Collations,
-												HJ_FILL_INNER(node));
+				hashtable = ExecHashTableCreate(hashNode);
 				node->hj_HashTable = hashtable;
 
 				/*
@@ -820,9 +818,96 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	 */
 	{
 		HashState  *hashstate = (HashState *) innerPlanState(hjstate);
+		Hash	   *hash = (Hash *) hashstate->ps.plan;
 		TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
+		Oid		   *outer_hashfuncid;
+		Oid		   *inner_hashfuncid;
+		bool	   *hash_strict;
+		ListCell   *lc;
+		int			nkeys;
 
 		hjstate->hj_HashTupleSlot = slot;
+
+		/*
+		 * Build ExprStates to obtain hash values for either side of the join.
+		 * This must be done here as ExecBuildHash32Expr needs to know how to
+		 * handle NULL inputs and the required handling of that depends on the
+		 * jointype.  We don't know the join type in ExecInitHash() and we
+		 * must build the ExprStates before ExecHashTableCreate() so we
+		 * properly attribute any SubPlans that exist in the hash expressions
+		 * to the correct PlanState.
+		 */
+		nkeys = list_length(node->hashoperators);
+
+		outer_hashfuncid = palloc_array(Oid, nkeys);
+		inner_hashfuncid = palloc_array(Oid, nkeys);
+		hash_strict = palloc_array(bool, nkeys);
+
+		/*
+		 * Determine the hash function for each side of the join for the given
+		 * hash operator.
+		 */
+		foreach(lc, node->hashoperators)
+		{
+			Oid			hashop = lfirst_oid(lc);
+			int			i = foreach_current_index(lc);
+
+			if (!get_op_hash_functions(hashop,
+									   &outer_hashfuncid[i],
+									   &inner_hashfuncid[i]))
+				elog(ERROR,
+					 "could not find hash function for hash operator %u",
+					 hashop);
+			hash_strict[i] = op_strict(hashop);
+		}
+
+		/*
+		 * Build an ExprState to generate the hash value for the expressions
+		 * on the outer of the join.  This ExprState must finish generating
+		 * the hash value when HJ_FILL_OUTER() is true.  Otherwise,
+		 * ExecBuildHash32Expr will set up the ExprState to abort early if it
+		 * finds a NULL.  In these cases, we don't need to store these tuples
+		 * in the hash table as the jointype does not require it.
+		 */
+		hjstate->hj_OuterHash =
+			ExecBuildHash32Expr(hjstate->js.ps.ps_ResultTupleDesc,
+								hjstate->js.ps.resultops,
+								outer_hashfuncid,
+								node->hashcollations,
+								node->hashkeys,
+								hash_strict,
+								&hjstate->js.ps,
+								0,
+								HJ_FILL_OUTER(hjstate));
+
+		/* As above, but for the inner side of the join */
+		hashstate->hash_expr =
+			ExecBuildHash32Expr(hashstate->ps.ps_ResultTupleDesc,
+								hashstate->ps.resultops,
+								inner_hashfuncid,
+								node->hashcollations,
+								hash->hashkeys,
+								hash_strict,
+								&hashstate->ps,
+								0,
+								HJ_FILL_INNER(hjstate));
+
+		/*
+		 * Set up the skew table hash function while we have a record of the
+		 * first key's hash function Oid.
+		 */
+		if (OidIsValid(hash->skewTable))
+		{
+			hashstate->skew_hashfunction = palloc0(sizeof(FmgrInfo));
+			hashstate->skew_collation = linitial_oid(node->hashcollations);
+			fmgr_info(outer_hashfuncid[0], hashstate->skew_hashfunction);
+		}
+
+		/* no need to keep these */
+		pfree(outer_hashfuncid);
+		pfree(inner_hashfuncid);
+		pfree(hash_strict);
 	}
 
 	/*
@@ -846,11 +931,6 @@ ExecInitHashJoin(HashJoin *node, EState *estate, int eflags)
 	hjstate->hj_CurSkewBucketNo = INVALID_SKEW_BUCKET_NO;
 	hjstate->hj_CurTuple = NULL;
 
-	hjstate->hj_OuterHashKeys = ExecInitExprList(node->hashkeys,
-												 (PlanState *) hjstate);
-	hjstate->hj_HashOperators = node->hashoperators;
-	hjstate->hj_Collations = node->hashcollations;
-
 	hjstate->hj_JoinState = HJ_BUILD_HASHTABLE;
 	hjstate->hj_MatchedOuter = false;
 	hjstate->hj_OuterNotEmpty = false;
@@ -918,17 +998,22 @@ ExecHashJoinOuterGetTuple(PlanState *outerNode,
 		while (!TupIsNull(slot))
 		{
+			bool		isnull;
+
 			/*
 			 * We have to compute the tuple's hash value.
 			 */
 			ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
 
 			econtext->ecxt_outertuple = slot;
-			if (ExecHashGetHashValue(hashtable, econtext,
-									 hjstate->hj_OuterHashKeys,
-									 true,	/* outer tuple */
-									 HJ_FILL_OUTER(hjstate),
-									 hashvalue))
+
+			ResetExprContext(econtext);
+
+			*hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
+																  econtext,
+																  &isnull));
+
+			if (!isnull)
 			{
 				/* remember outer relation is not empty for possible rescan */
 				hjstate->hj_OuterNotEmpty = true;
@@ -989,14 +1074,19 @@ ExecParallelHashJoinOuterGetTuple(PlanState *outerNode,
 		while (!TupIsNull(slot))
 		{
+			bool		isnull;
+
 			ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
 
 			econtext->ecxt_outertuple = slot;
-			if (ExecHashGetHashValue(hashtable, econtext,
-									 hjstate->hj_OuterHashKeys,
-									 true,	/* outer tuple */
-									 HJ_FILL_OUTER(hjstate),
-									 hashvalue))
+
+			ResetExprContext(econtext);
+
+			*hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
+																  econtext,
+																  &isnull));
+
+			if (!isnull)
 				return slot;
 
 			/*
@@ -1518,15 +1608,20 @@ ExecParallelHashJoinPartitionOuter(HashJoinState *hjstate)
 	/* Execute outer plan, writing all tuples to shared tuplestores. */
 	for (;;)
 	{
+		bool		isnull;
+
 		slot = ExecProcNode(outerState);
 		if (TupIsNull(slot))
 			break;
 		econtext->ecxt_outertuple = slot;
-		if (ExecHashGetHashValue(hashtable, econtext,
-								 hjstate->hj_OuterHashKeys,
-								 true,	/* outer tuple */
-								 HJ_FILL_OUTER(hjstate),
-								 &hashvalue))
+
+		ResetExprContext(econtext);
+
+		hashvalue = DatumGetUInt32(ExecEvalExprSwitchContext(hjstate->hj_OuterHash,
+															 econtext,
+															 &isnull));
+
+		if (!isnull)
 		{
 			int			batchno;
 			int			bucketno;
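For readers skimming the diff above, here is a small stand-alone model
(assumptions: plain C, not PostgreSQL code, names invented for illustration)
of the NULL handling that ExecBuildHash32Expr() arranges through its opstrict
and keep_nulls parameters: when a hash operator is strict and keep_nulls is
false, the generated strict step makes the whole expression return NULL as
soon as one key is NULL (the tuple cannot match), while otherwise a NULL key
simply contributes a zero hash.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct HashKeyInput
    {
        uint32_t    hash;       /* hash of the key datum */
        bool        isnull;     /* key datum was NULL */
        bool        strict;     /* hash operator is strict */
    } HashKeyInput;

    static uint32_t
    rotl1(uint32_t v)
    {
        return (v << 1) | (v >> 31);
    }

    /* Returns false (a NULL result) when a strict key sees a NULL input and
     * keep_nulls is false, mirroring the EEOP_HASHDATUM_*_STRICT early exit. */
    static bool
    hash32_expr(const HashKeyInput *keys, int nkeys, bool keep_nulls,
                uint32_t *result)
    {
        uint32_t    hash = 0;

        for (int i = 0; i < nkeys; i++)
        {
            bool        strict_step = keys[i].strict && !keep_nulls;

            if (keys[i].isnull)
            {
                if (strict_step)
                    return false;   /* abort early; tuple cannot match */
                /* non-strict handling: NULL contributes a zero hash */
                hash = (i == 0) ? 0 : rotl1(hash);
            }
            else
                hash = (i == 0) ? keys[i].hash : rotl1(hash) ^ keys[i].hash;
        }
        *result = hash;
        return true;
    }

    int
    main(void)
    {
        HashKeyInput keys[] = {{0xdeadbeef, false, true}, {0, true, true}};
        uint32_t    h;

        /* strict op, NULL key, keep_nulls=false: expression yields NULL */
        printf("valid: %d\n", hash32_expr(keys, 2, false, &h));
        /* keep_nulls=true (e.g. an outer-join fill side): NULL hashes as 0 */
        if (hash32_expr(keys, 2, true, &h))
            printf("hash: %08x\n", h);
        return 0;
    }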