diff options
author | David Rowley <drowley@postgresql.org> | 2024-08-20 13:38:22 +1200 |
---|---|---|
committer | David Rowley <drowley@postgresql.org> | 2024-08-20 13:38:22 +1200 |
commit | adf97c1562380e02acd60dc859c289ed3a8352ee (patch) | |
tree | bbc199a61078c00d997903c4d5ce0c2fdccc7224 /src/backend/jit/llvm/llvmjit_expr.c | |
parent | 9380e5f129d2a160ecc2444f61bb7cb97fd51fbb (diff) | |
download | postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.tar.gz postgresql-adf97c1562380e02acd60dc859c289ed3a8352ee.zip |
Speed up Hash Join by making ExprStates support hashing
Here we add ExprState support for obtaining a 32-bit hash value from a
list of expressions. This allows both faster hashing and also JIT
compilation of these expressions. This is especially useful when hash
joins have multiple join keys as the previous code called ExecEvalExpr on
each hash join key individually and that was inefficient as tuple
deformation would have only taken into account one key at a time, which
could lead to walking the tuple once for each join key. With the new
code, we'll determine the maximum attribute required and deform the tuple
to that point only once.
Some performance tests done with this change have shown up to a 20%
performance increase of a query containing a Hash Join without JIT
compilation and up to a 26% performance increase when JIT is enabled and
optimization and inlining were performed by the JIT compiler. The
performance increase with 1 join column was less with a 14% increase
with and without JIT. This test was done using a fairly small hash
table and a large number of hash probes. The increase will likely be
less with large tables, especially ones larger than L3 cache as memory
pressure is more likely to be the limiting factor there.
This commit only addresses Hash Joins, but lays expression evaluation
and JIT compilation infrastructure for other hashing needs such as Hash
Aggregate.
Author: David Rowley
Reviewed-by: Alexey Dvoichenkov <alexey@hyperplane.net>
Reviewed-by: Tels <nospam-pg-abuse@bloodgate.com>
Discussion: https://postgr.es/m/CAApHDvoexAxgQFNQD_GRkr2O_eJUD1-wUGm%3Dm0L%2BGc%3DT%3DkEa4g%40mail.gmail.com
Diffstat (limited to 'src/backend/jit/llvm/llvmjit_expr.c')
-rw-r--r-- | src/backend/jit/llvm/llvmjit_expr.c | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/src/backend/jit/llvm/llvmjit_expr.c b/src/backend/jit/llvm/llvmjit_expr.c index 27f94f90070..48ccdb942a2 100644 --- a/src/backend/jit/llvm/llvmjit_expr.c +++ b/src/backend/jit/llvm/llvmjit_expr.c @@ -1900,6 +1900,210 @@ llvm_compile_expr(ExprState *state) LLVMBuildBr(b, opblocks[opno + 1]); break; + case EEOP_HASHDATUM_SET_INITVAL: + { + LLVMValueRef v_initvalue; + + v_initvalue = l_sizet_const(op->d.hashdatum_initvalue.init_value); + + LLVMBuildStore(b, v_initvalue, v_resvaluep); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + + case EEOP_HASHDATUM_FIRST: + case EEOP_HASHDATUM_FIRST_STRICT: + case EEOP_HASHDATUM_NEXT32: + case EEOP_HASHDATUM_NEXT32_STRICT: + { + FunctionCallInfo fcinfo = op->d.hashdatum.fcinfo_data; + LLVMValueRef v_fcinfo; + LLVMValueRef v_fcinfo_isnull; + LLVMValueRef v_retval; + LLVMBasicBlockRef b_checkargnull; + LLVMBasicBlockRef b_ifnotnull; + LLVMBasicBlockRef b_ifnullblock; + LLVMValueRef v_argisnull; + LLVMValueRef v_prevhash = NULL; + + /* + * When performing the next hash and not in strict mode we + * perform a rotation of the previously stored hash value + * before doing the NULL check. We want to do this even + * when we receive a NULL Datum to hash. In strict mode, + * we do this after the NULL check so as not to waste the + * effort of rotating the bits when we're going to throw + * away the hash value and return NULL. + */ + if (opcode == EEOP_HASHDATUM_NEXT32) + { + LLVMValueRef v_tmp1; + LLVMValueRef v_tmp2; + + /* + * Fetch the previously hashed value from where the + * EEOP_HASHDATUM_FIRST operation stored it. + */ + v_prevhash = l_load(b, TypeSizeT, v_resvaluep, + "prevhash"); + + /* + * Rotate bits left by 1 bit. Be careful not to + * overflow uint32 when working with size_t. + */ + v_tmp1 = LLVMBuildShl(b, v_prevhash, l_sizet_const(1), + ""); + v_tmp1 = LLVMBuildAnd(b, v_tmp1, + l_sizet_const(0xffffffff), ""); + v_tmp2 = LLVMBuildLShr(b, v_prevhash, + l_sizet_const(31), ""); + v_prevhash = LLVMBuildOr(b, v_tmp1, v_tmp2, + "rotatedhash"); + } + + /* + * Block for the actual function call, if args are + * non-NULL. + */ + b_ifnotnull = l_bb_before_v(opblocks[opno + 1], + "b.%d.ifnotnull", + opno); + + /* we expect the hash function to have 1 argument */ + if (fcinfo->nargs != 1) + elog(ERROR, "incorrect number of function arguments"); + + v_fcinfo = l_ptr_const(fcinfo, + l_ptr(StructFunctionCallInfoData)); + + b_checkargnull = l_bb_before_v(b_ifnotnull, + "b.%d.isnull.0", opno); + + LLVMBuildBr(b, b_checkargnull); + + /* + * Determine what to do if we find the argument to be + * NULL. + */ + if (opcode == EEOP_HASHDATUM_FIRST_STRICT || + opcode == EEOP_HASHDATUM_NEXT32_STRICT) + { + b_ifnullblock = l_bb_before_v(b_ifnotnull, + "b.%d.strictnull", + opno); + + LLVMPositionBuilderAtEnd(b, b_ifnullblock); + + /* + * In strict node, NULL inputs result in NULL. Save + * the NULL result and goto jumpdone. + */ + LLVMBuildStore(b, l_sbool_const(1), v_resnullp); + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + LLVMBuildBr(b, opblocks[op->d.hashdatum.jumpdone]); + } + else + { + b_ifnullblock = l_bb_before_v(b_ifnotnull, + "b.%d.null", + opno); + + LLVMPositionBuilderAtEnd(b, b_ifnullblock); + + + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + + if (opcode == EEOP_HASHDATUM_NEXT32) + { + Assert(v_prevhash != NULL); + + /* + * Save the rotated hash value and skip to the + * next op. + */ + LLVMBuildStore(b, v_prevhash, v_resvaluep); + } + else + { + Assert(opcode == EEOP_HASHDATUM_FIRST); + + /* + * Store a zero Datum when the Datum to hash is + * NULL + */ + LLVMBuildStore(b, l_sizet_const(0), v_resvaluep); + } + + LLVMBuildBr(b, opblocks[opno + 1]); + } + + LLVMPositionBuilderAtEnd(b, b_checkargnull); + + /* emit code to check if the input parameter is NULL */ + v_argisnull = l_funcnull(b, v_fcinfo, 0); + LLVMBuildCondBr(b, + LLVMBuildICmp(b, + LLVMIntEQ, + v_argisnull, + l_sbool_const(1), + ""), + b_ifnullblock, + b_ifnotnull); + + LLVMPositionBuilderAtEnd(b, b_ifnotnull); + + /* + * Rotate the previously stored hash value when performing + * NEXT32 in strict mode. In non-strict mode we already + * did this before checking for NULLs. + */ + if (opcode == EEOP_HASHDATUM_NEXT32_STRICT) + { + LLVMValueRef v_tmp1; + LLVMValueRef v_tmp2; + + /* + * Fetch the previously hashed value from where the + * EEOP_HASHDATUM_FIRST_STRICT operation stored it. + */ + v_prevhash = l_load(b, TypeSizeT, v_resvaluep, + "prevhash"); + + /* + * Rotate bits left by 1 bit. Be careful not to + * overflow uint32 when working with size_t. + */ + v_tmp1 = LLVMBuildShl(b, v_prevhash, l_sizet_const(1), + ""); + v_tmp1 = LLVMBuildAnd(b, v_tmp1, + l_sizet_const(0xffffffff), ""); + v_tmp2 = LLVMBuildLShr(b, v_prevhash, + l_sizet_const(31), ""); + v_prevhash = LLVMBuildOr(b, v_tmp1, v_tmp2, + "rotatedhash"); + } + + /* call the hash function */ + v_retval = BuildV1Call(context, b, mod, fcinfo, + &v_fcinfo_isnull); + + /* + * For NEXT32 ops, XOR (^) the returned hash value with + * the existing hash value. + */ + if (opcode == EEOP_HASHDATUM_NEXT32 || + opcode == EEOP_HASHDATUM_NEXT32_STRICT) + v_retval = LLVMBuildXor(b, v_prevhash, v_retval, + "xorhash"); + + LLVMBuildStore(b, v_retval, v_resvaluep); + LLVMBuildStore(b, l_sbool_const(0), v_resnullp); + + LLVMBuildBr(b, opblocks[opno + 1]); + break; + } + case EEOP_CONVERT_ROWTYPE: build_EvalXFunc(b, mod, "ExecEvalConvertRowtype", v_state, op, v_econtext); |