diff options
author | David Rowley <drowley@postgresql.org> | 2024-12-11 13:47:16 +1300 |
---|---|---|
committer | David Rowley <drowley@postgresql.org> | 2024-12-11 13:47:16 +1300 |
commit | 0f5738202b812a976e8612c85399b52d16a0abb6 (patch) | |
tree | 9edf2821d761f7f42615573a45bf93f9ed64cf75 /src/backend/executor/execGrouping.c | |
parent | a43567483c617fb046c805b61964d5168c9a0553 (diff) | |
download | postgresql-0f5738202b812a976e8612c85399b52d16a0abb6.tar.gz postgresql-0f5738202b812a976e8612c85399b52d16a0abb6.zip |
Use ExprStates for hashing in GROUP BY and SubPlans
This speeds up obtaining hash values for GROUP BY and hashed SubPlans by
using the ExprState support for hashing, thus allowing JIT compilation for
obtaining hash values for these operations.
This, even without JIT compilation, has been shown to improve Hash
Aggregate performance in some cases by around 15% and hashed NOT IN
queries in one case by over 30%, however, real-world cases are likely to
see smaller gains as the test cases used were purposefully designed to
have high hashing overheads by keeping the hash table small to prevent
additional memory overheads that would be a factor when working with large
hash tables.
In passing, fix a hypothetical bug in ExecBuildHash32Expr() so that the
initial value is stored directly in the ExprState's result field if
there are no expressions to hash. None of the current users of this
function use an initial value, so the bug is only hypothetical.
Reviewed-by: Andrei Lepikhov <lepihov@gmail.com>
Discussion: https://postgr.es/m/CAApHDvpYSO3kc9UryMevWqthTBrxgfd9djiAjKHMPUSQeX9vdQ@mail.gmail.com
Diffstat (limited to 'src/backend/executor/execGrouping.c')
-rw-r--r-- | src/backend/executor/execGrouping.c | 82 |
1 files changed, 34 insertions, 48 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 774e4de8828..9a88fc65249 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -169,6 +169,7 @@ BuildTupleHashTableExt(PlanState *parent, Size hash_mem_limit; MemoryContext oldcontext; bool allow_jit; + uint32 hash_iv = 0; Assert(nbuckets > 0); @@ -183,14 +184,13 @@ BuildTupleHashTableExt(PlanState *parent, hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; - hashtable->tab_hash_funcs = hashfunctions; hashtable->tab_collations = collations; hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; hashtable->tableslot = NULL; /* will be made on first lookup */ hashtable->inputslot = NULL; - hashtable->in_hash_funcs = NULL; + hashtable->in_hash_expr = NULL; hashtable->cur_eq_func = NULL; /* @@ -202,9 +202,7 @@ BuildTupleHashTableExt(PlanState *parent, * underestimated. */ if (use_variable_hash_iv) - hashtable->hash_iv = murmurhash32(ParallelWorkerNumber); - else - hashtable->hash_iv = 0; + hash_iv = murmurhash32(ParallelWorkerNumber); hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable); @@ -225,6 +223,16 @@ BuildTupleHashTableExt(PlanState *parent, */ allow_jit = metacxt != tablecxt; + /* build hash ExprState for all columns */ + hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc, + &TTSOpsMinimalTuple, + hashfunctions, + collations, + numCols, + keyColIdx, + allow_jit ? parent : NULL, + hash_iv); + /* build comparator for all columns */ /* XXX: should we support non-minimal tuples for the inputslot? */ hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc, @@ -316,7 +324,7 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, /* set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; hashtable->cur_eq_func = hashtable->tab_eq_func; local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); @@ -342,7 +350,7 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot) uint32 hash; hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; /* Need to run the hash functions in short-lived context */ oldContext = MemoryContextSwitchTo(hashtable->tempcxt); @@ -370,7 +378,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, /* set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashtable->tab_hash_funcs; + hashtable->in_hash_expr = hashtable->tab_hash_expr; hashtable->cur_eq_func = hashtable->tab_eq_func; entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash); @@ -386,14 +394,14 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, * created if there's not a match. This is similar to the non-creating * case of LookupTupleHashEntry, except that it supports cross-type * comparisons, in which the given tuple is not of the same type as the - * table entries. The caller must provide the hash functions to use for - * the input tuple, as well as the equality functions, since these may be + * table entries. The caller must provide the hash ExprState to use for + * the input tuple, as well as the equality ExprState, since these may be * different from the table's internal functions. */ TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, ExprState *eqcomp, - FmgrInfo *hashfunctions) + ExprState *hashexpr) { TupleHashEntry entry; MemoryContext oldContext; @@ -404,7 +412,7 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, /* Set up data needed by hash and match functions */ hashtable->inputslot = slot; - hashtable->in_hash_funcs = hashfunctions; + hashtable->in_hash_expr = hashexpr; hashtable->cur_eq_func = eqcomp; /* Search the hash table */ @@ -421,25 +429,24 @@ FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, * copied into the table. * * Also, the caller must select an appropriate memory context for running - * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) + * the hash functions. */ static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, const MinimalTuple tuple) { TupleHashTable hashtable = (TupleHashTable) tb->private_data; - int numCols = hashtable->numCols; - AttrNumber *keyColIdx = hashtable->keyColIdx; - uint32 hashkey = hashtable->hash_iv; + uint32 hashkey; TupleTableSlot *slot; - FmgrInfo *hashfunctions; - int i; + bool isnull; if (tuple == NULL) { /* Process the current input tuple for the table */ - slot = hashtable->inputslot; - hashfunctions = hashtable->in_hash_funcs; + hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot; + hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr, + hashtable->exprcontext, + &isnull)); } else { @@ -449,38 +456,17 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, * (this case never actually occurs due to the way simplehash.h is * used, as the hash-value is stored in the entries) */ - slot = hashtable->tableslot; + slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot; ExecStoreMinimalTuple(tuple, slot, false); - hashfunctions = hashtable->tab_hash_funcs; - } - - for (i = 0; i < numCols; i++) - { - AttrNumber att = keyColIdx[i]; - Datum attr; - bool isNull; - - /* combine successive hashkeys by rotating */ - hashkey = pg_rotate_left32(hashkey, 1); - - attr = slot_getattr(slot, att, &isNull); - - if (!isNull) /* treat nulls as having hash key 0 */ - { - uint32 hkey; - - hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i], - hashtable->tab_collations[i], - attr)); - hashkey ^= hkey; - } + hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr, + hashtable->exprcontext, + &isnull)); } /* - * The way hashes are combined above, among each other and with the IV, - * doesn't lead to good bit perturbation. As the IV's goal is to lead to - * achieve that, perform a round of hashing of the combined hash - - * resulting in near perfect perturbation. + * The hashing done above, even with an initial value, doesn't tend to + * result in good hash perturbation. Running the value produced above + * through murmurhash32 leads to near perfect hash perturbation. */ return murmurhash32(hashkey); } |