diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2003-08-19 01:13:41 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2003-08-19 01:13:41 +0000 |
commit | 80860c32d92fe3445dcb7de70091354c9d0406b0 (patch) | |
tree | 2119ed51447a2c45fc75dab13e8ec89115a9890a /src/backend/executor/execGrouping.c | |
parent | 23e10843db588928e18bd58018c2e70f4548f177 (diff) | |
download | postgresql-80860c32d92fe3445dcb7de70091354c9d0406b0.tar.gz postgresql-80860c32d92fe3445dcb7de70091354c9d0406b0.zip |
Improve dynahash.c's API so that caller can specify the comparison function
as well as the hash function (formerly the comparison function was hardwired
as memcmp()). This makes it possible to eliminate the special-purpose
hashtable management code in execGrouping.c in favor of using dynahash to
manage tuple hashtables; which is a win because dynahash knows how to expand
a hashtable when the original size estimate was too small, whereas the
special-purpose code was too stupid to do that. (See recent gripe from
Stephan Szabo about poor performance when hash table size estimate is way
off.) Free side benefit: when using string_hash, the default comparison
function is now strncmp() instead of memcmp(). This should eliminate some
part of the overhead associated with larger NAMEDATALEN values.
Diffstat (limited to 'src/backend/executor/execGrouping.c')
-rw-r--r-- | src/backend/executor/execGrouping.c | 205 |
1 files changed, 131 insertions, 74 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 100e7a1c375..d293bb7ff29 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.7 2003/08/08 21:41:34 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.8 2003/08/19 01:13:40 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,13 @@ #include "utils/syscache.h" +static TupleHashTable CurTupleHashTable = NULL; + +static uint32 TupleHashTableHash(const void *key, Size keysize); +static int TupleHashTableMatch(const void *key1, const void *key2, + Size keysize); + + /***************************************************************************** * Utility routines for grouping tuples together *****************************************************************************/ @@ -272,7 +279,7 @@ execTuplesHashPrepare(TupleDesc tupdesc, * numCols, keyColIdx: identify the tuple fields to use as lookup key * eqfunctions: equality comparison functions to use * hashfunctions: datatype-specific hashing functions to use - * nbuckets: number of buckets to make + * nbuckets: initial estimate of hashtable size * entrysize: size of each entry (at least sizeof(TupleHashEntryData)) * tablecxt: memory context in which to store table and table entries * tempcxt: short-lived context for evaluation hash and comparison functions @@ -290,14 +297,13 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, MemoryContext tablecxt, MemoryContext tempcxt) { TupleHashTable hashtable; - Size tabsize; + HASHCTL hash_ctl; Assert(nbuckets > 0); Assert(entrysize >= sizeof(TupleHashEntryData)); - tabsize = sizeof(TupleHashTableData) + - (nbuckets - 1) *sizeof(TupleHashEntry); - hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize); + hashtable = (TupleHashTable) MemoryContextAlloc(tablecxt, + sizeof(TupleHashTableData)); hashtable->numCols = numCols; hashtable->keyColIdx = keyColIdx; @@ -306,7 +312,20 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, hashtable->tablecxt = tablecxt; hashtable->tempcxt = tempcxt; hashtable->entrysize = entrysize; - hashtable->nbuckets = nbuckets; + + MemSet(&hash_ctl, 0, sizeof(hash_ctl)); + hash_ctl.keysize = sizeof(TupleHashEntryData); + hash_ctl.entrysize = entrysize; + hash_ctl.hash = TupleHashTableHash; + hash_ctl.match = TupleHashTableMatch; + hash_ctl.hcxt = tablecxt; + hashtable->hashtab = hash_create("TupleHashTable", (long) nbuckets, + &hash_ctl, + HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT); + if (hashtable->hashtab == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); return hashtable; } @@ -327,19 +346,93 @@ TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, bool *isnew) { - int numCols = hashtable->numCols; - AttrNumber *keyColIdx = hashtable->keyColIdx; HeapTuple tuple = slot->val; TupleDesc tupdesc = slot->ttc_tupleDescriptor; - uint32 hashkey = 0; - int i; - int bucketno; TupleHashEntry entry; MemoryContext oldContext; + TupleHashTable saveCurHT; + bool found; - /* Need to run the hash function in short-lived context */ + /* Need to run the hash functions in short-lived context */ oldContext = MemoryContextSwitchTo(hashtable->tempcxt); + /* + * Set up data needed by hash and match functions + * + * We save and restore CurTupleHashTable just in case someone manages + * to invoke this code re-entrantly. + */ + hashtable->tupdesc = tupdesc; + saveCurHT = CurTupleHashTable; + CurTupleHashTable = hashtable; + + /* Search the hash table */ + entry = (TupleHashEntry) hash_search(hashtable->hashtab, + &tuple, + isnew ? HASH_ENTER : HASH_FIND, + &found); + + if (isnew) + { + if (found) + { + /* found pre-existing entry */ + *isnew = false; + } + else + { + /* created new entry ... we hope */ + if (entry == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + /* + * Zero any caller-requested space in the entry. (This zaps + * the "key data" dynahash.c copied into the new entry, but + * we don't care since we're about to overwrite it anyway.) + */ + MemSet(entry, 0, hashtable->entrysize); + + /* Copy the first tuple into the table context */ + MemoryContextSwitchTo(hashtable->tablecxt); + entry->firstTuple = heap_copytuple(tuple); + + *isnew = true; + } + } + + CurTupleHashTable = saveCurHT; + + MemoryContextSwitchTo(oldContext); + + return entry; +} + +/* + * Compute the hash value for a tuple + * + * The passed-in key is a pointer to a HeapTuple pointer -- this is either + * the firstTuple field of a TupleHashEntry struct, or the key value passed + * to hash_search. We ignore the keysize. + * + * CurTupleHashTable must be set before calling this, since dynahash.c + * doesn't provide any API that would let us get at the hashtable otherwise. + * + * Also, the caller must select an appropriate memory context for running + * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) + */ +static uint32 +TupleHashTableHash(const void *key, Size keysize) +{ + HeapTuple tuple = *(const HeapTuple *) key; + TupleHashTable hashtable = CurTupleHashTable; + int numCols = hashtable->numCols; + AttrNumber *keyColIdx = hashtable->keyColIdx; + TupleDesc tupdesc = hashtable->tupdesc; + uint32 hashkey = 0; + int i; + for (i = 0; i < numCols; i++) { AttrNumber att = keyColIdx[i]; @@ -360,72 +453,36 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, hashkey ^= hkey; } } - bucketno = hashkey % (uint32) hashtable->nbuckets; - - for (entry = hashtable->buckets[bucketno]; - entry != NULL; - entry = entry->next) - { - /* Quick check using hashkey */ - if (entry->hashkey != hashkey) - continue; - if (execTuplesMatch(entry->firstTuple, - tuple, - tupdesc, - numCols, keyColIdx, - hashtable->eqfunctions, - hashtable->tempcxt)) - { - if (isnew) - *isnew = false; - MemoryContextSwitchTo(oldContext); - return entry; - } - } - - /* Not there, so build a new one if requested */ - if (isnew) - { - MemoryContextSwitchTo(hashtable->tablecxt); - - entry = (TupleHashEntry) palloc0(hashtable->entrysize); - - entry->hashkey = hashkey; - entry->firstTuple = heap_copytuple(tuple); - - entry->next = hashtable->buckets[bucketno]; - hashtable->buckets[bucketno] = entry; - - *isnew = true; - } - - MemoryContextSwitchTo(oldContext); - return entry; + return hashkey; } /* - * Walk through all the entries of a hash table, in no special order. - * Returns NULL when no more entries remain. + * See whether two tuples (presumably of the same hash value) match + * + * As above, the passed pointers are pointers to HeapTuple pointers. * - * Iterator state must be initialized with ResetTupleHashIterator() macro. + * CurTupleHashTable must be set before calling this, since dynahash.c + * doesn't provide any API that would let us get at the hashtable otherwise. + * + * Also, the caller must select an appropriate memory context for running + * the compare functions. (dynahash.c doesn't change CurrentMemoryContext.) */ -TupleHashEntry -ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state) +static int +TupleHashTableMatch(const void *key1, const void *key2, Size keysize) { - TupleHashEntry entry; - - entry = state->next_entry; - while (entry == NULL) - { - if (state->next_bucket >= hashtable->nbuckets) - { - /* No more entries in hashtable, so done */ - return NULL; - } - entry = hashtable->buckets[state->next_bucket++]; - } - state->next_entry = entry->next; - - return entry; + HeapTuple tuple1 = *(const HeapTuple *) key1; + HeapTuple tuple2 = *(const HeapTuple *) key2; + TupleHashTable hashtable = CurTupleHashTable; + + if (execTuplesMatch(tuple1, + tuple2, + hashtable->tupdesc, + hashtable->numCols, + hashtable->keyColIdx, + hashtable->eqfunctions, + hashtable->tempcxt)) + return 0; + else + return 1; } |