author | Tom Lane <tgl@sss.pgh.pa.us> | 2003-01-10 23:54:24 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2003-01-10 23:54:24 +0000 |
commit | 1afac12910b82cf68d064284391209f41a6adcf9 | |
tree | dee75ebb24c486541ab2cf55b29f71a367dc0a7b | /src/backend/executor/execGrouping.c |
parent | c83702606c3fb8c2bcef7b6f57232c982ff62ab3 | |
Create a new file executor/execGrouping.c to centralize utility routines
shared by nodeGroup, nodeAgg, and soon nodeSubplan.
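For orientation, the sketch below shows how a caller in the style of nodeGroup might use the two tuple-matching helpers introduced by this commit. It is illustrative only and not part of the patch; the function name `same_group`, its argument list, and the surrounding executor state (tuple descriptor, grouping-column array, per-tuple memory context) are assumed to be supplied by the calling node.

```c
/* Illustrative caller (not part of this commit): decide whether the current
 * tuple belongs to the same group as the previous one, nodeGroup-style. */
#include "postgres.h"

#include "access/heapam.h"
#include "executor/executor.h"

static bool
same_group(HeapTuple prevTuple, HeapTuple curTuple,
           TupleDesc tupdesc, int numCols, AttrNumber *grpColIdx,
           FmgrInfo *eqfunctions, MemoryContext evalContext)
{
    /*
     * eqfunctions would normally be built once at node initialization:
     *     eqfunctions = execTuplesMatchPrepare(tupdesc, numCols, grpColIdx);
     * execTuplesMatch resets evalContext on every call, so a short-lived
     * per-tuple context must be passed here.
     */
    return execTuplesMatch(prevTuple, curTuple, tupdesc,
                           numCols, grpColIdx,
                           eqfunctions, evalContext);
}
```

Passing a short-lived context keeps any memory used by the per-column equality functions from accumulating across input rows.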
Diffstat (limited to 'src/backend/executor/execGrouping.c')
-rw-r--r-- | src/backend/executor/execGrouping.c | 369 |
1 file changed, 369 insertions, 0 deletions
diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c
new file mode 100644
index 00000000000..e3f7720ca75
--- /dev/null
+++ b/src/backend/executor/execGrouping.c
@@ -0,0 +1,369 @@
+/*-------------------------------------------------------------------------
+ *
+ * execGrouping.c
+ *	  executor utility routines for grouping, hashing, and aggregation
+ *
+ * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  $Header: /cvsroot/pgsql/src/backend/executor/execGrouping.c,v 1.1 2003/01/10 23:54:24 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/hash.h"
+#include "access/heapam.h"
+#include "executor/executor.h"
+#include "parser/parse_oper.h"
+#include "utils/memutils.h"
+
+
+/*****************************************************************************
+ *		Utility routines for grouping tuples together
+ *
+ * These routines actually implement SQL's notion of "distinct/not distinct".
+ * Two tuples match if they are not distinct in all the compared columns,
+ * i.e., the column values are either both null, or both non-null and equal.
+ *****************************************************************************/
+
+/*
+ * execTuplesMatch
+ *		Return true if two tuples match in all the indicated fields.
+ *		This is used to detect group boundaries in nodeGroup and nodeAgg,
+ *		and to decide whether two tuples are distinct or not in nodeUnique.
+ *
+ * tuple1, tuple2: the tuples to compare
+ * tupdesc: tuple descriptor applying to both tuples
+ * numCols: the number of attributes to be examined
+ * matchColIdx: array of attribute column numbers
+ * eqFunctions: array of fmgr lookup info for the equality functions to use
+ * evalContext: short-term memory context for executing the functions
+ *
+ * NB: evalContext is reset each time!
+ */
+bool
+execTuplesMatch(HeapTuple tuple1,
+				HeapTuple tuple2,
+				TupleDesc tupdesc,
+				int numCols,
+				AttrNumber *matchColIdx,
+				FmgrInfo *eqfunctions,
+				MemoryContext evalContext)
+{
+	MemoryContext oldContext;
+	bool		result;
+	int			i;
+
+	/* Reset and switch into the temp context. */
+	MemoryContextReset(evalContext);
+	oldContext = MemoryContextSwitchTo(evalContext);
+
+	/*
+	 * We cannot report a match without checking all the fields, but we
+	 * can report a non-match as soon as we find unequal fields.  So,
+	 * start comparing at the last field (least significant sort key).
+	 * That's the most likely to be different if we are dealing with
+	 * sorted input.
+	 */
+	result = true;
+
+	for (i = numCols; --i >= 0;)
+	{
+		AttrNumber	att = matchColIdx[i];
+		Datum		attr1,
+					attr2;
+		bool		isNull1,
+					isNull2;
+
+		attr1 = heap_getattr(tuple1,
+							 att,
+							 tupdesc,
+							 &isNull1);
+
+		attr2 = heap_getattr(tuple2,
+							 att,
+							 tupdesc,
+							 &isNull2);
+
+		if (isNull1 != isNull2)
+		{
+			result = false;		/* one null and one not; they aren't equal */
+			break;
+		}
+
+		if (isNull1)
+			continue;			/* both are null, treat as equal */
+
+		/* Apply the type-specific equality function */
+
+		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
+										attr1, attr2)))
+		{
+			result = false;		/* they aren't equal */
+			break;
+		}
+	}
+
+	MemoryContextSwitchTo(oldContext);
+
+	return result;
+}
+
+
+/*
+ * execTuplesMatchPrepare
+ *		Look up the equality functions needed for execTuplesMatch.
+ *		The result is a palloc'd array.
+ */
+FmgrInfo *
+execTuplesMatchPrepare(TupleDesc tupdesc,
+					   int numCols,
+					   AttrNumber *matchColIdx)
+{
+	FmgrInfo   *eqfunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
+	int			i;
+
+	for (i = 0; i < numCols; i++)
+	{
+		AttrNumber	att = matchColIdx[i];
+		Oid			typid = tupdesc->attrs[att - 1]->atttypid;
+		Oid			eq_function;
+
+		eq_function = equality_oper_funcid(typid);
+		fmgr_info(eq_function, &eqfunctions[i]);
+	}
+
+	return eqfunctions;
+}
+
+
+/*****************************************************************************
+ *		Utility routines for hashing
+ *****************************************************************************/
+
+/*
+ * ComputeHashFunc
+ *
+ *		the hash function for hash joins (also used for hash aggregation)
+ *
+ * XXX this probably ought to be replaced with datatype-specific
+ * hash functions, such as those already implemented for hash indexes.
+ */
+uint32
+ComputeHashFunc(Datum key, int typLen, bool byVal)
+{
+	unsigned char *k;
+
+	if (byVal)
+	{
+		/*
+		 * If it's a by-value data type, just hash the whole Datum value.
+		 * This assumes that datatypes narrower than Datum are
+		 * consistently padded (either zero-extended or sign-extended, but
+		 * not random bits) to fill Datum; see the XXXGetDatum macros in
+		 * postgres.h.  NOTE: it would not work to do hash_any(&key, len)
+		 * since this would get the wrong bytes on a big-endian machine.
+		 */
+		k = (unsigned char *) &key;
+		typLen = sizeof(Datum);
+	}
+	else
+	{
+		if (typLen > 0)
+		{
+			/* fixed-width pass-by-reference type */
+			k = (unsigned char *) DatumGetPointer(key);
+		}
+		else if (typLen == -1)
+		{
+			/*
+			 * It's a varlena type, so 'key' points to a "struct varlena".
+			 * NOTE: VARSIZE returns the "real" data length plus the
+			 * sizeof the "vl_len" attribute of varlena (the length
+			 * information). 'key' points to the beginning of the varlena
+			 * struct, so we have to use "VARDATA" to find the beginning
+			 * of the "real" data.  Also, we have to be careful to detoast
+			 * the datum if it's toasted.  (We don't worry about freeing
+			 * the detoasted copy; that happens for free when the
+			 * per-tuple memory context is reset in ExecHashGetBucket.)
+			 */
+			struct varlena *vkey = PG_DETOAST_DATUM(key);
+
+			typLen = VARSIZE(vkey) - VARHDRSZ;
+			k = (unsigned char *) VARDATA(vkey);
+		}
+		else if (typLen == -2)
+		{
+			/* It's a null-terminated C string */
+			typLen = strlen(DatumGetCString(key)) + 1;
+			k = (unsigned char *) DatumGetPointer(key);
+		}
+		else
+		{
+			elog(ERROR, "ComputeHashFunc: Invalid typLen %d", typLen);
+			k = NULL;			/* keep compiler quiet */
+		}
+	}
+
+	return DatumGetUInt32(hash_any(k, typLen));
+}
+
+
+/*****************************************************************************
+ *		Utility routines for all-in-memory hash tables
+ *
+ * These routines build hash tables for grouping tuples together (eg, for
+ * hash aggregation).  There is one entry for each not-distinct set of tuples
+ * presented.
+ *****************************************************************************/
+
+/*
+ * Construct an empty TupleHashTable
+ *
+ *	numCols, keyColIdx: identify the tuple fields to use as lookup key
+ *	eqfunctions: equality comparison functions to use
+ *	nbuckets: number of buckets to make
+ *	entrysize: size of each entry (at least sizeof(TupleHashEntryData))
+ *	tablecxt: memory context in which to store table and table entries
+ *	tempcxt: short-lived context for evaluation hash and comparison functions
+ *
+ * The eqfunctions array may be made with execTuplesMatchPrepare().
+ *
+ * Note that keyColIdx and eqfunctions must be allocated in storage that
+ * will live as long as the hashtable does.
+ */
+TupleHashTable
+BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
+					FmgrInfo *eqfunctions,
+					int nbuckets, Size entrysize,
+					MemoryContext tablecxt, MemoryContext tempcxt)
+{
+	TupleHashTable hashtable;
+	Size		tabsize;
+
+	Assert(nbuckets > 0);
+	Assert(entrysize >= sizeof(TupleHashEntryData));
+
+	tabsize = sizeof(TupleHashTableData) +
+		(nbuckets - 1) * sizeof(TupleHashEntry);
+	hashtable = (TupleHashTable) MemoryContextAllocZero(tablecxt, tabsize);
+
+	hashtable->numCols = numCols;
+	hashtable->keyColIdx = keyColIdx;
+	hashtable->eqfunctions = eqfunctions;
+	hashtable->tablecxt = tablecxt;
+	hashtable->tempcxt = tempcxt;
+	hashtable->entrysize = entrysize;
+	hashtable->nbuckets = nbuckets;
+
+	return hashtable;
+}
+
+/*
+ * Find or create a hashtable entry for the tuple group containing the
+ * given tuple.
+ *
+ * On return, *isnew is true if the entry is newly created, false if it
+ * existed already.  Any extra space in a new entry has been zeroed.
+ */
+TupleHashEntry
+LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
+					 bool *isnew)
+{
+	int			numCols = hashtable->numCols;
+	AttrNumber *keyColIdx = hashtable->keyColIdx;
+	HeapTuple	tuple = slot->val;
+	TupleDesc	tupdesc = slot->ttc_tupleDescriptor;
+	uint32		hashkey = 0;
+	int			i;
+	int			bucketno;
+	TupleHashEntry entry;
+	MemoryContext oldContext;
+
+	/* Need to run the hash function in short-lived context */
+	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
+
+	for (i = 0; i < numCols; i++)
+	{
+		AttrNumber	att = keyColIdx[i];
+		Datum		attr;
+		bool		isNull;
+
+		/* rotate hashkey left 1 bit at each step */
+		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
+
+		attr = heap_getattr(tuple, att, tupdesc, &isNull);
+		if (isNull)
+			continue;			/* treat nulls as having hash key 0 */
+		hashkey ^= ComputeHashFunc(attr,
+								   (int) tupdesc->attrs[att - 1]->attlen,
+								   tupdesc->attrs[att - 1]->attbyval);
+	}
+	bucketno = hashkey % (uint32) hashtable->nbuckets;
+
+	for (entry = hashtable->buckets[bucketno];
+		 entry != NULL;
+		 entry = entry->next)
+	{
+		/* Quick check using hashkey */
+		if (entry->hashkey != hashkey)
+			continue;
+		if (execTuplesMatch(entry->firstTuple,
+							tuple,
+							tupdesc,
+							numCols, keyColIdx,
+							hashtable->eqfunctions,
+							hashtable->tempcxt))
+		{
+			MemoryContextSwitchTo(oldContext);
+			*isnew = false;
+			return entry;
+		}
+	}
+
+	/* Not there, so build a new one */
+	MemoryContextSwitchTo(hashtable->tablecxt);
+
+	entry = (TupleHashEntry) palloc0(hashtable->entrysize);
+
+	entry->hashkey = hashkey;
+	entry->firstTuple = heap_copytuple(tuple);
+
+	entry->next = hashtable->buckets[bucketno];
+	hashtable->buckets[bucketno] = entry;
+
+	MemoryContextSwitchTo(oldContext);
+
+	*isnew = true;
+
+	return entry;
+}
+
+/*
+ * Walk through all the entries of a hash table, in no special order.
+ * Returns NULL when no more entries remain.
+ *
+ * Iterator state must be initialized with ResetTupleHashIterator() macro.
+ */
+TupleHashEntry
+ScanTupleHashTable(TupleHashTable hashtable, TupleHashIterator *state)
+{
+	TupleHashEntry entry;
+
+	entry = state->next_entry;
+	while (entry == NULL)
+	{
+		if (state->next_bucket >= hashtable->nbuckets)
+		{
+			/* No more entries in hashtable, so done */
+			return NULL;
+		}
+		entry = hashtable->buckets[state->next_bucket++];
+	}
+	state->next_entry = entry->next;
+
+	return entry;
+}
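A usage sketch of the new in-memory hash table API follows, roughly as a hashed-aggregation node might drive it. It is not part of the commit: `AggHashEntryData`, `hash_demo`, the bucket count of 256, and the memory-context arguments are invented for illustration; only the execGrouping.c entry points and the "entry struct starts with TupleHashEntryData" convention come from the code above.

```c
/* Illustrative caller (not part of this commit): group incoming tuples with
 * the new TupleHashTable API, roughly as hashed aggregation would. */
#include "postgres.h"

#include "executor/executor.h"

/* Invented per-group entry layout; TupleHashEntryData must come first,
 * and entrysize passed to BuildTupleHashTable must cover the whole struct. */
typedef struct AggHashEntryData
{
	TupleHashEntryData shared;	/* required first field */
	long		count;			/* example per-group payload */
} AggHashEntryData;

static void
hash_demo(TupleDesc tupdesc, int numCols, AttrNumber *keyColIdx,
		  TupleTableSlot *inputslot,
		  MemoryContext tablecxt, MemoryContext tempcxt)
{
	FmgrInfo   *eqfunctions;
	TupleHashTable hashtable;
	AggHashEntryData *entry;
	bool		isnew;
	TupleHashIterator iter;
	TupleHashEntry e;

	/* keyColIdx and eqfunctions must outlive the table (see comments above) */
	eqfunctions = execTuplesMatchPrepare(tupdesc, numCols, keyColIdx);
	hashtable = BuildTupleHashTable(numCols, keyColIdx, eqfunctions,
									256,	/* nbuckets: arbitrary for the demo */
									sizeof(AggHashEntryData),
									tablecxt, tempcxt);

	/* Find or create the group for this tuple; in a real node this would
	 * run once per input tuple. */
	entry = (AggHashEntryData *)
		LookupTupleHashEntry(hashtable, inputslot, &isnew);
	if (isnew)
		entry->count = 0;		/* extra space was zeroed, but be explicit */
	entry->count++;

	/* After all input is consumed, walk the groups in no particular order.
	 * The field assignments mirror what the ResetTupleHashIterator() macro
	 * from the headers is expected to do. */
	iter.next_bucket = 0;
	iter.next_entry = NULL;
	while ((e = ScanTupleHashTable(hashtable, &iter)) != NULL)
	{
		/* emit or finalize ((AggHashEntryData *) e) here */
	}
}
```

Note how the table context holds the entries and copied tuples for the life of the table, while the temp context is reset per lookup by the hashing and matching code.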