diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2010-10-30 21:55:20 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2010-10-30 21:56:11 -0400 |
commit | 186cbbda8f8dc5e42f68fc7892f206a76d56a20f (patch) | |
tree | 2c909d8365726683a61515b639c02a9ac00682f4 /src/backend/utils/adt/arrayfuncs.c | |
parent | bd1ff9713369c2f54391112b92e0c22ab5c99180 (diff) | |
download | postgresql-186cbbda8f8dc5e42f68fc7892f206a76d56a20f.tar.gz postgresql-186cbbda8f8dc5e42f68fc7892f206a76d56a20f.zip |
Provide hashing support for arrays.
The core of this patch is hash_array() and associated typcache
infrastructure, which works just about exactly like the existing support
for array comparison.
In addition I did some work to ensure that the planner won't think that an
array type is hashable unless its element type is hashable, and similarly
for sorting. This includes adding a datatype parameter to op_hashjoinable
and op_mergejoinable, and adding an explicit "hashable" flag to
SortGroupClause. The lack of a cross-check on the element type was a
pre-existing bug in mergejoin support --- but it didn't matter so much
before, because if you couldn't sort the element type there wasn't any good
alternative to failing anyhow. Now that we have the alternative of hashing
the array type, there are cases where we can avoid a failure by being picky
at the planner stage, so it's time to be picky.
The issue of exactly how to combine the per-element hash values to produce
an array hash is still open for discussion, but the rest of this is pretty
solid, so I'll commit it as-is.
Diffstat (limited to 'src/backend/utils/adt/arrayfuncs.c')
-rw-r--r-- | src/backend/utils/adt/arrayfuncs.c | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 43a2a6b596a..1b7bec5a317 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3449,6 +3449,117 @@ array_cmp(FunctionCallInfo fcinfo) /*----------------------------------------------------------------------------- + * array hashing + * Hash the elements and combine the results. + *---------------------------------------------------------------------------- + */ + +Datum +hash_array(PG_FUNCTION_ARGS) +{ + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); + int ndims = ARR_NDIM(array); + int *dims = ARR_DIMS(array); + Oid element_type = ARR_ELEMTYPE(array); + uint32 result = 0; + int nitems; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + char *ptr; + bits8 *bitmap; + int bitmask; + int i; + FunctionCallInfoData locfcinfo; + + /* + * We arrange to look up the hash function only once per series of calls, + * assuming the element type doesn't change underneath us. The typcache + * is used so that we have no memory leakage when being used as an index + * support function. + */ + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_HASH_PROC_FINFO); + if (!OidIsValid(typentry->hash_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a hash function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + /* + * apply the hash function to each array element. + */ + InitFunctionCallInfoData(locfcinfo, &typentry->hash_proc_finfo, 1, + NULL, NULL); + + /* Loop over source data */ + nitems = ArrayGetNItems(ndims, dims); + ptr = ARR_DATA_PTR(array); + bitmap = ARR_NULLBITMAP(array); + bitmask = 1; + + for (i = 0; i < nitems; i++) + { + uint32 elthash; + + /* Get element, checking for NULL */ + if (bitmap && (*bitmap & bitmask) == 0) + { + /* Treat nulls as having hashvalue 0 */ + elthash = 0; + } + else + { + Datum elt; + + elt = fetch_att(ptr, typbyval, typlen); + ptr = att_addlength_pointer(ptr, typlen, ptr); + ptr = (char *) att_align_nominal(ptr, typalign); + + /* Apply the hash function */ + locfcinfo.arg[0] = elt; + locfcinfo.argnull[0] = false; + locfcinfo.isnull = false; + elthash = DatumGetUInt32(FunctionCallInvoke(&locfcinfo)); + } + + /* advance bitmap pointer if any */ + if (bitmap) + { + bitmask <<= 1; + if (bitmask == 0x100) + { + bitmap++; + bitmask = 1; + } + } + + /* + * Combine hash values of successive elements by rotating the previous + * value left 1 bit, then XOR'ing in the new element's hash value. + */ + result = (result << 1) | (result >> 31); + result ^= elthash; + } + + /* Avoid leaking memory when handed toasted input. */ + PG_FREE_IF_COPY(array, 0); + + PG_RETURN_UINT32(result); +} + + +/*----------------------------------------------------------------------------- * array overlap/containment comparisons * These use the same methods of comparing array elements as array_eq. * We consider only the elements of the arrays, ignoring dimensionality. |