diff options
author | Robert Haas <rhaas@postgresql.org> | 2017-08-31 22:21:21 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2017-08-31 22:21:21 -0400 |
commit | 81c5e46c490e2426db243eada186995da5bb0ba7 (patch) | |
tree | a6cb745131c45a06fa43746a17a69e1dc9daa44a /src/backend/utils/adt/arrayfuncs.c | |
parent | 2d44c58c79aeef2d376be0141057afbb9ec6b5bc (diff) | |
download | postgresql-81c5e46c490e2426db243eada186995da5bb0ba7.tar.gz postgresql-81c5e46c490e2426db243eada186995da5bb0ba7.zip |
Introduce 64-bit hash functions with a 64-bit seed.
This will be useful for hash partitioning, which needs a way to seed
the hash functions to avoid problems such as a hash index on a hash
partitioned table clumping all values into a small portion of the
bucket space; it's also useful for anything that wants a 64-bit hash
value rather than a 32-bit hash value.
Just in case somebody wants a 64-bit hash value that is compatible
with the existing 32-bit hash values, make the low 32 bits of the
64-bit hash value match the 32-bit hash value when the seed is 0.
Robert Haas and Amul Sul
Discussion: http://postgr.es/m/CA+Tgmoafx2yoJuhCQQOL5CocEi-w_uG4S2xT0EtgiJnPGcHW3g@mail.gmail.com
Diffstat (limited to 'src/backend/utils/adt/arrayfuncs.c')
-rw-r--r-- | src/backend/utils/adt/arrayfuncs.c | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 34dadd6e19e..522af7affc6 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -20,6 +20,7 @@ #endif #include <math.h> +#include "access/hash.h" #include "access/htup_details.h" #include "catalog/pg_type.h" #include "funcapi.h" @@ -4020,6 +4021,84 @@ hash_array(PG_FUNCTION_ARGS) PG_RETURN_UINT32(result); } +/* + * Returns 64-bit value by hashing a value to a 64-bit value, with a seed. + * Otherwise, similar to hash_array. + */ +Datum +hash_array_extended(PG_FUNCTION_ARGS) +{ + AnyArrayType *array = PG_GETARG_ANY_ARRAY(0); + uint64 seed = PG_GETARG_INT64(1); + int ndims = AARR_NDIM(array); + int *dims = AARR_DIMS(array); + Oid element_type = AARR_ELEMTYPE(array); + uint64 result = 1; + int nitems; + TypeCacheEntry *typentry; + int typlen; + bool typbyval; + char typalign; + int i; + array_iter iter; + FunctionCallInfoData locfcinfo; + + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; + if (typentry == NULL || + typentry->type_id != element_type) + { + typentry = lookup_type_cache(element_type, + TYPECACHE_HASH_EXTENDED_PROC_FINFO); + if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an extended hash function for type %s", + format_type_be(element_type)))); + fcinfo->flinfo->fn_extra = (void *) typentry; + } + typlen = typentry->typlen; + typbyval = typentry->typbyval; + typalign = typentry->typalign; + + InitFunctionCallInfoData(locfcinfo, &typentry->hash_extended_proc_finfo, 2, + InvalidOid, NULL, NULL); + + /* Loop over source data */ + nitems = ArrayGetNItems(ndims, dims); + array_iter_setup(&iter, array); + + for (i = 0; i < nitems; i++) + { + Datum elt; + bool isnull; + uint64 elthash; + + /* Get element, checking for NULL */ + elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign); + + if (isnull) + { + elthash = 0; + 
} + else + { + /* Apply the hash function */ + locfcinfo.arg[0] = elt; + locfcinfo.arg[1] = seed; + locfcinfo.argnull[0] = false; + locfcinfo.argnull[1] = false; + locfcinfo.isnull = false; + elthash = DatumGetUInt64(FunctionCallInvoke(&locfcinfo)); + } + + result = (result << 5) - result + elthash; + } + + AARR_FREE_IF_COPY(array, 0); + + PG_RETURN_UINT64(result); +} + /*----------------------------------------------------------------------------- * array overlap/containment comparisons |