aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/arrayfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/arrayfuncs.c')
-rw-r--r--src/backend/utils/adt/arrayfuncs.c243
1 files changed, 243 insertions, 0 deletions
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index f8e94ec3652..fafc852a4f3 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -15,8 +15,10 @@
#include "postgres.h"
#include <ctype.h>
+#include <math.h>
#include "access/htup_details.h"
+#include "catalog/pg_type.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
@@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array,
Datum replace, bool replace_isnull,
bool remove, Oid collation,
FunctionCallInfo fcinfo);
+static int width_bucket_array_float8(Datum operand, ArrayType *thresholds);
+static int width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
+static int width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
/*
@@ -5502,3 +5513,235 @@ array_replace(PG_FUNCTION_ARGS)
fcinfo);
PG_RETURN_ARRAYTYPE_P(array);
}
+
+/*
+ * Implements width_bucket(anyelement, anyarray).
+ *
+ * 'thresholds' is an array containing lower bound values for each bucket;
+ * these must be sorted from smallest to largest, or bogus results will be
+ * produced. If N thresholds are supplied, the output is from 0 to N:
+ * 0 is for inputs < first threshold, N is for inputs >= last threshold.
+ */
+Datum
+width_bucket_array(PG_FUNCTION_ARGS)
+{
+ Datum operand = PG_GETARG_DATUM(0);
+ ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1);
+ Oid collation = PG_GET_COLLATION();
+ Oid element_type = ARR_ELEMTYPE(thresholds);
+ int result;
+
+ /* Check input */
+ if (ARR_NDIM(thresholds) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("thresholds must be one-dimensional array")));
+
+ if (array_contains_nulls(thresholds))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("thresholds array must not contain NULLs")));
+
+ /* We have a dedicated implementation for float8 data */
+ if (element_type == FLOAT8OID)
+ result = width_bucket_array_float8(operand, thresholds);
+ else
+ {
+ TypeCacheEntry *typentry;
+
+ /* Cache information about the input type */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ /*
+ * We have separate implementation paths for fixed- and variable-width
+ * types, since indexing the array is a lot cheaper in the first case.
+ */
+ if (typentry->typlen > 0)
+ result = width_bucket_array_fixed(operand, thresholds,
+ collation, typentry);
+ else
+ result = width_bucket_array_variable(operand, thresholds,
+ collation, typentry);
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(thresholds, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * width_bucket_array for float8 data.
+ */
+static int
+width_bucket_array_float8(Datum operand, ArrayType *thresholds)
+{
+ float8 op = DatumGetFloat8(operand);
+ float8 *thresholds_data;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (float8 *) ARR_DATA_PTR(thresholds);
+
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+
+ /*
+ * If the probe value is a NaN, it's greater than or equal to all possible
+ * threshold values (including other NaNs), so we need not search. Note
+ * that this would give the same result as searching even if the array
+ * contains multiple NaNs (as long as they're correctly sorted), since the
+ * loop logic will find the rightmost of multiple equal threshold values.
+ */
+ if (isnan(op))
+ return right;
+
+ /* Find the bucket */
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+
+ if (isnan(thresholds_data[mid]) || op < thresholds_data[mid])
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic fixed-width data types.
+ */
+static int
+width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int32 cmpresult;
+
+ ptr = thresholds_data + mid * typlen;
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic variable-width data types.
+ */
+static int
+width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ char typalign = typentry->typalign;
+ FunctionCallInfoData locfcinfo;
+ int left;
+ int right;
+
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int i;
+ int32 cmpresult;
+
+ /* Locate mid'th array element by advancing from left element */
+ ptr = thresholds_data;
+ for (i = left; i < mid; i++)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+
+ locfcinfo.arg[0] = operand;
+ locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen);
+ locfcinfo.argnull[0] = false;
+ locfcinfo.argnull[1] = false;
+ locfcinfo.isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo));
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ {
+ left = mid + 1;
+
+ /*
+ * Move the thresholds pointer to match new "left" index, so we
+ * don't have to seek over those elements again. This trick
+ * ensures we do only O(N) array indexing work, not O(N^2).
+ */
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ thresholds_data = (char *) att_align_nominal(ptr, typalign);
+ }
+ }
+
+ return left;
+}