aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/numeric.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2014-04-12 20:33:09 -0400
committerTom Lane <tgl@sss.pgh.pa.us>2014-04-12 20:33:09 -0400
commit9d229f399e87d2ae7132c2e8feef317ce1479728 (patch)
treef808e533fa6b89202733e2b8666361dc5482ca97 /src/backend/utils/adt/numeric.c
parenta9d9acbf219b9e96585779cd5f99d674d4ccba74 (diff)
downloadpostgresql-9d229f399e87d2ae7132c2e8feef317ce1479728.tar.gz
postgresql-9d229f399e87d2ae7132c2e8feef317ce1479728.zip
Provide moving-aggregate support for a bunch of numerical aggregates.
First installment of the promised moving-aggregate support in built-in aggregates: count(), sum(), avg(), stddev() and variance() for assorted datatypes, though not for float4/float8. In passing, remove a 2001-vintage kluge in interval_accum(): interval array elements have been properly aligned since around 2003, but nobody remembered to take out this workaround. Also, fix a thinko in the opr_sanity tests for moving-aggregate catalog entries. David Rowley and Florian Pflug, reviewed by Dean Rasheed
Diffstat (limited to 'src/backend/utils/adt/numeric.c')
-rw-r--r--src/backend/utils/adt/numeric.c379
1 files changed, 334 insertions, 45 deletions
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
index 64eb0f8d16e..bf4f29d14d7 100644
--- a/src/backend/utils/adt/numeric.c
+++ b/src/backend/utils/adt/numeric.c
@@ -2506,22 +2506,19 @@ numeric_float4(PG_FUNCTION_ARGS)
* Actually, it's a pointer to a NumericAggState allocated in the aggregate
* context. The digit buffers for the NumericVars will be there too.
*
- * Note that the transition functions don't bother to create a NumericAggState
- * until they see the first non-null input value; therefore, the final
- * functions will never see N == 0. (The case is represented as a NULL
- * state pointer, instead.)
- *
* ----------------------------------------------------------------------
*/
typedef struct NumericAggState
{
bool calcSumX2; /* if true, calculate sumX2 */
- bool isNaN; /* true if any processed number was NaN */
MemoryContext agg_context; /* context we're calculating in */
int64 N; /* count of processed numbers */
NumericVar sumX; /* sum of processed numbers */
NumericVar sumX2; /* sum of squares of processed numbers */
+ int maxScale; /* maximum scale seen so far */
+ int64 maxScaleCount; /* number of values seen with maximum scale */
+ int64 NaNcount; /* count of NaN values (not included in N!) */
} NumericAggState;
/*
@@ -2559,16 +2556,28 @@ do_numeric_accum(NumericAggState *state, Numeric newval)
NumericVar X2;
MemoryContext old_context;
- /* result is NaN if any processed number is NaN */
- if (state->isNaN || NUMERIC_IS_NAN(newval))
+ /* Count NaN inputs separately from all else */
+ if (NUMERIC_IS_NAN(newval))
{
- state->isNaN = true;
+ state->NaNcount++;
return;
}
/* load processed number in short-lived context */
init_var_from_num(newval, &X);
+ /*
+ * Track the highest input dscale that we've seen, to support inverse
+ * transitions (see do_numeric_discard).
+ */
+ if (X.dscale > state->maxScale)
+ {
+ state->maxScale = X.dscale;
+ state->maxScaleCount = 1;
+ }
+ else if (X.dscale == state->maxScale)
+ state->maxScaleCount++;
+
/* if we need X^2, calculate that in short-lived context */
if (state->calcSumX2)
{
@@ -2600,6 +2609,97 @@ do_numeric_accum(NumericAggState *state, Numeric newval)
}
/*
+ * Attempt to remove an input value from the aggregated state.
+ *
+ * If the value cannot be removed then the function will return false; the
+ * possible reasons for failing are described below.
+ *
+ * If we aggregate the values 1.01 and 2 then the result will be 3.01.
+ * If we are then asked to un-aggregate the 1.01 then we must fail as we
+ * won't be able to tell what the new aggregated value's dscale should be.
+ * We don't want to return 2.00 (dscale = 2), since the sum's dscale would
+ * have been zero if we'd really aggregated only 2.
+ *
+ * Note: alternatively, we could count the number of inputs with each possible
+ * dscale (up to some sane limit). Not yet clear if it's worth the trouble.
+ */
+static bool
+do_numeric_discard(NumericAggState *state, Numeric newval)
+{
+ NumericVar X;
+ NumericVar X2;
+ MemoryContext old_context;
+
+ /* Count NaN inputs separately from all else */
+ if (NUMERIC_IS_NAN(newval))
+ {
+ state->NaNcount--;
+ return true;
+ }
+
+ /* load processed number in short-lived context */
+ init_var_from_num(newval, &X);
+
+ /*
+ * state->sumX's dscale is the maximum dscale of any of the inputs.
+ * Removing the last input with that dscale would require us to recompute
+ * the maximum dscale of the *remaining* inputs, which we cannot do unless
+ * no more non-NaN inputs remain at all. So we report a failure instead,
+ * and force the aggregation to be redone from scratch.
+ */
+ if (X.dscale == state->maxScale)
+ {
+ if (state->maxScaleCount > 1 || state->maxScale == 0)
+ {
+ /*
+ * Some remaining inputs have same dscale, or dscale hasn't
+ * gotten above zero anyway
+ */
+ state->maxScaleCount--;
+ }
+ else if (state->N == 1)
+ {
+ /* No remaining non-NaN inputs at all, so reset maxScale */
+ state->maxScale = 0;
+ state->maxScaleCount = 0;
+ }
+ else
+ {
+ /* Correct new maxScale is uncertain, must fail */
+ return false;
+ }
+ }
+
+ /* if we need X^2, calculate that in short-lived context */
+ if (state->calcSumX2)
+ {
+ init_var(&X2);
+ mul_var(&X, &X, &X2, X.dscale * 2);
+ }
+
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(state->agg_context);
+
+ if (state->N-- > 1)
+ {
+ /* De-accumulate sums */
+ sub_var(&(state->sumX), &X, &(state->sumX));
+
+ if (state->calcSumX2)
+ sub_var(&(state->sumX2), &X2, &(state->sumX2));
+ }
+ else
+ {
+ /* Sums will be reset by next call to do_numeric_accum */
+ Assert(state->N == 0);
+ }
+
+ MemoryContextSwitchTo(old_context);
+
+ return true;
+}
+
+/*
* Generic transition function for numeric aggregates that require sumX2.
*/
Datum
@@ -2609,14 +2709,12 @@ numeric_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
- if (!PG_ARGISNULL(1))
- {
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, true);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+ if (!PG_ARGISNULL(1))
do_numeric_accum(state, PG_GETARG_NUMERIC(1));
- }
PG_RETURN_POINTER(state);
}
@@ -2631,18 +2729,42 @@ numeric_avg_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
- if (!PG_ARGISNULL(1))
- {
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, false);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, false);
+ if (!PG_ARGISNULL(1))
do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * Generic inverse transition function for numeric aggregates
+ * (with or without requirement for X^2).
+ */
+Datum
+numeric_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "numeric_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ /* If we fail to perform the inverse transition, return NULL */
+ if (!do_numeric_discard(state, PG_GETARG_NUMERIC(1)))
+ PG_RETURN_NULL();
}
PG_RETURN_POINTER(state);
}
+
/*
* Integer data types all use Numeric accumulators to share code and
* avoid risk of overflow. For int2 and int4 inputs, Numeric accumulation
@@ -2659,17 +2781,16 @@ int2_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric,
PG_GETARG_DATUM(1)));
-
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, true);
-
do_numeric_accum(state, newval);
}
@@ -2683,17 +2804,16 @@ int4_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
PG_GETARG_DATUM(1)));
-
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, true);
-
do_numeric_accum(state, newval);
}
@@ -2707,17 +2827,16 @@ int8_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
PG_GETARG_DATUM(1)));
-
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, true);
-
do_numeric_accum(state, newval);
}
@@ -2734,23 +2853,104 @@ int8_avg_accum(PG_FUNCTION_ARGS)
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, false);
+
if (!PG_ARGISNULL(1))
{
Numeric newval;
newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
PG_GETARG_DATUM(1)));
+ do_numeric_accum(state, newval);
+ }
- /* Create the state data when we see the first non-null input. */
- if (state == NULL)
- state = makeNumericAggState(fcinfo, false);
+ PG_RETURN_POINTER(state);
+}
- do_numeric_accum(state, newval);
+
+/*
+ * Inverse transition functions to go with the above.
+ */
+
+Datum
+int2_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int2_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ Numeric newval;
+
+ newval = DatumGetNumeric(DirectFunctionCall1(int2_numeric,
+ PG_GETARG_DATUM(1)));
+
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, newval))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
}
PG_RETURN_POINTER(state);
}
+Datum
+int4_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int4_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ Numeric newval;
+
+ newval = DatumGetNumeric(DirectFunctionCall1(int4_numeric,
+ PG_GETARG_DATUM(1)));
+
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, newval))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int8_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int8_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ Numeric newval;
+
+ newval = DatumGetNumeric(DirectFunctionCall1(int8_numeric,
+ PG_GETARG_DATUM(1)));
+
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, newval))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+ }
+
+ PG_RETURN_POINTER(state);
+}
Datum
numeric_avg(PG_FUNCTION_ARGS)
@@ -2760,10 +2960,12 @@ numeric_avg(PG_FUNCTION_ARGS)
Datum sumX_datum;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
- if (state == NULL) /* there were no non-null inputs */
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || (state->N + state->NaNcount) == 0)
PG_RETURN_NULL();
- if (state->isNaN) /* there was at least one NaN input */
+ if (state->NaNcount > 0) /* there was at least one NaN input */
PG_RETURN_NUMERIC(make_result(&const_nan));
N_datum = DirectFunctionCall1(int8_numeric, Int64GetDatum(state->N));
@@ -2778,10 +2980,12 @@ numeric_sum(PG_FUNCTION_ARGS)
NumericAggState *state;
state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
- if (state == NULL) /* there were no non-null inputs */
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || (state->N + state->NaNcount) == 0)
PG_RETURN_NULL();
- if (state->isNaN) /* there was at least one NaN input */
+ if (state->NaNcount > 0) /* there was at least one NaN input */
PG_RETURN_NUMERIC(make_result(&const_nan));
PG_RETURN_NUMERIC(make_result(&(state->sumX)));
@@ -2812,7 +3016,7 @@ numeric_stddev_internal(NumericAggState *state,
int rscale;
/* Deal with empty input and NaN-input cases */
- if (state == NULL)
+ if (state == NULL || (state->N + state->NaNcount) == 0)
{
*is_null = true;
return NULL;
@@ -2820,7 +3024,7 @@ numeric_stddev_internal(NumericAggState *state,
*is_null = false;
- if (state->isNaN)
+ if (state->NaNcount > 0)
return make_result(&const_nan);
init_var(&vN);
@@ -2965,6 +3169,9 @@ numeric_stddev_pop(PG_FUNCTION_ARGS)
* data value into the transition data: it doesn't know how to do the type
* conversion. The upshot is that these routines have to be marked non-strict
* and handle substitution of the first non-null input themselves.
+ *
+ * Note: these functions are used only in plain aggregation mode.
+ * In moving-aggregate mode, we use intX_avg_accum and intX_avg_accum_inv.
*/
Datum
@@ -3107,6 +3314,10 @@ int8_sum(PG_FUNCTION_ARGS)
/*
* Routines for avg(int2) and avg(int4). The transition datatype
* is a two-element int8 array, holding count and sum.
+ *
+ * These functions are also used for sum(int2) and sum(int4) when
+ * operating in moving-aggregate mode, since for correct inverse transitions
+ * we need to count the inputs.
*/
typedef struct Int8TransTypeData
@@ -3172,6 +3383,62 @@ int4_avg_accum(PG_FUNCTION_ARGS)
}
Datum
+int2_avg_accum_inv(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int16 newval = PG_GETARG_INT16(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count--;
+ transdata->sum -= newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
+int4_avg_accum_inv(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int32 newval = PG_GETARG_INT32(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count--;
+ transdata->sum -= newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
int8_avg(PG_FUNCTION_ARGS)
{
ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
@@ -3196,6 +3463,28 @@ int8_avg(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd));
}
+/*
+ * SUM(int2) and SUM(int4) both return int8, so we can use this
+ * final function for both.
+ */
+Datum
+int2int4_sum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Int8TransTypeData *transdata;
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+
+ /* SQL defines SUM of no values to be NULL */
+ if (transdata->count == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(Int64GetDatumFast(transdata->sum));
+}
+
/* ----------------------------------------------------------------------
*