diff options
Diffstat (limited to 'src/backend/utils/adt/float.c')
-rw-r--r-- | src/backend/utils/adt/float.c | 732 |
1 files changed, 470 insertions, 262 deletions
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c index df35557b737..d1e12c14bed 100644 --- a/src/backend/utils/adt/float.c +++ b/src/backend/utils/adt/float.c @@ -2401,13 +2401,39 @@ setseed(PG_FUNCTION_ARGS) * float8_stddev_samp - produce final result for float STDDEV_SAMP() * float8_stddev_pop - produce final result for float STDDEV_POP() * + * The naive schoolbook implementation of these aggregates works by + * accumulating sum(X) and sum(X^2). However, this approach suffers from + * large rounding errors in the final computation of quantities like the + * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the + * intermediate terms is potentially very large, while the difference is often + * quite small. + * + * Instead we use the Youngs-Cramer algorithm [1] which works by accumulating + * Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to + * incrementally update those quantities. The final computations of each of + * the aggregate values is then trivial and gives more accurate results (for + * example, the population variance is just Sxx/N). This algorithm is also + * fairly easy to generalize to allow parallel execution without loss of + * precision (see, for example, [2]). For more details, and a comparison of + * this with other algorithms, see [3]. + * * The transition datatype for all these aggregates is a 3-element array - * of float8, holding the values N, sum(X), sum(X*X) in that order. + * of float8, holding the values N, Sx, Sxx in that order. * * Note that we represent N as a float to avoid having to build a special * datatype. Given a reasonable floating-point implementation, there should * be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the * user will have doubtless lost interest anyway...) + * + * [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms, + * E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971. + * + * [2] Updating Formulae and a Pairwise Algorithm for Computing Sample + * Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982. + * + * [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich + * Schubert and Michael Gertz, Proceedings of the 30th International + * Conference on Scientific and Statistical Database Management, 2018. */ static float8 * @@ -2441,18 +2467,90 @@ float8_combine(PG_FUNCTION_ARGS) ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); float8 *transvalues1; float8 *transvalues2; - - if (!AggCheckCallContext(fcinfo, NULL)) - elog(ERROR, "aggregate function called in non-aggregate context"); + float8 N1, + Sx1, + Sxx1, + N2, + Sx2, + Sxx2, + tmp, + N, + Sx, + Sxx; transvalues1 = check_float8_array(transarray1, "float8_combine", 3); transvalues2 = check_float8_array(transarray2, "float8_combine", 3); - transvalues1[0] = transvalues1[0] + transvalues2[0]; - transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); - transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); + N1 = transvalues1[0]; + Sx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + + N2 = transvalues2[0]; + Sx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + + /*-------------------- + * The transition values combine using a generalization of the + * Youngs-Cramer algorithm as follows: + * + * N = N1 + N2 + * Sx = Sx1 + Sx2 + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N; + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Sx = Sx2; + Sxx = Sxx2; + } + else if (N2 == 0.0) + { + N = N1; + Sx = Sx1; + Sxx = Sxx1; + } + else + { + N = N1 + N2; + Sx = float8_pl(Sx1, Sx2); + tmp = Sx1 / N1 - Sx2 / N2; + Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N; + check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues1[0] = N; + transvalues1[1] = Sx; + transvalues1[2] = Sxx; + + PG_RETURN_ARRAYTYPE_P(transarray1); + } + else + { + Datum transdatums[3]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + + result = construct_array(transdatums, 3, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, 'd'); - PG_RETURN_ARRAYTYPE_P(transarray1); + PG_RETURN_ARRAYTYPE_P(result); + } } Datum @@ -2461,8 +2559,43 @@ float8_accum(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 newval = PG_GETARG_FLOAT8(1); float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; transvalues = check_float8_array(transarray, "float8_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx should be NaN + * if any of the inputs are infinite, so we intentionally prevent Sxx + * from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); + + Sxx = get_float8_nan(); + } + } /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2471,9 +2604,9 @@ float8_accum(PG_FUNCTION_ARGS) */ if (AggCheckCallContext(fcinfo, NULL)) { - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2482,9 +2615,9 @@ float8_accum(PG_FUNCTION_ARGS) Datum transdatums[3]; ArrayType *result; - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); result = construct_array(transdatums, 3, FLOAT8OID, @@ -2502,8 +2635,43 @@ float4_accum(PG_FUNCTION_ARGS) /* do computations as float8 */ float8 newval = PG_GETARG_FLOAT4(1); float8 *transvalues; + float8 N, + Sx, + Sxx, + tmp; transvalues = check_float8_array(transarray, "float4_accum", 3); + N = transvalues[0]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + + /* + * Use the Youngs-Cramer algorithm to incorporate the new value into the + * transition values. + */ + N += 1.0; + Sx += newval; + if (transvalues[0] > 0.0) + { + tmp = newval * N - Sx; + Sxx += tmp * tmp / (N * transvalues[0]); + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx should be NaN + * if any of the inputs are infinite, so we intentionally prevent Sxx + * from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx)) + { + if (!isinf(transvalues[1]) && !isinf(newval)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); + + Sxx = get_float8_nan(); + } + } /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2512,9 +2680,9 @@ float4_accum(PG_FUNCTION_ARGS) */ if (AggCheckCallContext(fcinfo, NULL)) { - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transvalues[0] = N; + transvalues[1] = Sx; + transvalues[2] = Sxx; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2523,9 +2691,9 @@ float4_accum(PG_FUNCTION_ARGS) Datum transdatums[3]; ArrayType *result; - transvalues[0] = transvalues[0] + 1.0; - transvalues[1] = float8_pl(transvalues[1], newval); - transvalues[2] = float8_pl(transvalues[2], float8_mul(newval, newval)); + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); result = construct_array(transdatums, 3, FLOAT8OID, @@ -2541,18 +2709,18 @@ float8_avg(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX; + Sx; transvalues = check_float8_array(transarray, "float8_avg", 3); N = transvalues[0]; - sumX = transvalues[1]; - /* ignore sumX2 */ + Sx = transvalues[1]; + /* ignore Sxx */ /* SQL defines AVG of no values to be NULL */ if (N == 0.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumX / N); + PG_RETURN_FLOAT8(Sx / N); } Datum @@ -2561,27 +2729,20 @@ float8_var_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_var_pop", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Sx */ + Sxx = transvalues[2]; /* Population variance is undefined when N is 0, so return NULL */ if (N == 0.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); + /* Note that Sxx is guaranteed to be non-negative */ - PG_RETURN_FLOAT8(numerator / (N * N)); + PG_RETURN_FLOAT8(Sxx / N); } Datum @@ -2590,27 +2751,20 @@ float8_var_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_var_samp", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Sx */ + Sxx = transvalues[2]; /* Sample variance is undefined when N is 0 or 1, so return NULL */ if (N <= 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); + /* Note that Sxx is guaranteed to be non-negative */ - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); - - PG_RETURN_FLOAT8(numerator / (N * (N - 1.0))); + PG_RETURN_FLOAT8(Sxx / (N - 1.0)); } Datum @@ -2619,27 +2773,20 @@ float8_stddev_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_stddev_pop", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Sx */ + Sxx = transvalues[2]; /* Population stddev is undefined when N is 0, so return NULL */ if (N == 0.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); + /* Note that Sxx is guaranteed to be non-negative */ - PG_RETURN_FLOAT8(sqrt(numerator / (N * N))); + PG_RETURN_FLOAT8(sqrt(Sxx / N)); } Datum @@ -2648,27 +2795,20 @@ float8_stddev_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_stddev_samp", 3); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + /* ignore Sx */ + Sxx = transvalues[2]; /* Sample stddev is undefined when N is 0 or 1, so return NULL */ if (N <= 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); + /* Note that Sxx is guaranteed to be non-negative */ - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); - - PG_RETURN_FLOAT8(sqrt(numerator / (N * (N - 1.0)))); + PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0))); } /* @@ -2676,9 +2816,14 @@ float8_stddev_samp(PG_FUNCTION_ARGS) * SQL2003 BINARY AGGREGATES * ========================= * + * As with the preceding aggregates, we use the Youngs-Cramer algorithm to + * reduce rounding errors in the aggregate final functions. + * * The transition datatype for all these aggregates is a 6-element array of - * float8, holding the values N, sum(X), sum(X*X), sum(Y), sum(Y*Y), sum(X*Y) - * in that order. Note that Y is the first argument to the aggregates! + * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y), + * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order. + * + * Note that Y is the first argument to all these aggregates! * * It might seem attractive to optimize this by having multiple accumulator * functions that only calculate the sums actually needed. But on most @@ -2695,32 +2840,66 @@ float8_regr_accum(PG_FUNCTION_ARGS) float8 newvalX = PG_GETARG_FLOAT8(2); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY; + Sx, + Sxx, + Sy, + Syy, + Sxy, + tmpX, + tmpY, + scale; transvalues = check_float8_array(transarray, "float8_regr_accum", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + Sy = transvalues[3]; + Syy = transvalues[4]; + Sxy = transvalues[5]; + /* + * Use the Youngs-Cramer algorithm to incorporate the new values into the + * transition values. + */ N += 1.0; - sumX += newvalX; - check_float8_val(sumX, isinf(transvalues[1]) || isinf(newvalX), true); - sumX2 += newvalX * newvalX; - check_float8_val(sumX2, isinf(transvalues[2]) || isinf(newvalX), true); - sumY += newvalY; - check_float8_val(sumY, isinf(transvalues[3]) || isinf(newvalY), true); - sumY2 += newvalY * newvalY; - check_float8_val(sumY2, isinf(transvalues[4]) || isinf(newvalY), true); - sumXY += newvalX * newvalY; - check_float8_val(sumXY, isinf(transvalues[5]) || isinf(newvalX) || - isinf(newvalY), true); + Sx += newvalX; + Sy += newvalY; + if (transvalues[0] > 0.0) + { + tmpX = newvalX * N - Sx; + tmpY = newvalY * N - Sy; + scale = 1.0 / (N * transvalues[0]); + Sxx += tmpX * tmpX * scale; + Syy += tmpY * tmpY * scale; + Sxy += tmpX * tmpY * scale; + + /* + * Overflow check. We only report an overflow error when finite + * inputs lead to infinite results. Note also that Sxx, Syy and Sxy + * should be NaN if any of the relevant inputs are infinite, so we + * intentionally prevent them from becoming infinite. + */ + if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy)) + { + if (((isinf(Sx) || isinf(Sxx)) && + !isinf(transvalues[1]) && !isinf(newvalX)) || + ((isinf(Sy) || isinf(Syy)) && + !isinf(transvalues[3]) && !isinf(newvalY)) || + (isinf(Sxy) && + !isinf(transvalues[1]) && !isinf(newvalX) && + !isinf(transvalues[3]) && !isinf(newvalY))) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("value out of range: overflow"))); + + if (isinf(Sxx)) + Sxx = get_float8_nan(); + if (isinf(Syy)) + Syy = get_float8_nan(); + if (isinf(Sxy)) + Sxy = get_float8_nan(); + } + } /* * If we're invoked as an aggregate, we can cheat and modify our first @@ -2730,11 +2909,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) if (AggCheckCallContext(fcinfo, NULL)) { transvalues[0] = N; - transvalues[1] = sumX; - transvalues[2] = sumX2; - transvalues[3] = sumY; - transvalues[4] = sumY2; - transvalues[5] = sumXY; + transvalues[1] = Sx; + transvalues[2] = Sxx; + transvalues[3] = Sy; + transvalues[4] = Syy; + transvalues[5] = Sxy; PG_RETURN_ARRAYTYPE_P(transarray); } @@ -2744,11 +2923,11 @@ float8_regr_accum(PG_FUNCTION_ARGS) ArrayType *result; transdatums[0] = Float8GetDatumFast(N); - transdatums[1] = Float8GetDatumFast(sumX); - transdatums[2] = Float8GetDatumFast(sumX2); - transdatums[3] = Float8GetDatumFast(sumY); - transdatums[4] = Float8GetDatumFast(sumY2); - transdatums[5] = Float8GetDatumFast(sumXY); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + transdatums[3] = Float8GetDatumFast(Sy); + transdatums[4] = Float8GetDatumFast(Syy); + transdatums[5] = Float8GetDatumFast(Sxy); result = construct_array(transdatums, 6, FLOAT8OID, @@ -2773,21 +2952,127 @@ float8_regr_combine(PG_FUNCTION_ARGS) ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1); float8 *transvalues1; float8 *transvalues2; - - if (!AggCheckCallContext(fcinfo, NULL)) - elog(ERROR, "aggregate function called in non-aggregate context"); + float8 N1, + Sx1, + Sxx1, + Sy1, + Syy1, + Sxy1, + N2, + Sx2, + Sxx2, + Sy2, + Syy2, + Sxy2, + tmp1, + tmp2, + N, + Sx, + Sxx, + Sy, + Syy, + Sxy; transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6); transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6); - transvalues1[0] = transvalues1[0] + transvalues2[0]; - transvalues1[1] = float8_pl(transvalues1[1], transvalues2[1]); - transvalues1[2] = float8_pl(transvalues1[2], transvalues2[2]); - transvalues1[3] = float8_pl(transvalues1[3], transvalues2[3]); - transvalues1[4] = float8_pl(transvalues1[4], transvalues2[4]); - transvalues1[5] = float8_pl(transvalues1[5], transvalues2[5]); + N1 = transvalues1[0]; + Sx1 = transvalues1[1]; + Sxx1 = transvalues1[2]; + Sy1 = transvalues1[3]; + Syy1 = transvalues1[4]; + Sxy1 = transvalues1[5]; + + N2 = transvalues2[0]; + Sx2 = transvalues2[1]; + Sxx2 = transvalues2[2]; + Sy2 = transvalues2[3]; + Syy2 = transvalues2[4]; + Sxy2 = transvalues2[5]; + + /*-------------------- + * The transition values combine using a generalization of the + * Youngs-Cramer algorithm as follows: + * + * N = N1 + N2 + * Sx = Sx1 + Sx2 + * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N + * Sy = Sy1 + Sy2 + * Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N + * Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N + * + * It's worth handling the special cases N1 = 0 and N2 = 0 separately + * since those cases are trivial, and we then don't need to worry about + * division-by-zero errors in the general case. + *-------------------- + */ + if (N1 == 0.0) + { + N = N2; + Sx = Sx2; + Sxx = Sxx2; + Sy = Sy2; + Syy = Syy2; + Sxy = Sxy2; + } + else if (N2 == 0.0) + { + N = N1; + Sx = Sx1; + Sxx = Sxx1; + Sy = Sy1; + Syy = Syy1; + Sxy = Sxy1; + } + else + { + N = N1 + N2; + Sx = float8_pl(Sx1, Sx2); + tmp1 = Sx1 / N1 - Sx2 / N2; + Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N; + check_float8_val(Sxx, isinf(Sxx1) || isinf(Sxx2), true); + Sy = float8_pl(Sy1, Sy2); + tmp2 = Sy1 / N1 - Sy2 / N2; + Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N; + check_float8_val(Syy, isinf(Syy1) || isinf(Syy2), true); + Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N; + check_float8_val(Sxy, isinf(Sxy1) || isinf(Sxy2), true); + } + + /* + * If we're invoked as an aggregate, we can cheat and modify our first + * parameter in-place to reduce palloc overhead. Otherwise we construct a + * new array with the updated transition data and return it. + */ + if (AggCheckCallContext(fcinfo, NULL)) + { + transvalues1[0] = N; + transvalues1[1] = Sx; + transvalues1[2] = Sxx; + transvalues1[3] = Sy; + transvalues1[4] = Syy; + transvalues1[5] = Sxy; + + PG_RETURN_ARRAYTYPE_P(transarray1); + } + else + { + Datum transdatums[6]; + ArrayType *result; + + transdatums[0] = Float8GetDatumFast(N); + transdatums[1] = Float8GetDatumFast(Sx); + transdatums[2] = Float8GetDatumFast(Sxx); + transdatums[3] = Float8GetDatumFast(Sy); + transdatums[4] = Float8GetDatumFast(Syy); + transdatums[5] = Float8GetDatumFast(Sxy); - PG_RETURN_ARRAYTYPE_P(transarray1); + result = construct_array(transdatums, 6, + FLOAT8OID, + sizeof(float8), FLOAT8PASSBYVAL, 'd'); + + PG_RETURN_ARRAYTYPE_P(result); + } } @@ -2797,27 +3082,19 @@ float8_regr_sxx(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - numerator; + Sxx; transvalues = check_float8_array(transarray, "float8_regr_sxx", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; + Sxx = transvalues[2]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumX2 - sumX * sumX; - check_float8_val(numerator, isinf(sumX2) || isinf(sumX), true); + /* Note that Sxx is guaranteed to be non-negative */ - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); - - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Sxx); } Datum @@ -2826,27 +3103,19 @@ float8_regr_syy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumY, - sumY2, - numerator; + Syy; transvalues = check_float8_array(transarray, "float8_regr_syy", 6); N = transvalues[0]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; + Syy = transvalues[4]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumY2 - sumY * sumY; - check_float8_val(numerator, isinf(sumY2) || isinf(sumY), true); - - /* Watch out for roundoff error producing a negative numerator */ - if (numerator <= 0.0) - PG_RETURN_FLOAT8(0.0); + /* Note that Syy is guaranteed to be non-negative */ - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Syy); } Datum @@ -2855,28 +3124,19 @@ float8_regr_sxy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_regr_sxy", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - /* A negative result is valid here */ - PG_RETURN_FLOAT8(numerator / N); + PG_RETURN_FLOAT8(Sxy); } Datum @@ -2885,17 +3145,17 @@ float8_regr_avgx(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX; + Sx; transvalues = check_float8_array(transarray, "float8_regr_avgx", 6); N = transvalues[0]; - sumX = transvalues[1]; + Sx = transvalues[1]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumX / N); + PG_RETURN_FLOAT8(Sx / N); } Datum @@ -2904,17 +3164,17 @@ float8_regr_avgy(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumY; + Sy; transvalues = check_float8_array(transarray, "float8_regr_avgy", 6); N = transvalues[0]; - sumY = transvalues[3]; + Sy = transvalues[3]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(sumY / N); + PG_RETURN_FLOAT8(Sy / N); } Datum @@ -2923,26 +3183,17 @@ float8_covar_pop(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_covar_pop", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - - PG_RETURN_FLOAT8(numerator / (N * N)); + PG_RETURN_FLOAT8(Sxy / N); } Datum @@ -2951,26 +3202,17 @@ float8_covar_samp(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumY, - sumXY, - numerator; + Sxy; transvalues = check_float8_array(transarray, "float8_covar_samp", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxy = transvalues[5]; /* if N is <= 1 we should return NULL */ if (N < 2.0) PG_RETURN_NULL(); - numerator = N * sumXY - sumX * sumY; - check_float8_val(numerator, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - - PG_RETURN_FLOAT8(numerator / (N * (N - 1.0))); + PG_RETURN_FLOAT8(Sxy / (N - 1.0)); } Datum @@ -2979,38 +3221,27 @@ float8_corr(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY, - numeratorX, - numeratorY, - numeratorXY; + Sxx, + Syy, + Sxy; transvalues = check_float8_array(transarray, "float8_corr", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorY = N * sumY2 - sumY * sumY; - check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0 || numeratorY <= 0) + /* Note that Sxx and Syy are guaranteed to be non-negative */ + + /* per spec, return NULL for horizontal and vertical lines */ + if (Sxx == 0 || Syy == 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXY / sqrt(numeratorX * numeratorY)); + PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy)); } Datum @@ -3019,42 +3250,31 @@ float8_regr_r2(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumY2, - sumXY, - numeratorX, - numeratorY, - numeratorXY; + Sxx, + Syy, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_r2", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumY2 = transvalues[4]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Syy = transvalues[4]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorY = N * sumY2 - sumY * sumY; - check_float8_val(numeratorY, isinf(sumY2) || isinf(sumY), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0) + /* Note that Sxx and Syy are guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) PG_RETURN_NULL(); - /* per spec, horizontal line produces 1.0 */ - if (numeratorY <= 0) + + /* per spec, return 1.0 for a horizontal line */ + if (Syy == 0) PG_RETURN_FLOAT8(1.0); - PG_RETURN_FLOAT8((numeratorXY * numeratorXY) / - (numeratorX * numeratorY)); + PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy)); } Datum @@ -3063,33 +3283,25 @@ float8_regr_slope(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumXY, - numeratorX, - numeratorXY; + Sxx, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_slope", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sxx = transvalues[2]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorXY = N * sumXY - sumX * sumY; - check_float8_val(numeratorXY, isinf(sumXY) || isinf(sumX) || - isinf(sumY), true); - if (numeratorX <= 0) + /* Note that Sxx is guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXY / numeratorX); + PG_RETURN_FLOAT8(Sxy / Sxx); } Datum @@ -3098,33 +3310,29 @@ float8_regr_intercept(PG_FUNCTION_ARGS) ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0); float8 *transvalues; float8 N, - sumX, - sumX2, - sumY, - sumXY, - numeratorX, - numeratorXXY; + Sx, + Sxx, + Sy, + Sxy; transvalues = check_float8_array(transarray, "float8_regr_intercept", 6); N = transvalues[0]; - sumX = transvalues[1]; - sumX2 = transvalues[2]; - sumY = transvalues[3]; - sumXY = transvalues[5]; + Sx = transvalues[1]; + Sxx = transvalues[2]; + Sy = transvalues[3]; + Sxy = transvalues[5]; /* if N is 0 we should return NULL */ if (N < 1.0) PG_RETURN_NULL(); - numeratorX = N * sumX2 - sumX * sumX; - check_float8_val(numeratorX, isinf(sumX2) || isinf(sumX), true); - numeratorXXY = sumY * sumX2 - sumX * sumXY; - check_float8_val(numeratorXXY, isinf(sumY) || isinf(sumX2) || - isinf(sumX) || isinf(sumXY), true); - if (numeratorX <= 0) + /* Note that Sxx is guaranteed to be non-negative */ + + /* per spec, return NULL for a vertical line */ + if (Sxx == 0) PG_RETURN_NULL(); - PG_RETURN_FLOAT8(numeratorXXY / numeratorX); + PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N); } |