diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2018-02-07 00:06:50 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2018-02-07 00:06:56 -0500 |
commit | 0a459cec96d3856f476c2db298c6b52f592894e8 (patch) | |
tree | 3d10f137b48de039c46914fa8e854bd69daaaec1 /src/backend/utils/adt | |
parent | 23209457314f6fd89fcd251a8173b0129aaa95a2 (diff) | |
download | postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.tar.gz postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.zip |
Support all SQL:2011 options for window frame clauses.
This patch adds the ability to use "RANGE offset PRECEDING/FOLLOWING"
frame boundaries in window functions. We'd punted on that back in the
original patch to add window functions, because it was not clear how to
do it in a reasonably data-type-extensible fashion. That problem is
resolved here by adding the ability for btree operator classes to provide
an "in_range" support function that defines how to add or subtract the
RANGE offset value. Factoring it this way also allows the operator class
to avoid overflow problems near the ends of the datatype's range, if it
wishes to expend effort on that. (In the committed patch, the integer
opclasses handle that issue, but it did not seem worth the trouble to
avoid overflow failures for datetime types.)
The patch includes in_range support for the integer_ops opfamily
(int2/int4/int8) as well as the standard datetime types. Support for
other numeric types has been requested, but that seems like suitable
material for a follow-on patch.
In addition, the patch adds GROUPS mode which counts the offset in
ORDER-BY peer groups rather than rows, and it adds the frame_exclusion
options specified by SQL:2011. As far as I can see, we are now fully
up to spec on window framing options.
Existing behaviors remain unchanged, except that I changed the errcode
for a couple of existing error reports to meet the SQL spec's expectation
that negative "offset" values should be reported as SQLSTATE 22013.
Internally and in relevant parts of the documentation, we now consistently
use the terminology "offset PRECEDING/FOLLOWING" rather than "value
PRECEDING/FOLLOWING", since the term "value" is confusingly vague.
Oliver Ford, reviewed and whacked around some by me
Discussion: https://postgr.es/m/CAGMVOdu9sivPAxbNN0X+q19Sfv9edEPv=HibOJhB14TJv_RCQg@mail.gmail.com
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r-- | src/backend/utils/adt/date.c | 107 | ||||
-rw-r--r-- | src/backend/utils/adt/int.c | 152 | ||||
-rw-r--r-- | src/backend/utils/adt/int8.c | 42 | ||||
-rw-r--r-- | src/backend/utils/adt/ruleutils.c | 20 | ||||
-rw-r--r-- | src/backend/utils/adt/timestamp.c | 104 |
5 files changed, 418 insertions, 7 deletions
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c index 747ef497897..eea29044146 100644 --- a/src/backend/utils/adt/date.c +++ b/src/backend/utils/adt/date.c @@ -1011,6 +1011,34 @@ timestamptz_cmp_date(PG_FUNCTION_ARGS) PG_RETURN_INT32(timestamptz_cmp_internal(dt1, dt2)); } +/* + * in_range support function for date. + * + * We implement this by promoting the dates to timestamp (without time zone) + * and then using the timestamp-and-interval in_range function. + */ +Datum +in_range_date_interval(PG_FUNCTION_ARGS) +{ + DateADT val = PG_GETARG_DATEADT(0); + DateADT base = PG_GETARG_DATEADT(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Timestamp valStamp; + Timestamp baseStamp; + + valStamp = date2timestamp(val); + baseStamp = date2timestamp(base); + + return DirectFunctionCall5(in_range_timestamp_interval, + TimestampGetDatum(valStamp), + TimestampGetDatum(baseStamp), + IntervalPGetDatum(offset), + BoolGetDatum(sub), + BoolGetDatum(less)); +} + /* Add an interval to a date, giving a new date. * Must handle both positive and negative intervals. @@ -1842,6 +1870,45 @@ time_mi_interval(PG_FUNCTION_ARGS) PG_RETURN_TIMEADT(result); } +/* + * in_range support function for time. + */ +Datum +in_range_time_interval(PG_FUNCTION_ARGS) +{ + TimeADT val = PG_GETARG_TIMEADT(0); + TimeADT base = PG_GETARG_TIMEADT(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimeADT sum; + + /* + * Like time_pl_interval/time_mi_interval, we disregard the month and day + * fields of the offset. So our test for negative should too. + */ + if (offset->time < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * We can't use time_pl_interval/time_mi_interval here, because their + * wraparound behavior would give wrong (or at least undesirable) answers. + * Fortunately the equivalent non-wrapping behavior is trivial, especially + * since we don't worry about integer overflow. + */ + if (sub) + sum = base - offset->time; + else + sum = base + offset->time; + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + /* time_part() * Extract specified field from time type. @@ -2305,6 +2372,46 @@ timetz_mi_interval(PG_FUNCTION_ARGS) PG_RETURN_TIMETZADT_P(result); } +/* + * in_range support function for timetz. + */ +Datum +in_range_timetz_interval(PG_FUNCTION_ARGS) +{ + TimeTzADT *val = PG_GETARG_TIMETZADT_P(0); + TimeTzADT *base = PG_GETARG_TIMETZADT_P(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimeTzADT sum; + + /* + * Like timetz_pl_interval/timetz_mi_interval, we disregard the month and + * day fields of the offset. So our test for negative should too. + */ + if (offset->time < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* + * We can't use timetz_pl_interval/timetz_mi_interval here, because their + * wraparound behavior would give wrong (or at least undesirable) answers. + * Fortunately the equivalent non-wrapping behavior is trivial, especially + * since we don't worry about integer overflow. + */ + if (sub) + sum.time = base->time - offset->time; + else + sum.time = base->time + offset->time; + sum.zone = base->zone; + + if (less) + PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) <= 0); + else + PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) >= 0); +} + /* overlaps_timetz() --- implements the SQL OVERLAPS operator. * * Algorithm is per SQL spec. This is much harder than you'd think diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 73529083655..559c365fecd 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -585,6 +585,158 @@ int42ge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(arg1 >= arg2); } + +/*---------------------------------------------------------- + * in_range functions for int4 and int2, + * including cross-data-type comparisons. + * + * Note: we provide separate intN_int8 functions for performance + * reasons. This forces also providing intN_int2, else cases with a + * smallint offset value would fail to resolve which function to use. + * But that's an unlikely situation, so don't duplicate code for it. + *---------------------------------------------------------*/ + +Datum +in_range_int4_int4(PG_FUNCTION_ARGS) +{ + int32 val = PG_GETARG_INT32(0); + int32 base = PG_GETARG_INT32(1); + int32 offset = PG_GETARG_INT32(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int32 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s32_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int4_int2(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int4_int4 */ + return DirectFunctionCall5(in_range_int4_int4, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum((int32) PG_GETARG_INT16(2)), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + +Datum +in_range_int4_int8(PG_FUNCTION_ARGS) +{ + /* We must do all the math in int64 */ + int64 val = (int64) PG_GETARG_INT32(0); + int64 base = (int64) PG_GETARG_INT32(1); + int64 offset = PG_GETARG_INT64(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int64 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s64_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int2_int4(PG_FUNCTION_ARGS) +{ + /* We must do all the math in int32 */ + int32 val = (int32) PG_GETARG_INT16(0); + int32 base = (int32) PG_GETARG_INT16(1); + int32 offset = PG_GETARG_INT32(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int32 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s32_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_int2_int2(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int2_int4 */ + return DirectFunctionCall5(in_range_int2_int4, + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + Int32GetDatum((int32) PG_GETARG_INT16(2)), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + +Datum +in_range_int2_int8(PG_FUNCTION_ARGS) +{ + /* Doesn't seem worth duplicating code for, so just invoke int4_int8 */ + return DirectFunctionCall5(in_range_int4_int8, + Int32GetDatum((int32) PG_GETARG_INT16(0)), + Int32GetDatum((int32) PG_GETARG_INT16(1)), + PG_GETARG_DATUM(2), + PG_GETARG_DATUM(3), + PG_GETARG_DATUM(4)); +} + + /* * int[24]pl - returns arg1 + arg2 * int[24]mi - returns arg1 - arg2 diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c index ae6a4683d4d..e6bae6860da 100644 --- a/src/backend/utils/adt/int8.c +++ b/src/backend/utils/adt/int8.c @@ -14,7 +14,7 @@ #include "postgres.h" #include <ctype.h> -#include <float.h> /* for _isnan */ +#include <float.h> /* for _isnan */ #include <limits.h> #include <math.h> @@ -469,6 +469,46 @@ int28ge(PG_FUNCTION_ARGS) PG_RETURN_BOOL(val1 >= val2); } +/* + * in_range support function for int8. + * + * Note: we needn't supply int8_int4 or int8_int2 variants, as implicit + * coercion of the offset value takes care of those scenarios just as well. + */ +Datum +in_range_int8_int8(PG_FUNCTION_ARGS) +{ + int64 val = PG_GETARG_INT64(0); + int64 base = PG_GETARG_INT64(1); + int64 offset = PG_GETARG_INT64(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + int64 sum; + + if (offset < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + if (sub) + offset = -offset; /* cannot overflow */ + + if (unlikely(pg_add_s64_overflow(base, offset, &sum))) + { + /* + * If sub is false, the true sum is surely more than val, so correct + * answer is the same as "less". If sub is true, the true sum is + * surely less than val, so the answer is "!less". + */ + PG_RETURN_BOOL(sub ? !less : less); + } + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + /*---------------------------------------------------------- * Arithmetic operators on 64-bit integers. diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c index c5f5a1ca3f9..28767a129af 100644 --- a/src/backend/utils/adt/ruleutils.c +++ b/src/backend/utils/adt/ruleutils.c @@ -5877,6 +5877,8 @@ get_rule_windowspec(WindowClause *wc, List *targetList, appendStringInfoString(buf, "RANGE "); else if (wc->frameOptions & FRAMEOPTION_ROWS) appendStringInfoString(buf, "ROWS "); + else if (wc->frameOptions & FRAMEOPTION_GROUPS) + appendStringInfoString(buf, "GROUPS "); else Assert(false); if (wc->frameOptions & FRAMEOPTION_BETWEEN) @@ -5885,12 +5887,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList, appendStringInfoString(buf, "UNBOUNDED PRECEDING "); else if (wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW) appendStringInfoString(buf, "CURRENT ROW "); - else if (wc->frameOptions & FRAMEOPTION_START_VALUE) + else if (wc->frameOptions & FRAMEOPTION_START_OFFSET) { get_rule_expr(wc->startOffset, context, false); - if (wc->frameOptions & FRAMEOPTION_START_VALUE_PRECEDING) + if (wc->frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING) appendStringInfoString(buf, " PRECEDING "); - else if (wc->frameOptions & FRAMEOPTION_START_VALUE_FOLLOWING) + else if (wc->frameOptions & FRAMEOPTION_START_OFFSET_FOLLOWING) appendStringInfoString(buf, " FOLLOWING "); else Assert(false); @@ -5904,12 +5906,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList, appendStringInfoString(buf, "UNBOUNDED FOLLOWING "); else if (wc->frameOptions & FRAMEOPTION_END_CURRENT_ROW) appendStringInfoString(buf, "CURRENT ROW "); - else if (wc->frameOptions & FRAMEOPTION_END_VALUE) + else if (wc->frameOptions & FRAMEOPTION_END_OFFSET) { get_rule_expr(wc->endOffset, context, false); - if (wc->frameOptions & FRAMEOPTION_END_VALUE_PRECEDING) + if (wc->frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING) appendStringInfoString(buf, " PRECEDING "); - else if (wc->frameOptions & FRAMEOPTION_END_VALUE_FOLLOWING) + else if (wc->frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING) appendStringInfoString(buf, " FOLLOWING "); else Assert(false); @@ -5917,6 +5919,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList, else Assert(false); } + if (wc->frameOptions & FRAMEOPTION_EXCLUDE_CURRENT_ROW) + appendStringInfoString(buf, "EXCLUDE CURRENT ROW "); + else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_GROUP) + appendStringInfoString(buf, "EXCLUDE GROUP "); + else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_TIES) + appendStringInfoString(buf, "EXCLUDE TIES "); /* we will now have a trailing space; remove it */ buf->len--; } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index e6a1eed191e..103f91ae624 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -3258,6 +3258,110 @@ interval_div(PG_FUNCTION_ARGS) PG_RETURN_INTERVAL_P(result); } + +/* + * in_range support functions for timestamps and intervals. + * + * Per SQL spec, we support these with interval as the offset type. + * The spec's restriction that the offset not be negative is a bit hard to + * decipher for intervals, but we choose to interpret it the same as our + * interval comparison operators would. + */ + +Datum +in_range_timestamptz_interval(PG_FUNCTION_ARGS) +{ + TimestampTz val = PG_GETARG_TIMESTAMPTZ(0); + TimestampTz base = PG_GETARG_TIMESTAMPTZ(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + TimestampTz sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_mi_interval, + TimestampTzGetDatum(base), + IntervalPGetDatum(offset))); + else + sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval, + TimestampTzGetDatum(base), + IntervalPGetDatum(offset))); + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_timestamp_interval(PG_FUNCTION_ARGS) +{ + Timestamp val = PG_GETARG_TIMESTAMP(0); + Timestamp base = PG_GETARG_TIMESTAMP(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Timestamp sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_mi_interval, + TimestampGetDatum(base), + IntervalPGetDatum(offset))); + else + sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval, + TimestampGetDatum(base), + IntervalPGetDatum(offset))); + + if (less) + PG_RETURN_BOOL(val <= sum); + else + PG_RETURN_BOOL(val >= sum); +} + +Datum +in_range_interval_interval(PG_FUNCTION_ARGS) +{ + Interval *val = PG_GETARG_INTERVAL_P(0); + Interval *base = PG_GETARG_INTERVAL_P(1); + Interval *offset = PG_GETARG_INTERVAL_P(2); + bool sub = PG_GETARG_BOOL(3); + bool less = PG_GETARG_BOOL(4); + Interval *sum; + + if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE), + errmsg("invalid preceding or following size in window function"))); + + /* We don't currently bother to avoid overflow hazards here */ + if (sub) + sum = DatumGetIntervalP(DirectFunctionCall2(interval_mi, + IntervalPGetDatum(base), + IntervalPGetDatum(offset))); + else + sum = DatumGetIntervalP(DirectFunctionCall2(interval_pl, + IntervalPGetDatum(base), + IntervalPGetDatum(offset))); + + if (less) + PG_RETURN_BOOL(interval_cmp_internal(val, sum) <= 0); + else + PG_RETURN_BOOL(interval_cmp_internal(val, sum) >= 0); +} + + /* * interval_accum, interval_accum_inv, and interval_avg implement the * AVG(interval) aggregate. |