Support all SQL:2011 options for window frame clauses.

This patch adds the ability to use "RANGE offset PRECEDING/FOLLOWING" frame boundaries in window functions. We'd punted on that back in the original patch to add window functions, because it was not clear how to do it in a reasonably data-type-extensible fashion. That problem is resolved here by adding the ability for btree operator classes to provide an "in_range" support function that defines how to add or subtract the RANGE offset value. Factoring it this way also allows the operator class to avoid overflow problems near the ends of the datatype's range, if it wishes to expend effort on that. (In the committed patch, the integer opclasses handle that issue, but it did not seem worth the trouble to avoid overflow failures for datetime types.)

The patch includes in_range support for the integer_ops opfamily (int2/int4/int8) as well as the standard datetime types. Support for other numeric types has been requested, but that seems like suitable material for a follow-on patch.

In addition, the patch adds GROUPS mode, which counts the offset in ORDER-BY peer groups rather than rows, and it adds the frame_exclusion options specified by SQL:2011. As far as I can see, we are now fully up to spec on window framing options.

Existing behaviors remain unchanged, except that I changed the errcode for a couple of existing error reports to meet the SQL spec's expectation that negative "offset" values should be reported as SQLSTATE 22013.

Internally and in relevant parts of the documentation, we now consistently use the terminology "offset PRECEDING/FOLLOWING" rather than "value PRECEDING/FOLLOWING", since the term "value" is confusingly vague.

Oliver Ford, reviewed and whacked around some by me

Discussion: https://postgr.es/m/CAGMVOdu9sivPAxbNN0X+q19Sfv9edEPv=HibOJhB14TJv_RCQg@mail.gmail.com
author: Tom Lane <tgl@sss.pgh.pa.us> 2018-02-07 00:06:50 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2018-02-07 00:06:56 -0500
commit: 0a459cec96d3856f476c2db298c6b52f592894e8 (patch)
tree: 3d10f137b48de039c46914fa8e854bd69daaaec1 /src/backend/utils/adt
parent: 23209457314f6fd89fcd251a8173b0129aaa95a2 (diff)
download: postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.tar.gz
postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.zip
5 files changed, 418 insertions, 7 deletions
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
index 747ef497897..eea29044146 100644
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -1011,6 +1011,34 @@ timestamptz_cmp_date(PG_FUNCTION_ARGS)
 	PG_RETURN_INT32(timestamptz_cmp_internal(dt1, dt2));
 }
 
+/*
+ * in_range support function for date, with an interval offset.
+ *
+ * We implement this by promoting both dates to timestamp (without time zone)
+ * and returning the result of the timestamp-and-interval in_range function.
+ */
+Datum
+in_range_date_interval(PG_FUNCTION_ARGS)
+{
+	DateADT		val = PG_GETARG_DATEADT(0);	/* date being tested */
+	DateADT		base = PG_GETARG_DATEADT(1);	/* date the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* forwarded unexamined */
+	bool		sub = PG_GETARG_BOOL(3);	/* forwarded unchanged */
+	bool		less = PG_GETARG_BOOL(4);	/* forwarded unchanged */
+	Timestamp	valStamp;
+	Timestamp	baseStamp;
+
+	valStamp = date2timestamp(val);
+	baseStamp = date2timestamp(base);
+
+	return DirectFunctionCall5(in_range_timestamp_interval,	/* result returned as-is */
+							   TimestampGetDatum(valStamp),
+							   TimestampGetDatum(baseStamp),
+							   IntervalPGetDatum(offset),
+							   BoolGetDatum(sub),
+							   BoolGetDatum(less));
+}
+
 
 /* Add an interval to a date, giving a new date.
  * Must handle both positive and negative intervals.
@@ -1842,6 +1870,45 @@ time_mi_interval(PG_FUNCTION_ARGS)
 	PG_RETURN_TIMEADT(result);
 }
 
+/*
+ * in_range support function for time: tests val against base -/+ offset.
+ */
+Datum
+in_range_time_interval(PG_FUNCTION_ARGS)
+{
+	TimeADT		val = PG_GETARG_TIMEADT(0);	/* value being tested */
+	TimeADT		base = PG_GETARG_TIMEADT(1);	/* value the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* only ->time is used */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	TimeADT		sum;			/* the bound, base -/+ offset->time */
+
+	/*
+	 * Like time_pl_interval/time_mi_interval, we disregard the month and day
+	 * fields of the offset.  So our test for negative should too.
+	 */
+	if (offset->time < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	/*
+	 * We can't use time_pl_interval/time_mi_interval here, because their
+	 * wraparound behavior would give wrong (or at least undesirable) answers.
+	 * The equivalent non-wrapping arithmetic is done directly and unchecked.
+	 * NOTE(review): base + offset->time may overflow for a huge offset.
+	 */
+	if (sub)
+		sum = base - offset->time;
+	else
+		sum = base + offset->time;
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
 
 /* time_part()
  * Extract specified field from time type.
@@ -2305,6 +2372,46 @@ timetz_mi_interval(PG_FUNCTION_ARGS)
 	PG_RETURN_TIMETZADT_P(result);
 }
 
+/*
+ * in_range support function for timetz: tests val against base -/+ offset.
+ */
+Datum
+in_range_timetz_interval(PG_FUNCTION_ARGS)
+{
+	TimeTzADT  *val = PG_GETARG_TIMETZADT_P(0);	/* value being tested */
+	TimeTzADT  *base = PG_GETARG_TIMETZADT_P(1);	/* value the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* only ->time is used */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	TimeTzADT	sum;			/* the bound, built from base */
+
+	/*
+	 * Like timetz_pl_interval/timetz_mi_interval, we disregard the month and
+	 * day fields of the offset.  So our test for negative should too.
+	 */
+	if (offset->time < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	/*
+	 * We can't use timetz_pl_interval/timetz_mi_interval here, because their
+	 * wraparound behavior would give wrong (or at least undesirable) answers.
+	 * The equivalent non-wrapping arithmetic is done directly and unchecked.
+	 * NOTE(review): base->time + offset->time may overflow for a huge offset.
+	 */
+	if (sub)
+		sum.time = base->time - offset->time;
+	else
+		sum.time = base->time + offset->time;
+	sum.zone = base->zone;		/* the bound carries base's zone */
+
+	if (less)
+		PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) <= 0);
+	else
+		PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) >= 0);
+}
+
 /* overlaps_timetz() --- implements the SQL OVERLAPS operator.
  *
  * Algorithm is per SQL spec.  This is much harder than you'd think
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 73529083655..559c365fecd 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -585,6 +585,158 @@ int42ge(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(arg1 >= arg2);
 }
 
+
+/*----------------------------------------------------------
+ *	in_range functions for int4 and int2,
+ *	including cross-data-type comparisons.
+ *
+ *	Note: we provide separate intN_int8 functions for performance
+ *	reasons.  This forces also providing intN_int2, else cases with a
+ *	smallint offset value would fail to resolve which function to use.
+ *	But that's an unlikely situation, so don't duplicate code for it.
+ *---------------------------------------------------------*/
+
+Datum
+in_range_int4_int4(PG_FUNCTION_ARGS)
+{
+	int32		val = PG_GETARG_INT32(0);	/* value being tested */
+	int32		base = PG_GETARG_INT32(1);	/* value the offset applies to */
+	int32		offset = PG_GETARG_INT32(2);	/* negative is rejected below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	int32		sum;			/* the bound, base -/+ offset */
+
+	if (offset < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	if (sub)
+		offset = -offset;		/* cannot overflow, since offset >= 0 */
+
+	if (unlikely(pg_add_s32_overflow(base, offset, &sum)))
+	{
+		/*
+		 * The true sum does not fit in int32, whereas val does.  If sub is
+		 * false the true sum is above val, so the answer equals "less"; if
+		 * sub is true the true sum is below val, so the answer is "!less".
+		 */
+		PG_RETURN_BOOL(sub ? !less : less);
+	}
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_int4_int2(PG_FUNCTION_ARGS)
+{
+	/* Only the int2 offset needs widening; then in_range_int4_int4 applies */
+	return DirectFunctionCall5(in_range_int4_int4,
+							   PG_GETARG_DATUM(0),	/* val, passed through */
+							   PG_GETARG_DATUM(1),	/* base, passed through */
+							   Int32GetDatum((int32) PG_GETARG_INT16(2)),	/* offset */
+							   PG_GETARG_DATUM(3),	/* sub, passed through */
+							   PG_GETARG_DATUM(4));	/* less, passed through */
+}
+
+Datum
+in_range_int4_int8(PG_FUNCTION_ARGS)
+{
+	/* We must do all the math in int64, since the offset is int64 */
+	int64		val = (int64) PG_GETARG_INT32(0);	/* value being tested */
+	int64		base = (int64) PG_GETARG_INT32(1);	/* value offset applies to */
+	int64		offset = PG_GETARG_INT64(2);	/* negative is rejected below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	int64		sum;			/* the bound, base -/+ offset */
+
+	if (offset < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	if (sub)
+		offset = -offset;		/* cannot overflow, since offset >= 0 */
+
+	if (unlikely(pg_add_s64_overflow(base, offset, &sum)))
+	{
+		/*
+		 * The true sum does not fit in int64, whereas val does.  If sub is
+		 * false the true sum is above val, so the answer equals "less"; if
+		 * sub is true the true sum is below val, so the answer is "!less".
+		 */
+		PG_RETURN_BOOL(sub ? !less : less);
+	}
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_int2_int4(PG_FUNCTION_ARGS)
+{
+	/* We must do all the math in int32, since the offset is int32 */
+	int32		val = (int32) PG_GETARG_INT16(0);	/* value being tested */
+	int32		base = (int32) PG_GETARG_INT16(1);	/* value offset applies to */
+	int32		offset = PG_GETARG_INT32(2);	/* negative is rejected below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	int32		sum;			/* the bound, base -/+ offset */
+
+	if (offset < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	if (sub)
+		offset = -offset;		/* cannot overflow, since offset >= 0 */
+
+	if (unlikely(pg_add_s32_overflow(base, offset, &sum)))
+	{
+		/*
+		 * The true sum does not fit in int32, whereas val does.  If sub is
+		 * false the true sum is above val, so the answer equals "less"; if
+		 * sub is true the true sum is below val, so the answer is "!less".
+		 */
+		PG_RETURN_BOOL(sub ? !less : less);
+	}
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_int2_int2(PG_FUNCTION_ARGS)
+{
+	/* Only the int2 offset needs widening; then in_range_int2_int4 applies */
+	return DirectFunctionCall5(in_range_int2_int4,
+							   PG_GETARG_DATUM(0),	/* val, passed through */
+							   PG_GETARG_DATUM(1),	/* base, passed through */
+							   Int32GetDatum((int32) PG_GETARG_INT16(2)),	/* offset */
+							   PG_GETARG_DATUM(3),	/* sub, passed through */
+							   PG_GETARG_DATUM(4));	/* less, passed through */
+}
+
+Datum
+in_range_int2_int8(PG_FUNCTION_ARGS)
+{
+	/* Widen the int2 val and base to int4; then in_range_int4_int8 applies */
+	return DirectFunctionCall5(in_range_int4_int8,
+							   Int32GetDatum((int32) PG_GETARG_INT16(0)),	/* val */
+							   Int32GetDatum((int32) PG_GETARG_INT16(1)),	/* base */
+							   PG_GETARG_DATUM(2),	/* offset, passed through */
+							   PG_GETARG_DATUM(3),	/* sub, passed through */
+							   PG_GETARG_DATUM(4));	/* less, passed through */
+}
+
+
 /*
  *		int[24]pl		- returns arg1 + arg2
  *		int[24]mi		- returns arg1 - arg2
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index ae6a4683d4d..e6bae6860da 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -14,7 +14,7 @@
 #include "postgres.h"
 
 #include <ctype.h>
-#include <float.h> /* for _isnan */
+#include <float.h>				/* for _isnan */
 #include <limits.h>
 #include <math.h>
 
@@ -469,6 +469,46 @@ int28ge(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(val1 >= val2);
 }
 
+/*
+ * in_range support function for int8: tests val against base -/+ offset.
+ *
+ * Note: we needn't supply int8_int4 or int8_int2 variants, as implicit
+ * coercion of the offset value takes care of those scenarios just as well.
+ */
+Datum
+in_range_int8_int8(PG_FUNCTION_ARGS)
+{
+	int64		val = PG_GETARG_INT64(0);	/* value being tested */
+	int64		base = PG_GETARG_INT64(1);	/* value the offset applies to */
+	int64		offset = PG_GETARG_INT64(2);	/* negative is rejected below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	int64		sum;			/* the bound, base -/+ offset */
+
+	if (offset < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	if (sub)
+		offset = -offset;		/* cannot overflow, since offset >= 0 */
+
+	if (unlikely(pg_add_s64_overflow(base, offset, &sum)))
+	{
+		/*
+		 * The true sum does not fit in int64, whereas val does.  If sub is
+		 * false the true sum is above val, so the answer equals "less"; if
+		 * sub is true the true sum is below val, so the answer is "!less".
+		 */
+		PG_RETURN_BOOL(sub ? !less : less);
+	}
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
 
 /*----------------------------------------------------------
  *	Arithmetic operators on 64-bit integers.
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
index c5f5a1ca3f9..28767a129af 100644
--- a/src/backend/utils/adt/ruleutils.c
+++ b/src/backend/utils/adt/ruleutils.c
@@ -5877,6 +5877,8 @@ get_rule_windowspec(WindowClause *wc, List *targetList,
 			appendStringInfoString(buf, "RANGE ");
 		else if (wc->frameOptions & FRAMEOPTION_ROWS)
 			appendStringInfoString(buf, "ROWS ");
+		else if (wc->frameOptions & FRAMEOPTION_GROUPS)
+			appendStringInfoString(buf, "GROUPS ");
 		else
 			Assert(false);
 		if (wc->frameOptions & FRAMEOPTION_BETWEEN)
@@ -5885,12 +5887,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList,
 			appendStringInfoString(buf, "UNBOUNDED PRECEDING ");
 		else if (wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW)
 			appendStringInfoString(buf, "CURRENT ROW ");
-		else if (wc->frameOptions & FRAMEOPTION_START_VALUE)
+		else if (wc->frameOptions & FRAMEOPTION_START_OFFSET)
 		{
 			get_rule_expr(wc->startOffset, context, false);
-			if (wc->frameOptions & FRAMEOPTION_START_VALUE_PRECEDING)
+			if (wc->frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING)
 				appendStringInfoString(buf, " PRECEDING ");
-			else if (wc->frameOptions & FRAMEOPTION_START_VALUE_FOLLOWING)
+			else if (wc->frameOptions & FRAMEOPTION_START_OFFSET_FOLLOWING)
 				appendStringInfoString(buf, " FOLLOWING ");
 			else
 				Assert(false);
@@ -5904,12 +5906,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList,
 				appendStringInfoString(buf, "UNBOUNDED FOLLOWING ");
 			else if (wc->frameOptions & FRAMEOPTION_END_CURRENT_ROW)
 				appendStringInfoString(buf, "CURRENT ROW ");
-			else if (wc->frameOptions & FRAMEOPTION_END_VALUE)
+			else if (wc->frameOptions & FRAMEOPTION_END_OFFSET)
 			{
 				get_rule_expr(wc->endOffset, context, false);
-				if (wc->frameOptions & FRAMEOPTION_END_VALUE_PRECEDING)
+				if (wc->frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING)
 					appendStringInfoString(buf, " PRECEDING ");
-				else if (wc->frameOptions & FRAMEOPTION_END_VALUE_FOLLOWING)
+				else if (wc->frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING)
 					appendStringInfoString(buf, " FOLLOWING ");
 				else
 					Assert(false);
@@ -5917,6 +5919,12 @@ get_rule_windowspec(WindowClause *wc, List *targetList,
 			else
 				Assert(false);
 		}
+		if (wc->frameOptions & FRAMEOPTION_EXCLUDE_CURRENT_ROW)
+			appendStringInfoString(buf, "EXCLUDE CURRENT ROW ");
+		else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_GROUP)
+			appendStringInfoString(buf, "EXCLUDE GROUP ");
+		else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_TIES)
+			appendStringInfoString(buf, "EXCLUDE TIES ");
 		/* we will now have a trailing space; remove it */
 		buf->len--;
 	}
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index e6a1eed191e..103f91ae624 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -3258,6 +3258,110 @@ interval_div(PG_FUNCTION_ARGS)
 	PG_RETURN_INTERVAL_P(result);
 }
 
+
+/*
+ * in_range support functions for timestamps and intervals.
+ *
+ * Per SQL spec, we support these with interval as the offset type.
+ * The spec's restriction that the offset not be negative is a bit hard to
+ * decipher for intervals, but we choose to interpret it the same as our
+ * interval comparison operators would.
+ */
+
+Datum
+in_range_timestamptz_interval(PG_FUNCTION_ARGS)
+{
+	TimestampTz val = PG_GETARG_TIMESTAMPTZ(0);	/* value being tested */
+	TimestampTz base = PG_GETARG_TIMESTAMPTZ(1);	/* value the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* checked for negative below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	TimestampTz sum;			/* the bound, base -/+ offset */
+
+	if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)	/* negative? */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	/* The bound is computed by the ordinary operators, with no overflow guard */
+	if (sub)
+		sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_mi_interval,
+													  TimestampTzGetDatum(base),
+													  IntervalPGetDatum(offset)));
+	else
+		sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
+													  TimestampTzGetDatum(base),
+													  IntervalPGetDatum(offset)));
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_timestamp_interval(PG_FUNCTION_ARGS)
+{
+	Timestamp	val = PG_GETARG_TIMESTAMP(0);	/* value being tested */
+	Timestamp	base = PG_GETARG_TIMESTAMP(1);	/* value the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* checked for negative below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	Timestamp	sum;			/* the bound, base -/+ offset */
+
+	if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)	/* negative? */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	/* The bound is computed by the ordinary operators, with no overflow guard */
+	if (sub)
+		sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_mi_interval,
+													TimestampGetDatum(base),
+													IntervalPGetDatum(offset)));
+	else
+		sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval,
+													TimestampGetDatum(base),
+													IntervalPGetDatum(offset)));
+
+	if (less)
+		PG_RETURN_BOOL(val <= sum);
+	else
+		PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_interval_interval(PG_FUNCTION_ARGS)
+{
+	Interval   *val = PG_GETARG_INTERVAL_P(0);	/* value being tested */
+	Interval   *base = PG_GETARG_INTERVAL_P(1);	/* value the offset applies to */
+	Interval   *offset = PG_GETARG_INTERVAL_P(2);	/* checked for negative below */
+	bool		sub = PG_GETARG_BOOL(3);	/* true: bound is base - offset */
+	bool		less = PG_GETARG_BOOL(4);	/* true: test val <= bound */
+	Interval   *sum;			/* the bound, base -/+ offset */
+
+	if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)	/* negative? */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PRECEDING_FOLLOWING_SIZE),
+				 errmsg("invalid preceding or following size in window function")));
+
+	/* The bound is computed by the ordinary operators, with no overflow guard */
+	if (sub)
+		sum = DatumGetIntervalP(DirectFunctionCall2(interval_mi,
+													IntervalPGetDatum(base),
+													IntervalPGetDatum(offset)));
+	else
+		sum = DatumGetIntervalP(DirectFunctionCall2(interval_pl,
+													IntervalPGetDatum(base),
+													IntervalPGetDatum(offset)));
+
+	if (less)
+		PG_RETURN_BOOL(interval_cmp_internal(val, sum) <= 0);
+	else
+		PG_RETURN_BOOL(interval_cmp_internal(val, sum) >= 0);
+}
+
+
 /*
  * interval_accum, interval_accum_inv, and interval_avg implement the
  * AVG(interval) aggregate.
author	Tom Lane <tgl@sss.pgh.pa.us>	2018-02-07 00:06:50 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2018-02-07 00:06:56 -0500
commit	0a459cec96d3856f476c2db298c6b52f592894e8 (patch)
tree	3d10f137b48de039c46914fa8e854bd69daaaec1 /src/backend/utils/adt
parent	23209457314f6fd89fcd251a8173b0129aaa95a2 (diff)
download	postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.tar.gz postgresql-0a459cec96d3856f476c2db298c6b52f592894e8.zip