6 files changed, 92 insertions, 39 deletions
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index a309c446e37..8898b55d360 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3063,7 +3063,7 @@ validate_index_heapscan(Relation heapRelation,
 			}
 
 			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
-												  &ts_val, &ts_isnull);
+												  &ts_val, &ts_isnull, NULL);
 			Assert(tuplesort_empty || !ts_isnull);
 			if (!tuplesort_empty)
 			{
diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c
index b5aac67489d..03aa20f61e0 100644
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -575,7 +575,8 @@ fetch_input_tuple(AggState *aggstate)
 
 	if (aggstate->sort_in)
 	{
-		if (!tuplesort_gettupleslot(aggstate->sort_in, true, aggstate->sort_slot))
+		if (!tuplesort_gettupleslot(aggstate->sort_in, true, aggstate->sort_slot,
+									NULL))
 			return NULL;
 		slot = aggstate->sort_slot;
 	}
@@ -1076,6 +1077,8 @@ process_ordered_aggregate_single(AggState *aggstate,
 	MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
 	MemoryContext oldContext;
 	bool		isDistinct = (pertrans->numDistinctCols > 0);
+	Datum		newAbbrevVal = (Datum) 0;
+	Datum		oldAbbrevVal = (Datum) 0;
 	FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
 	Datum	   *newVal;
 	bool	   *isNull;
@@ -1095,7 +1098,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 	 */
 
 	while (tuplesort_getdatum(pertrans->sortstates[aggstate->current_set],
-							  true, newVal, isNull))
+							  true, newVal, isNull, &newAbbrevVal))
 	{
 		/*
 		 * Clear and select the working context for evaluation of the equality
@@ -1113,6 +1116,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 			haveOldVal &&
 			((oldIsNull && *isNull) ||
 			 (!oldIsNull && !*isNull &&
+			  oldAbbrevVal == newAbbrevVal &&
 			  DatumGetBool(FunctionCall2(&pertrans->equalfns[0],
 										 oldVal, *newVal)))))
 		{
@@ -1128,6 +1132,7 @@ process_ordered_aggregate_single(AggState *aggstate,
 				pfree(DatumGetPointer(oldVal));
 			/* and remember the new one for subsequent equality checks */
 			oldVal = *newVal;
+			oldAbbrevVal = newAbbrevVal;
 			oldIsNull = *isNull;
 			haveOldVal = true;
 		}
@@ -1165,6 +1170,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 	TupleTableSlot *slot2 = pertrans->uniqslot;
 	int			numTransInputs = pertrans->numTransInputs;
 	int			numDistinctCols = pertrans->numDistinctCols;
+	Datum		newAbbrevVal = (Datum) 0;
+	Datum		oldAbbrevVal = (Datum) 0;
 	bool		haveOldValue = false;
 	int			i;
 
@@ -1175,7 +1182,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 		ExecClearTuple(slot2);
 
 	while (tuplesort_gettupleslot(pertrans->sortstates[aggstate->current_set],
-								  true, slot1))
+								  true, slot1, &newAbbrevVal))
 	{
 		/*
 		 * Extract the first numTransInputs columns as datums to pass to the
@@ -1186,6 +1193,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
 
 		if (numDistinctCols == 0 ||
 			!haveOldValue ||
+			newAbbrevVal != oldAbbrevVal ||
 			!execTuplesMatch(slot1, slot2,
 							 numDistinctCols,
 							 pertrans->sortColIdx,
@@ -1209,6 +1217,8 @@ process_ordered_aggregate_multi(AggState *aggstate,
 
 				slot2 = slot1;
 				slot1 = tmpslot;
+				/* avoid execTuplesMatch() calls by reusing abbreviated keys */
+				oldAbbrevVal = newAbbrevVal;
 				haveOldValue = true;
 			}
 		}
diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c
index 102dbdfc124..a34dcc51358 100644
--- a/src/backend/executor/nodeSort.c
+++ b/src/backend/executor/nodeSort.c
@@ -137,7 +137,7 @@ ExecSort(SortState *node)
 	slot = node->ss.ps.ps_ResultTupleSlot;
 	(void) tuplesort_gettupleslot(tuplesortstate,
 								  ScanDirectionIsForward(dir),
-								  slot);
+								  slot, NULL);
 	return slot;
 }
 
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
index 57201fd03d4..fe44d561295 100644
--- a/src/backend/utils/adt/orderedsetaggs.c
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -453,7 +453,7 @@ percentile_disc_final(PG_FUNCTION_ARGS)
 			elog(ERROR, "missing row in percentile_disc");
 	}
 
-	if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+	if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
 		elog(ERROR, "missing row in percentile_disc");
 
 	/*
@@ -553,7 +553,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo,
 	if (!tuplesort_skiptuples(osastate->sortstate, first_row, true))
 		elog(ERROR, "missing row in percentile_cont");
 
-	if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull))
+	if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull, NULL))
 		elog(ERROR, "missing row in percentile_cont");
 	if (isnull)
 		PG_RETURN_NULL();
@@ -564,7 +564,7 @@ percentile_cont_final_common(FunctionCallInfo fcinfo,
 	}
 	else
 	{
-		if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull))
+		if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull, NULL))
 			elog(ERROR, "missing row in percentile_cont");
 
 		if (isnull)
@@ -792,7 +792,7 @@ percentile_disc_multi_final(PG_FUNCTION_ARGS)
 				if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true))
 					elog(ERROR, "missing row in percentile_disc");
 
-				if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+				if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
 					elog(ERROR, "missing row in percentile_disc");
 
 				rownum = target_row;
@@ -921,7 +921,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
 				if (!tuplesort_skiptuples(osastate->sortstate, first_row - rownum - 1, true))
 					elog(ERROR, "missing row in percentile_cont");
 
-				if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull) || isnull)
+				if (!tuplesort_getdatum(osastate->sortstate, true, &first_val,
+										&isnull, NULL) || isnull)
 					elog(ERROR, "missing row in percentile_cont");
 
 				rownum = first_row;
@@ -941,7 +942,8 @@ percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
 			/* Fetch second_row if needed */
 			if (second_row > rownum)
 			{
-				if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull) || isnull)
+				if (!tuplesort_getdatum(osastate->sortstate, true, &second_val,
+										&isnull, NULL) || isnull)
 					elog(ERROR, "missing row in percentile_cont");
 				rownum++;
 			}
@@ -1016,6 +1018,8 @@ mode_final(PG_FUNCTION_ARGS)
 	int64		last_val_freq = 0;
 	bool		last_val_is_mode = false;
 	FmgrInfo   *equalfn;
+	Datum		abbrev_val = (Datum) 0;
+	Datum		last_abbrev_val = (Datum) 0;
 	bool		shouldfree;
 
 	Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
@@ -1042,7 +1046,7 @@ mode_final(PG_FUNCTION_ARGS)
 	tuplesort_performsort(osastate->sortstate);
 
 	/* Scan tuples and count frequencies */
-	while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull))
+	while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, &abbrev_val))
 	{
 		/* we don't expect any nulls, but ignore them if found */
 		if (isnull)
@@ -1054,8 +1058,10 @@ mode_final(PG_FUNCTION_ARGS)
 			mode_val = last_val = val;
 			mode_freq = last_val_freq = 1;
 			last_val_is_mode = true;
+			last_abbrev_val = abbrev_val;
 		}
-		else if (DatumGetBool(FunctionCall2(equalfn, val, last_val)))
+		else if (abbrev_val == last_abbrev_val &&
+				 DatumGetBool(FunctionCall2(equalfn, val, last_val)))
 		{
 			/* value equal to previous value, count it */
 			if (last_val_is_mode)
@@ -1078,6 +1084,8 @@ mode_final(PG_FUNCTION_ARGS)
 			if (shouldfree && !last_val_is_mode)
 				pfree(DatumGetPointer(last_val));
 			last_val = val;
+			/* avoid equality function calls by reusing abbreviated keys */
+			last_abbrev_val = abbrev_val;
 			last_val_freq = 1;
 			last_val_is_mode = false;
 		}
@@ -1181,7 +1189,7 @@ hypothetical_rank_common(FunctionCallInfo fcinfo, int flag,
 	tuplesort_performsort(osastate->sortstate);
 
 	/* iterate till we find the hypothetical row */
-	while (tuplesort_gettupleslot(osastate->sortstate, true, slot))
+	while (tuplesort_gettupleslot(osastate->sortstate, true, slot, NULL))
 	{
 		bool		isnull;
 		Datum		d = slot_getattr(slot, nargs + 1, &isnull);
@@ -1266,6 +1274,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 	int64		duplicate_count = 0;
 	OSAPerGroupState *osastate;
 	int			numDistinctCols;
+	Datum		abbrevVal = (Datum) 0;
+	Datum		abbrevOld = (Datum) 0;
 	AttrNumber *sortColIdx;
 	FmgrInfo   *equalfns;
 	TupleTableSlot *slot;
@@ -1342,7 +1352,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 	slot2 = extraslot;
 
 	/* iterate till we find the hypothetical row */
-	while (tuplesort_gettupleslot(osastate->sortstate, true, slot))
+	while (tuplesort_gettupleslot(osastate->sortstate, true, slot, &abbrevVal))
 	{
 		bool		isnull;
 		Datum		d = slot_getattr(slot, nargs + 1, &isnull);
@@ -1353,6 +1363,7 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 
 		/* count non-distinct tuples */
 		if (!TupIsNull(slot2) &&
+			abbrevVal == abbrevOld &&
 			execTuplesMatch(slot, slot2,
 							numDistinctCols,
 							sortColIdx,
@@ -1363,6 +1374,8 @@ hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
 		tmpslot = slot2;
 		slot2 = slot;
 		slot = tmpslot;
+		/* avoid execTuplesMatch() calls by reusing abbreviated keys */
+		abbrevOld = abbrevVal;
 
 		rank++;
 
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index a30e1707a79..67d86ed83b4 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1280,7 +1280,8 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 		 * converter it won't expect NULL values, and cost model is not
 		 * required to account for NULL, so in that case we avoid calling
 		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent).
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
 		 */
 		stup.datum1 = original;
 	}
@@ -1349,7 +1350,11 @@ tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull)
 
 	if (isNull || state->datumTypeByVal)
 	{
-		stup.datum1 = val;
+		/*
+		 * Set datum1 to zeroed representation for NULLs (to be consistent, and
+		 * to support cheap inequality tests for NULL abbreviated keys).
+		 */
+		stup.datum1 = !isNull ? val : (Datum) 0;
 		stup.isnull1 = isNull;
 		stup.tuple = NULL;		/* no separate storage */
 	}
@@ -1866,10 +1871,17 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
  * Fetch the next tuple in either forward or back direction.
  * If successful, put tuple in slot and return TRUE; else, clear the slot
  * and return FALSE.
+ *
+ * Caller may optionally be passed back abbreviated value (on TRUE return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value in leading attribute will set abbreviated value to zeroed
+ * representation, which caller may rely on in abbreviated inequality check.
  */
 bool
 tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
-					   TupleTableSlot *slot)
+					   TupleTableSlot *slot, Datum *abbrev)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -1882,6 +1894,10 @@ tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
 
 	if (stup.tuple)
 	{
+		/* Record abbreviated key for caller */
+		if (state->sortKeys->abbrev_converter && abbrev)
+			*abbrev = stup.datum1;
+
 		ExecStoreMinimalTuple((MinimalTuple) stup.tuple, slot, should_free);
 		return true;
 	}
@@ -1937,10 +1953,17 @@ tuplesort_getindextuple(Tuplesortstate *state, bool forward,
  *
  * If the Datum is pass-by-ref type, the returned value is freshly palloc'd
  * and is now owned by the caller.
+ *
+ * Caller may optionally be passed back abbreviated value (on TRUE return
+ * value) when abbreviation was used, which can be used to cheaply avoid
+ * equality checks that might otherwise be required.  Caller can safely make a
+ * determination of "non-equal tuple" based on simple binary inequality.  A
+ * NULL value will have a zeroed abbreviated value representation, which caller
+ * may rely on in abbreviated inequality check.
  */
 bool
 tuplesort_getdatum(Tuplesortstate *state, bool forward,
-				   Datum *val, bool *isNull)
+				   Datum *val, bool *isNull, Datum *abbrev)
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
@@ -1952,6 +1975,10 @@ tuplesort_getdatum(Tuplesortstate *state, bool forward,
 		return false;
 	}
 
+	/* Record abbreviated key for caller */
+	if (state->sortKeys->abbrev_converter && abbrev)
+		*abbrev = stup.datum1;
+
 	if (stup.isnull1 || state->datumTypeByVal)
 	{
 		*val = stup.datum1;
@@ -2232,21 +2259,6 @@ mergeruns(Tuplesortstate *state)
 	Assert(state->status == TSS_BUILDRUNS);
 	Assert(state->memtupcount == 0);
 
-	/*
-	 * If we produced only one initial run (quite likely if the total data
-	 * volume is between 1X and 2X workMem), we can just use that tape as the
-	 * finished output, rather than doing a useless merge.  (This obvious
-	 * optimization is not in Knuth's algorithm.)
-	 */
-	if (state->currentRun == 1)
-	{
-		state->result_tape = state->tp_tapenum[state->destTape];
-		/* must freeze and rewind the finished output tape */
-		LogicalTapeFreeze(state->tapeset, state->result_tape);
-		state->status = TSS_SORTEDONTAPE;
-		return;
-	}
-
 	if (state->sortKeys != NULL && state->sortKeys->abbrev_converter != NULL)
 	{
 		/*
@@ -2263,6 +2275,21 @@ mergeruns(Tuplesortstate *state)
 		state->sortKeys->abbrev_full_comparator = NULL;
 	}
 
+	/*
+	 * If we produced only one initial run (quite likely if the total data
+	 * volume is between 1X and 2X workMem), we can just use that tape as the
+	 * finished output, rather than doing a useless merge.  (This obvious
+	 * optimization is not in Knuth's algorithm.)
+	 */
+	if (state->currentRun == 1)
+	{
+		state->result_tape = state->tp_tapenum[state->destTape];
+		/* must freeze and rewind the finished output tape */
+		LogicalTapeFreeze(state->tapeset, state->result_tape);
+		state->status = TSS_SORTEDONTAPE;
+		return;
+	}
+
 	/* End of step D2: rewind all output tapes to prepare for merging */
 	for (tapenum = 0; tapenum < state->tapeRange; tapenum++)
 		LogicalTapeRewind(state->tapeset, tapenum, false);
@@ -3164,7 +3191,8 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 		 * converter it won't expect NULL values, and cost model is not
 		 * required to account for NULL, so in that case we avoid calling
 		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent).
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
 		 */
 		stup->datum1 = original;
 	}
@@ -3406,7 +3434,8 @@ copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 		 * converter it won't expect NULL values, and cost model is not
 		 * required to account for NULL, so in that case we avoid calling
 		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent).
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
 		 */
 		stup->datum1 = original;
 	}
@@ -3710,7 +3739,8 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 		 * converter it won't expect NULL values, and cost model is not
 		 * required to account for NULL, so in that case we avoid calling
 		 * converter and just set datum1 to zeroed representation (to be
-		 * consistent).
+		 * consistent, and to support cheap inequality tests for NULL
+		 * abbreviated keys).
 		 */
 		stup->datum1 = original;
 	}
diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h
index d31d9945d18..5cecd6d1b86 100644
--- a/src/include/utils/tuplesort.h
+++ b/src/include/utils/tuplesort.h
@@ -93,13 +93,13 @@ extern void tuplesort_putdatum(Tuplesortstate *state, Datum val,
 extern void tuplesort_performsort(Tuplesortstate *state);
 
 extern bool tuplesort_gettupleslot(Tuplesortstate *state, bool forward,
-					   TupleTableSlot *slot);
+					   TupleTableSlot *slot, Datum *abbrev);
 extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward,
 					   bool *should_free);
 extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward,
 						bool *should_free);
 extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward,
-				   Datum *val, bool *isNull);
+				   Datum *val, bool *isNull, Datum *abbrev);
 
 extern bool tuplesort_skiptuples(Tuplesortstate *state, int64 ntuples,
 					 bool forward);