Diffstat (limited to 'src/backend/access/gist')
-rw-r--r--   src/backend/access/gist/gistbuild.c    | 510
-rw-r--r--   src/backend/access/gist/gistproc.c     | 229
-rw-r--r--   src/backend/access/gist/gistutil.c     |  53
-rw-r--r--   src/backend/access/gist/gistvalidate.c |   6
4 files changed, 696 insertions(+), 102 deletions(-)
diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index 671b5e9186f..230625cf1e2 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -3,6 +3,24 @@
  * gistbuild.c
  *    build algorithm for GiST indexes implementation.
  *
+ * There are two different strategies:
+ *
+ * 1. Sort all input tuples, pack them into GiST leaf pages in the sorted
+ *    order, and create downlinks and internal pages as we go. This builds
+ *    the index from the bottom up, similar to how B-tree index build
+ *    works.
+ *
+ * 2. Start with an empty index, and insert all tuples one by one.
+ *
+ * The sorted method is used if the operator classes for all columns have
+ * a 'sortsupport' defined. Otherwise, we resort to the second strategy.
+ *
+ * The second strategy can optionally use buffers at different levels of
+ * the tree to reduce I/O, see "Buffering build algorithm" in the README
+ * for a more detailed explanation. It initially calls insert over and
+ * over, but switches to the buffered algorithm after a certain number of
+ * tuples (unless buffering mode is disabled).
+ *
  *
  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
@@ -28,6 +46,7 @@
 #include "storage/smgr.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
+#include "utils/tuplesort.h"
 
 /* Step of index tuples for check whether to switch to buffering build mode */
 #define BUFFERING_MODE_SWITCH_CHECK_STEP 256
@@ -40,8 +59,14 @@
  */
 #define BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET 4096
 
+/*
+ * Strategy used to build the index. It can change between the
+ * GIST_BUFFERING_* modes on the fly, but if the sorted method is used,
+ * that needs to be decided up-front and cannot be changed afterwards.
+ */
 typedef enum
 {
+    GIST_SORTED_BUILD,          /* bottom-up build by sorting */
     GIST_BUFFERING_DISABLED,    /* in regular build mode and aren't going to
                                  * switch */
     GIST_BUFFERING_AUTO,        /* in regular build mode, but will switch to
@@ -51,7 +76,7 @@ typedef enum
                                  * before switching to the buffering build
                                  * mode */
     GIST_BUFFERING_ACTIVE       /* in buffering build mode */
-} GistBufferingMode;
+} GistBuildMode;
 
 /* Working state for gistbuild and its callback */
 typedef struct
@@ -60,23 +85,58 @@ typedef struct
     Relation    heaprel;
     GISTSTATE  *giststate;
 
-    int64       indtuples;      /* number of tuples indexed */
-    int64       indtuplesSize;  /* total size of all indexed tuples */
-
     Size        freespace;      /* amount of free space to leave on pages */
 
+    GistBuildMode buildMode;
+
+    int64       indtuples;      /* number of tuples indexed */
+
     /*
      * Extra data structures used during a buffering build. 'gfbb' contains
      * information related to managing the build buffers. 'parentMap' is a
      * lookup table of the parent of each internal page.
      */
+    int64       indtuplesSize;  /* total size of all indexed tuples */
     GISTBuildBuffers *gfbb;
     HTAB       *parentMap;
 
-    GistBufferingMode bufferingMode;
+    /*
+     * Extra data structures used during a sorting build.
+     */
+    Tuplesortstate *sortstate;  /* state data for tuplesort.c */
+
+    BlockNumber pages_allocated;
+    BlockNumber pages_written;
+
+    int         ready_num_pages;
+    BlockNumber ready_blknos[XLR_MAX_BLOCK_ID];
+    Page        ready_pages[XLR_MAX_BLOCK_ID];
 } GISTBuildState;
 
+/*
+ * In sorted build, we use a stack of these structs, one for each level,
+ * to hold an in-memory buffer of the rightmost page at the level. When the
+ * page fills up, it is written out and a new page is allocated.
+ */
+typedef struct GistSortedBuildPageState
+{
+    Page        page;
+    struct GistSortedBuildPageState *parent;    /* Upper level, if any */
+} GistSortedBuildPageState;
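The shape of this level stack is easiest to see with a toy model. The sketch below is an editor's illustration only, not part of the patch, and every name in it is invented: it packs a stream of already-sorted keys into fixed-capacity "pages" and creates a parent level on demand whenever a page fills up, mirroring what gist_indexsortbuild_pagestate_add() and _flush() do further down in this file.

    #include <stdio.h>
    #include <stdlib.h>

    #define PAGE_CAPACITY 4         /* tuples per toy page */

    typedef struct ToyLevel
    {
        int         nitems;         /* tuples on the in-memory rightmost page */
        int         first_key;      /* stand-in for the real union/downlink key */
        struct ToyLevel *parent;    /* next level up, if any */
    } ToyLevel;

    static void toy_flush(ToyLevel *level, int level_no);

    /* Add one key to a level, flushing the rightmost page first if it is full. */
    static void
    toy_add(ToyLevel *level, int level_no, int key)
    {
        if (level->nitems == PAGE_CAPACITY)
            toy_flush(level, level_no);
        if (level->nitems == 0)
            level->first_key = key;
        level->nitems++;
    }

    /* "Write out" the rightmost page and push its downlink to the level above. */
    static void
    toy_flush(ToyLevel *level, int level_no)
    {
        printf("level %d: wrote page [%d..], %d tuples\n",
               level_no, level->first_key, level->nitems);

        /* Create the parent level on demand; it becomes the new root. */
        if (level->parent == NULL)
            level->parent = calloc(1, sizeof(ToyLevel));

        toy_add(level->parent, level_no + 1, level->first_key);
        level->nitems = 0;          /* start a fresh rightmost page */
    }

    int
    main(void)
    {
        ToyLevel    leaf = {0};

        for (int key = 0; key < 20; key++)  /* keys arrive in sorted order */
            toy_add(&leaf, 0, key);

        /* The final partial pages would be flushed bottom-up here, as in the patch. */
        return 0;
    }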
 /* prototypes for private functions */
+
+static void gistSortedBuildCallback(Relation index, ItemPointer tid,
+                                    Datum *values, bool *isnull,
+                                    bool tupleIsAlive, void *state);
+static void gist_indexsortbuild(GISTBuildState *state);
+static void gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+                                              GistSortedBuildPageState *pagestate,
+                                              IndexTuple itup);
+static void gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+                                                GistSortedBuildPageState *pagestate);
+static void gist_indexsortbuild_flush_ready_pages(GISTBuildState *state);
+
 static void gistInitBuffering(GISTBuildState *buildstate);
 static int  calculatePagesPerBuffer(GISTBuildState *buildstate, int levelStep);
 static void gistBuildCallback(Relation index,
@@ -107,10 +167,9 @@ static void gistMemorizeParent(GISTBuildState *buildstate, BlockNumber child,
 static void gistMemorizeAllDownlinks(GISTBuildState *buildstate, Buffer parent);
 static BlockNumber gistGetParent(GISTBuildState *buildstate, BlockNumber child);
 
+
 /*
- * Main entry point to GiST index build. Initially calls insert over and over,
- * but switches to more efficient buffering build algorithm after a certain
- * number of tuples (unless buffering mode is disabled).
+ * Main entry point to GiST index build.
  */
 IndexBuildResult *
 gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
@@ -118,124 +177,407 @@ gistbuild(Relation heap, Relation index, IndexInfo *indexInfo)
     IndexBuildResult *result;
     double      reltuples;
     GISTBuildState buildstate;
-    Buffer      buffer;
-    Page        page;
     MemoryContext oldcxt = CurrentMemoryContext;
     int         fillfactor;
+    Oid         SortSupportFnOids[INDEX_MAX_KEYS];
+    bool        hasallsortsupports;
+    int         keyscount = IndexRelationGetNumberOfKeyAttributes(index);
+    GiSTOptions *options = NULL;
+
+    /*
+     * We expect to be called exactly once for any index relation. If that's
+     * not the case, big trouble's what we have.
+     */
+    if (RelationGetNumberOfBlocks(index) != 0)
+        elog(ERROR, "index \"%s\" already contains data",
+             RelationGetRelationName(index));
+
+    if (index->rd_options)
+        options = (GiSTOptions *) index->rd_options;
 
     buildstate.indexrel = index;
     buildstate.heaprel = heap;
+    buildstate.sortstate = NULL;
+    buildstate.giststate = initGISTstate(index);
 
-    if (index->rd_options)
+    /*
+     * Create a temporary memory context that is reset once for each tuple
+     * processed.  (Note: we don't bother to make this a child of the
+     * giststate's scanCxt, so we have to delete it separately at the end.)
+     */
+    buildstate.giststate->tempCxt = createTempGistContext();
+
+    /*
+     * Choose build strategy.  If all keys support sorting, do that. Otherwise
+     * the default strategy is to switch to buffering mode when the index
+     * grows too large to fit in cache.
+     */
+    hasallsortsupports = true;
+    for (int i = 0; i < keyscount; i++)
     {
-        /* Get buffering mode from the options string */
-        GiSTOptions *options = (GiSTOptions *) index->rd_options;
+        SortSupportFnOids[i] = index_getprocid(index, i + 1,
+                                               GIST_SORTSUPPORT_PROC);
+        if (!OidIsValid(SortSupportFnOids[i]))
+        {
+            hasallsortsupports = false;
+            break;
+        }
+    }
 
+    if (hasallsortsupports)
+    {
+        buildstate.buildMode = GIST_SORTED_BUILD;
+    }
+    else if (options)
+    {
         if (options->buffering_mode == GIST_OPTION_BUFFERING_ON)
-            buildstate.bufferingMode = GIST_BUFFERING_STATS;
+            buildstate.buildMode = GIST_BUFFERING_STATS;
         else if (options->buffering_mode == GIST_OPTION_BUFFERING_OFF)
-            buildstate.bufferingMode = GIST_BUFFERING_DISABLED;
+            buildstate.buildMode = GIST_BUFFERING_DISABLED;
         else
-            buildstate.bufferingMode = GIST_BUFFERING_AUTO;
-
-        fillfactor = options->fillfactor;
+            buildstate.buildMode = GIST_BUFFERING_AUTO;
     }
     else
     {
-        /*
-         * By default, switch to buffering mode when the index grows too large
-         * to fit in cache.
-         */
-        buildstate.bufferingMode = GIST_BUFFERING_AUTO;
-        fillfactor = GIST_DEFAULT_FILLFACTOR;
+        buildstate.buildMode = GIST_BUFFERING_AUTO;
     }
-    /* Calculate target amount of free space to leave on pages */
+
+    /*
+     * Calculate target amount of free space to leave on pages.
+     */
+    fillfactor = options ? options->fillfactor : GIST_DEFAULT_FILLFACTOR;
     buildstate.freespace = BLCKSZ * (100 - fillfactor) / 100;
 
     /*
-     * We expect to be called exactly once for any index relation. If that's
-     * not the case, big trouble's what we have.
+     * Build the index using the chosen strategy.
      */
-    if (RelationGetNumberOfBlocks(index) != 0)
-        elog(ERROR, "index \"%s\" already contains data",
-             RelationGetRelationName(index));
+    buildstate.indtuples = 0;
+    buildstate.indtuplesSize = 0;
 
-    /* no locking is needed */
-    buildstate.giststate = initGISTstate(index);
+    if (buildstate.buildMode == GIST_SORTED_BUILD)
+    {
+        /*
+         * Sort all data, build the index from bottom up.
+         */
+        buildstate.sortstate = tuplesort_begin_index_gist(heap,
+                                                          index,
+                                                          maintenance_work_mem,
+                                                          NULL,
+                                                          false);
+
+        /* Scan the table, adding all tuples to the tuplesort */
+        reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+                                           gistSortedBuildCallback,
+                                           (void *) &buildstate, NULL);
+
+        /*
+         * Perform the sort and build index pages.
+         */
+        tuplesort_performsort(buildstate.sortstate);
+
+        gist_indexsortbuild(&buildstate);
+
+        tuplesort_end(buildstate.sortstate);
+    }
+    else
+    {
+        /*
+         * Initialize an empty index and insert all tuples, possibly using
+         * buffers on intermediate levels.
+         */
+        Buffer      buffer;
+        Page        page;
+
+        /* initialize the root page */
+        buffer = gistNewBuffer(index);
+        Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
+        page = BufferGetPage(buffer);
+
+        START_CRIT_SECTION();
+
+        GISTInitBuffer(buffer, F_LEAF);
+
+        MarkBufferDirty(buffer);
+        PageSetLSN(page, GistBuildLSN);
+
+        UnlockReleaseBuffer(buffer);
+
+        END_CRIT_SECTION();
+
+        /* Scan the table, inserting all the tuples to the index. */
+        reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
+                                           gistBuildCallback,
+                                           (void *) &buildstate, NULL);
+
+        /*
+         * If buffering was used, flush out all the tuples that are still in
+         * the buffers.
+         */
+        if (buildstate.buildMode == GIST_BUFFERING_ACTIVE)
+        {
+            elog(DEBUG1, "all tuples processed, emptying buffers");
+            gistEmptyAllBuffers(&buildstate);
+            gistFreeBuildBuffers(buildstate.gfbb);
+        }
+
+        /*
+         * We didn't write WAL records as we built the index, so if
+         * WAL-logging is required, write all pages to the WAL now.
+         */
+        if (RelationNeedsWAL(index))
+        {
+            log_newpage_range(index, MAIN_FORKNUM,
+                              0, RelationGetNumberOfBlocks(index),
+                              true);
+        }
+    }
+
+    /* okay, all heap tuples are indexed */
+    MemoryContextSwitchTo(oldcxt);
+    MemoryContextDelete(buildstate.giststate->tempCxt);
+
+    freeGISTstate(buildstate.giststate);
 
     /*
-     * Create a temporary memory context that is reset once for each tuple
-     * processed.  (Note: we don't bother to make this a child of the
-     * giststate's scanCxt, so we have to delete it separately at the end.)
+     * Return statistics
      */
-    buildstate.giststate->tempCxt = createTempGistContext();
+    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
 
-    /* initialize the root page */
-    buffer = gistNewBuffer(index);
-    Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
-    page = BufferGetPage(buffer);
+    result->heap_tuples = reltuples;
+    result->index_tuples = (double) buildstate.indtuples;
 
-    START_CRIT_SECTION();
+    return result;
+}
 
-    GISTInitBuffer(buffer, F_LEAF);
+/*-------------------------------------------------------------------------
+ * Routines for sorted build
+ *-------------------------------------------------------------------------
+ */
 
-    MarkBufferDirty(buffer);
-    PageSetLSN(page, GistBuildLSN);
+/*
+ * Per-tuple callback for table_index_build_scan.
+ */
+static void
+gistSortedBuildCallback(Relation index,
+                        ItemPointer tid,
+                        Datum *values,
+                        bool *isnull,
+                        bool tupleIsAlive,
+                        void *state)
+{
+    GISTBuildState *buildstate = (GISTBuildState *) state;
+    MemoryContext oldCtx;
+    Datum       compressed_values[INDEX_MAX_KEYS];
 
-    UnlockReleaseBuffer(buffer);
+    oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
 
-    END_CRIT_SECTION();
+    /* Form an index tuple and point it at the heap tuple */
+    gistCompressValues(buildstate->giststate, index,
+                       values, isnull,
+                       true, compressed_values);
 
-    /* build the index */
-    buildstate.indtuples = 0;
-    buildstate.indtuplesSize = 0;
+    tuplesort_putindextuplevalues(buildstate->sortstate,
+                                  buildstate->indexrel,
+                                  tid,
+                                  compressed_values, isnull);
+
+    MemoryContextSwitchTo(oldCtx);
+    MemoryContextReset(buildstate->giststate->tempCxt);
+
+    /* Update tuple count. */
+    buildstate->indtuples += 1;
+}
+
+/*
+ * Build GiST index from bottom up from pre-sorted tuples.
+ */
+static void
+gist_indexsortbuild(GISTBuildState *state)
+{
+    IndexTuple  itup;
+    GistSortedBuildPageState *leafstate;
+    GistSortedBuildPageState *pagestate;
+    Page        page;
+
+    state->pages_allocated = 0;
+    state->pages_written = 0;
+    state->ready_num_pages = 0;
 
     /*
-     * Do the heap scan.
+     * Write an empty page as a placeholder for the root page. It will be
+     * replaced with the real root page at the end.
      */
-    reltuples = table_index_build_scan(heap, index, indexInfo, true, true,
-                                       gistBuildCallback,
-                                       (void *) &buildstate, NULL);
+    page = palloc0(BLCKSZ);
+    smgrextend(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+               page, true);
+    state->pages_allocated++;
+    state->pages_written++;
+
+    /* Allocate a temporary buffer for the first leaf page. */
+    leafstate = palloc(sizeof(GistSortedBuildPageState));
+    leafstate->page = page;
+    leafstate->parent = NULL;
+    gistinitpage(page, F_LEAF);
 
     /*
-     * If buffering was used, flush out all the tuples that are still in the
-     * buffers.
+     * Fill index pages with tuples in the sorted order.
      */
-    if (buildstate.bufferingMode == GIST_BUFFERING_ACTIVE)
+    while ((itup = tuplesort_getindextuple(state->sortstate, true)) != NULL)
     {
-        elog(DEBUG1, "all tuples processed, emptying buffers");
-        gistEmptyAllBuffers(&buildstate);
-        gistFreeBuildBuffers(buildstate.gfbb);
+        gist_indexsortbuild_pagestate_add(state, leafstate, itup);
+        MemoryContextReset(state->giststate->tempCxt);
     }
 
-    /* okay, all heap tuples are indexed */
-    MemoryContextSwitchTo(oldcxt);
-    MemoryContextDelete(buildstate.giststate->tempCxt);
-
-    freeGISTstate(buildstate.giststate);
-
     /*
-     * We didn't write WAL records as we built the index, so if WAL-logging is
-     * required, write all pages to the WAL now.
+     * Write out the partially full non-root pages.
+     *
+     * Keep in mind that flush can build a new root.
      */
-    if (RelationNeedsWAL(index))
+    pagestate = leafstate;
+    while (pagestate->parent != NULL)
     {
-        log_newpage_range(index, MAIN_FORKNUM,
-                          0, RelationGetNumberOfBlocks(index),
-                          true);
+        GistSortedBuildPageState *parent;
+
+        gist_indexsortbuild_pagestate_flush(state, pagestate);
+        parent = pagestate->parent;
+        pfree(pagestate->page);
+        pfree(pagestate);
+        pagestate = parent;
     }
+    gist_indexsortbuild_flush_ready_pages(state);
+
+    /* Write out the root */
+    PageSetLSN(pagestate->page, GistBuildLSN);
+    smgrwrite(state->indexrel->rd_smgr, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+              pagestate->page, true);
+    if (RelationNeedsWAL(state->indexrel))
+        log_newpage(&state->indexrel->rd_node, MAIN_FORKNUM, GIST_ROOT_BLKNO,
+                    pagestate->page, true);
+
+    pfree(pagestate->page);
+    pfree(pagestate);
+}
+
+/*
+ * Add tuple to a page. If the page is full, write it out and re-initialize
+ * a new page first.
+ */
+static void
+gist_indexsortbuild_pagestate_add(GISTBuildState *state,
+                                  GistSortedBuildPageState *pagestate,
+                                  IndexTuple itup)
+{
+    Size        sizeNeeded;
+
+    /* Does the tuple fit? If not, flush */
+    sizeNeeded = IndexTupleSize(itup) + sizeof(ItemIdData) + state->freespace;
+    if (PageGetFreeSpace(pagestate->page) < sizeNeeded)
+        gist_indexsortbuild_pagestate_flush(state, pagestate);
+
+    gistfillbuffer(pagestate->page, &itup, 1, InvalidOffsetNumber);
+}
+
+static void
+gist_indexsortbuild_pagestate_flush(GISTBuildState *state,
+                                    GistSortedBuildPageState *pagestate)
+{
+    GistSortedBuildPageState *parent;
+    IndexTuple *itvec;
+    IndexTuple  union_tuple;
+    int         vect_len;
+    bool        isleaf;
+    BlockNumber blkno;
+    MemoryContext oldCtx;
+
+    /* check once per page */
+    CHECK_FOR_INTERRUPTS();
+
+    if (state->ready_num_pages == XLR_MAX_BLOCK_ID)
+        gist_indexsortbuild_flush_ready_pages(state);
+
     /*
-     * Return statistics
+     * The page is now complete. Assign a block number to it, and add it to
+     * the list of finished pages. (We don't write it out immediately, because
+     * we want to WAL-log the pages in batches.)
      */
-    result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+    blkno = state->pages_allocated++;
+    state->ready_blknos[state->ready_num_pages] = blkno;
+    state->ready_pages[state->ready_num_pages] = pagestate->page;
+    state->ready_num_pages++;
 
-    result->heap_tuples = reltuples;
-    result->index_tuples = (double) buildstate.indtuples;
+    isleaf = GistPageIsLeaf(pagestate->page);
 
-    return result;
+    /*
+     * Form a downlink tuple to represent all the tuples on the page.
+     */
+    oldCtx = MemoryContextSwitchTo(state->giststate->tempCxt);
+    itvec = gistextractpage(pagestate->page, &vect_len);
+    union_tuple = gistunion(state->indexrel, itvec, vect_len,
+                            state->giststate);
+    ItemPointerSetBlockNumber(&(union_tuple->t_tid), blkno);
+    MemoryContextSwitchTo(oldCtx);
+
+    /*
+     * Insert the downlink to the parent page. If this was the root, create a
+     * new page as the parent, which becomes the new root.
+     */
+    parent = pagestate->parent;
+    if (parent == NULL)
+    {
+        parent = palloc(sizeof(GistSortedBuildPageState));
+        parent->page = (Page) palloc(BLCKSZ);
+        parent->parent = NULL;
+        gistinitpage(parent->page, 0);
+
+        pagestate->parent = parent;
+    }
+    gist_indexsortbuild_pagestate_add(state, parent, union_tuple);
+
+    /* Re-initialize the page buffer for next page on this level. */
+    pagestate->page = palloc(BLCKSZ);
+    gistinitpage(pagestate->page, isleaf ? F_LEAF : 0);
+}
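The union tuple produced here must cover every tuple on the flushed page. For geometric opclasses that union is conceptually a running bounding box, as in this simplified sketch (illustration only; the patch gets the real key from the opclass 'union' support function via gistunion()):

    #include <stdio.h>
    #include <float.h>

    typedef struct ToyBox
    {
        double      xmin, ymin, xmax, ymax;
    } ToyBox;

    /* Bounding box covering all keys on a finished page: the downlink key. */
    static ToyBox
    toy_union(const ToyBox *keys, int n)
    {
        ToyBox      u = {DBL_MAX, DBL_MAX, -DBL_MAX, -DBL_MAX};

        for (int i = 0; i < n; i++)
        {
            if (keys[i].xmin < u.xmin) u.xmin = keys[i].xmin;
            if (keys[i].ymin < u.ymin) u.ymin = keys[i].ymin;
            if (keys[i].xmax > u.xmax) u.xmax = keys[i].xmax;
            if (keys[i].ymax > u.ymax) u.ymax = keys[i].ymax;
        }
        return u;
    }

    int
    main(void)
    {
        ToyBox      keys[] = {{0, 0, 1, 1}, {2, -1, 3, 0.5}};
        ToyBox      u = toy_union(keys, 2);

        printf("union: (%g,%g)-(%g,%g)\n", u.xmin, u.ymin, u.xmax, u.ymax);
        return 0;
    }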
+
+static void
+gist_indexsortbuild_flush_ready_pages(GISTBuildState *state)
+{
+    if (state->ready_num_pages == 0)
+        return;
+
+    for (int i = 0; i < state->ready_num_pages; i++)
+    {
+        Page        page = state->ready_pages[i];
+
+        /* Currently, the blocks must be buffered in order. */
+        if (state->ready_blknos[i] != state->pages_written)
+            elog(ERROR, "unexpected block number to flush GiST sorting build");
+
+        PageSetLSN(page, GistBuildLSN);
+
+        smgrextend(state->indexrel->rd_smgr,
+                   MAIN_FORKNUM,
+                   state->pages_written++,
+                   page,
+                   true);
+    }
+
+    if (RelationNeedsWAL(state->indexrel))
+        log_newpages(&state->indexrel->rd_node, MAIN_FORKNUM, state->ready_num_pages,
+                     state->ready_blknos, state->ready_pages, true);
+
+    for (int i = 0; i < state->ready_num_pages; i++)
+        pfree(state->ready_pages[i]);
+
+    state->ready_num_pages = 0;
+}
+
+/*-------------------------------------------------------------------------
+ * Routines for non-sorted build
+ *-------------------------------------------------------------------------
+ */
+
 /*
  * Attempt to switch to buffering mode.
  *
@@ -375,7 +717,7 @@ gistInitBuffering(GISTBuildState *buildstate)
     if (levelStep <= 0)
     {
         elog(DEBUG1, "failed to switch to buffered GiST build");
-        buildstate->bufferingMode = GIST_BUFFERING_DISABLED;
+        buildstate->buildMode = GIST_BUFFERING_DISABLED;
         return;
     }
 
@@ -392,7 +734,7 @@ gistInitBuffering(GISTBuildState *buildstate)
 
     gistInitParentMap(buildstate);
 
-    buildstate->bufferingMode = GIST_BUFFERING_ACTIVE;
+    buildstate->buildMode = GIST_BUFFERING_ACTIVE;
 
     elog(DEBUG1, "switched to buffered GiST build; level step = %d, pagesPerBuffer = %d",
          levelStep, pagesPerBuffer);
@@ -453,10 +795,12 @@ gistBuildCallback(Relation index,
     oldCtx = MemoryContextSwitchTo(buildstate->giststate->tempCxt);
 
     /* form an index tuple and point it at the heap tuple */
-    itup = gistFormTuple(buildstate->giststate, index, values, isnull, true);
+    itup = gistFormTuple(buildstate->giststate, index,
+                         values, isnull,
+                         true);
     itup->t_tid = *tid;
 
-    if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE)
+    if (buildstate->buildMode == GIST_BUFFERING_ACTIVE)
     {
         /* We have buffers, so use them. */
         gistBufferingBuildInsert(buildstate, itup);
@@ -478,7 +822,7 @@
     MemoryContextSwitchTo(oldCtx);
     MemoryContextReset(buildstate->giststate->tempCxt);
 
-    if (buildstate->bufferingMode == GIST_BUFFERING_ACTIVE &&
+    if (buildstate->buildMode == GIST_BUFFERING_ACTIVE &&
         buildstate->indtuples % BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET == 0)
     {
         /* Adjust the target buffer size now */
@@ -493,10 +837,10 @@
      * To avoid excessive calls to smgrnblocks(), only check this every
      * BUFFERING_MODE_SWITCH_CHECK_STEP index tuples
      */
-    if ((buildstate->bufferingMode == GIST_BUFFERING_AUTO &&
+    if ((buildstate->buildMode == GIST_BUFFERING_AUTO &&
          buildstate->indtuples % BUFFERING_MODE_SWITCH_CHECK_STEP == 0 &&
          effective_cache_size < smgrnblocks(index->rd_smgr, MAIN_FORKNUM)) ||
-        (buildstate->bufferingMode == GIST_BUFFERING_STATS &&
+        (buildstate->buildMode == GIST_BUFFERING_STATS &&
          buildstate->indtuples >= BUFFERING_MODE_TUPLE_SIZE_STATS_TARGET))
     {
         /*
diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c
index 9ace64c3c4a..27d9c0f77c3 100644
--- a/src/backend/access/gist/gistproc.c
+++ b/src/backend/access/gist/gistproc.c
@@ -24,6 +24,7 @@
 #include "utils/builtins.h"
 #include "utils/float.h"
 #include "utils/geo_decls.h"
+#include "utils/sortsupport.h"
 
 
 static bool gist_box_leaf_consistent(BOX *key, BOX *query,
@@ -31,6 +32,15 @@ static bool gist_box_leaf_consistent(BOX *key, BOX *query,
 static bool rtree_internal_consistent(BOX *key, BOX *query,
                                       StrategyNumber strategy);
 
+static uint64 point_zorder_internal(float4 x, float4 y);
+static uint64 part_bits32_by2(uint32 x);
+static uint32 ieee_float32_to_uint32(float f);
+static int  gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup);
+static Datum gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup);
+static int  gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup);
+static bool gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup);
+
+
 /* Minimum accepted ratio of split */
 #define LIMIT_RATIO 0.3
 
@@ -1540,3 +1550,222 @@ gist_poly_distance(PG_FUNCTION_ARGS)
 
     PG_RETURN_FLOAT8(distance);
 }
+
+/*
+ * Z-order routines for fast index build
+ */
+
+/*
+ * Compute Z-value of a point
+ *
+ * Z-order (also known as Morton Code) maps a two-dimensional point to a
+ * single integer, in a way that preserves locality. Points that are close in
+ * the two-dimensional space are mapped to integers that are not far from
+ * each other. We do that by interleaving the bits in the X and Y components.
+ *
+ * Morton Code is normally defined only for integers, but the X and Y values
+ * of a point are floating point. We expect floats to be in IEEE format.
+ */
+static uint64
+point_zorder_internal(float4 x, float4 y)
+{
+    uint32      ix = ieee_float32_to_uint32(x);
+    uint32      iy = ieee_float32_to_uint32(y);
+
+    /* Interleave the bits */
+    return part_bits32_by2(ix) | (part_bits32_by2(iy) << 1);
+}
+
+/* Interleave 32 bits with zeroes */
+static uint64
+part_bits32_by2(uint32 x)
+{
+    uint64      n = x;
+
+    n = (n | (n << 16)) & UINT64CONST(0x0000FFFF0000FFFF);
+    n = (n | (n << 8)) & UINT64CONST(0x00FF00FF00FF00FF);
+    n = (n | (n << 4)) & UINT64CONST(0x0F0F0F0F0F0F0F0F);
+    n = (n | (n << 2)) & UINT64CONST(0x3333333333333333);
+    n = (n | (n << 1)) & UINT64CONST(0x5555555555555555);
+
+    return n;
+}
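As a quick sanity check of the interleaving, consider x = 3 (binary 011) and y = 5 (binary 101): the X bits land on even positions and the Y bits on odd positions, giving z = 100111 in binary, or 39. The standalone snippet below (illustration only; the bit-spreading routine is copied from the patch under a different name) reproduces that:

    #include <stdio.h>
    #include <stdint.h>

    /* Same bit spreading as the patch's part_bits32_by2() */
    static uint64_t
    spread_by2(uint32_t x)
    {
        uint64_t    n = x;

        n = (n | (n << 16)) & 0x0000FFFF0000FFFFULL;
        n = (n | (n << 8)) & 0x00FF00FF00FF00FFULL;
        n = (n | (n << 4)) & 0x0F0F0F0F0F0F0F0FULL;
        n = (n | (n << 2)) & 0x3333333333333333ULL;
        n = (n | (n << 1)) & 0x5555555555555555ULL;
        return n;
    }

    int
    main(void)
    {
        uint32_t    x = 3;          /* binary 011 */
        uint32_t    y = 5;          /* binary 101 */

        /* X bits go to even positions, Y bits to odd positions */
        uint64_t    z = spread_by2(x) | (spread_by2(y) << 1);

        printf("z = %llu\n", (unsigned long long) z);   /* prints 39 */
        return 0;
    }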
+
+/*
+ * Convert a 32-bit IEEE float to uint32 in a way that preserves the ordering
+ */
+static uint32
+ieee_float32_to_uint32(float f)
+{
+    /*----
+     *
+     * IEEE 754 floating point format
+     * ------------------------------
+     *
+     * IEEE 754 floating point numbers have this format:
+     *
+     *        exponent (8 bits)
+     *        |
+     * s eeeeeeee mmmmmmmmmmmmmmmmmmmmmmm
+     * |          |
+     * sign       mantissa (23 bits)
+     *
+     * Infinity has all bits in the exponent set and the mantissa is all
+     * zeros. Negative infinity is the same but with the sign bit set.
+     *
+     * NaNs are represented with all bits in the exponent set, and at least
+     * one bit in the mantissa set. The mantissa bits can be used to
+     * distinguish different kinds of NaNs.
+     *
+     * The IEEE format has the nice property that when you take the bit
+     * representation and interpret it as an integer, the order is preserved,
+     * except for the sign. That holds for the +-Infinity values too.
+     *
+     * Mapping to uint32
+     * -----------------
+     *
+     * In order to have a smooth transition from negative to positive numbers,
+     * we map floats to unsigned integers like this:
+     *
+     * x < 0 to range 0-7FFFFFFF
+     * x = 0 to value 80000000 (negative zero maps to the adjacent 7FFFFFFF)
+     * x > 0 to range 80000001-FFFFFFFF
+     *
+     * We don't care to distinguish different kinds of NaNs, so they are all
+     * mapped to the same arbitrary value, FFFFFFFF. Because of the IEEE bit
+     * representation of NaNs, there aren't any non-NaN values that would be
+     * mapped to FFFFFFFF. In fact, there is a range of unused values on both
+     * ends of the uint32 space.
+     */
+    if (isnan(f))
+        return 0xFFFFFFFF;
+    else
+    {
+        union
+        {
+            float       f;
+            uint32      i;
+        }           u;
+
+        u.f = f;
+
+        /* Check the sign bit */
+        if ((u.i & 0x80000000) != 0)
+        {
+            /*
+             * Map the negative value to range 0-7FFFFFFF. This flips the sign
+             * bit to 0 in the same instruction.
+             */
+            Assert(f <= 0);     /* can be -0 */
+            u.i ^= 0xFFFFFFFF;
+        }
+        else
+        {
+            /* Map the positive value (or 0) to range 80000000-FFFFFFFF */
+            u.i |= 0x80000000;
+        }
+
+        return u.i;
+    }
+}
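To see the mapping in action, the standalone snippet below (illustration only) applies the same transformation to a few sample floats. The outputs are strictly increasing -- 3FFFFFFF, 407FFFFF, 7FFFFFFF, 80000000, BF800000, C0000000 -- with +0.0 landing on 80000000 and -0.0 on the adjacent 7FFFFFFF:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <math.h>

    /* Same order-preserving transformation as the patch's ieee_float32_to_uint32() */
    static uint32_t
    float_to_ordered_u32(float f)
    {
        uint32_t    i;

        if (isnan(f))
            return 0xFFFFFFFF;
        memcpy(&i, &f, sizeof(i));  /* well-defined way to grab the bits */
        if (i & 0x80000000)
            i ^= 0xFFFFFFFF;        /* negative: flip all bits */
        else
            i |= 0x80000000;        /* positive or +0: flip just the sign bit */
        return i;
    }

    int
    main(void)
    {
        float       in[] = {-2.0f, -1.0f, -0.0f, 0.0f, 1.0f, 2.0f};

        for (int i = 0; i < 6; i++)
            printf("%5.1f -> %08lX\n", (double) in[i],
                   (unsigned long) float_to_ordered_u32(in[i]));
        return 0;
    }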
+
+/*
+ * Compare the Z-order of points
+ */
+static int
+gist_bbox_zorder_cmp(Datum a, Datum b, SortSupport ssup)
+{
+    Point      *p1 = &(DatumGetBoxP(a)->low);
+    Point      *p2 = &(DatumGetBoxP(b)->low);
+    uint64      z1;
+    uint64      z2;
+
+    /*
+     * Do a quick check for equality first. It's not clear if this is worth
+     * it in general, but it certainly is when used as tie-breaker with
+     * abbreviated keys.
+     */
+    if (p1->x == p2->x && p1->y == p2->y)
+        return 0;
+
+    z1 = point_zorder_internal(p1->x, p1->y);
+    z2 = point_zorder_internal(p2->x, p2->y);
+    if (z1 > z2)
+        return 1;
+    else if (z1 < z2)
+        return -1;
+    else
+        return 0;
+}
+
+/*
+ * Abbreviated version of Z-order comparison
+ *
+ * The abbreviated format is a Z-order value computed from the two 32-bit
+ * floats. If SIZEOF_DATUM == 8, the 64-bit Z-order value fits fully in the
+ * abbreviated Datum, otherwise its most significant bits are used.
+ */
+static Datum
+gist_bbox_zorder_abbrev_convert(Datum original, SortSupport ssup)
+{
+    Point      *p = &(DatumGetBoxP(original)->low);
+    uint64      z;
+
+    z = point_zorder_internal(p->x, p->y);
+
+#if SIZEOF_DATUM == 8
+    return (Datum) z;
+#else
+    return (Datum) (z >> 32);
+#endif
+}
+
+static int
+gist_bbox_zorder_cmp_abbrev(Datum z1, Datum z2, SortSupport ssup)
+{
+    /*
+     * Compare the pre-computed Z-orders as unsigned integers. Datum is a
+     * typedef for 'uintptr_t', so no casting is required.
+     */
+    if (z1 > z2)
+        return 1;
+    else if (z1 < z2)
+        return -1;
+    else
+        return 0;
+}
+
+/*
+ * We never consider aborting the abbreviation.
+ *
+ * On 64-bit systems, the abbreviation is not lossy so it is always
+ * worthwhile. (Perhaps it's not on 32-bit systems, but we don't bother
+ * with logic to decide.)
+ */
+static bool
+gist_bbox_zorder_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+    return false;
+}
+
+/*
+ * Sort support routine for fast GiST index build by sorting.
+ */
+Datum
+gist_point_sortsupport(PG_FUNCTION_ARGS)
+{
+    SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+    if (ssup->abbreviate)
+    {
+        ssup->comparator = gist_bbox_zorder_cmp_abbrev;
+        ssup->abbrev_converter = gist_bbox_zorder_abbrev_convert;
+        ssup->abbrev_abort = gist_bbox_zorder_abbrev_abort;
+        ssup->abbrev_full_comparator = gist_bbox_zorder_cmp;
+    }
+    else
+    {
+        ssup->comparator = gist_bbox_zorder_cmp;
+    }
+    PG_RETURN_VOID();
+}
diff --git a/src/backend/access/gist/gistutil.c b/src/backend/access/gist/gistutil.c
index 0516059e3dd..615b5ade233 100644
--- a/src/backend/access/gist/gistutil.c
+++ b/src/backend/access/gist/gistutil.c
@@ -572,12 +572,31 @@ gistdentryinit(GISTSTATE *giststate, int nkey, GISTENTRY *e,
 
 IndexTuple
 gistFormTuple(GISTSTATE *giststate, Relation r,
-              Datum attdata[], bool isnull[], bool isleaf)
+              Datum *attdata, bool *isnull, bool isleaf)
 {
     Datum       compatt[INDEX_MAX_KEYS];
-    int         i;
     IndexTuple  res;
 
+    gistCompressValues(giststate, r, attdata, isnull, isleaf, compatt);
+
+    res = index_form_tuple(isleaf ? giststate->leafTupdesc :
+                           giststate->nonLeafTupdesc,
+                           compatt, isnull);
+
+    /*
+     * The offset number on tuples on internal pages is unused. For historical
+     * reasons, it is set to 0xffff.
+     */
+    ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff);
+    return res;
+}
+
+void
+gistCompressValues(GISTSTATE *giststate, Relation r,
+                   Datum *attdata, bool *isnull, bool isleaf, Datum *compatt)
+{
+    int         i;
+
     /*
      * Call the compress method on each attribute.
      */
@@ -617,17 +636,6 @@ gistFormTuple(GISTSTATE *giststate, Relation r,
             compatt[i] = attdata[i];
         }
     }
-
-    res = index_form_tuple(isleaf ? giststate->leafTupdesc :
-                           giststate->nonLeafTupdesc,
-                           compatt, isnull);
-
-    /*
-     * The offset number on tuples on internal pages is unused. For historical
-     * reasons, it is set to 0xffff.
- */ - ItemPointerSetOffsetNumber(&(res->t_tid), 0xffff); - return res; } /* @@ -745,14 +753,11 @@ gistpenalty(GISTSTATE *giststate, int attno, * Initialize a new index page */ void -GISTInitBuffer(Buffer b, uint32 f) +gistinitpage(Page page, uint32 f) { GISTPageOpaque opaque; - Page page; - Size pageSize; + Size pageSize = BLCKSZ; - pageSize = BufferGetPageSize(b); - page = BufferGetPage(b); PageInit(page, pageSize, sizeof(GISTPageOpaqueData)); opaque = GistPageGetOpaque(page); @@ -764,6 +769,18 @@ GISTInitBuffer(Buffer b, uint32 f) } /* + * Initialize a new index buffer + */ +void +GISTInitBuffer(Buffer b, uint32 f) +{ + Page page; + + page = BufferGetPage(b); + gistinitpage(page, f); +} + +/* * Verify that a freshly-read page looks sane. */ void diff --git a/src/backend/access/gist/gistvalidate.c b/src/backend/access/gist/gistvalidate.c index 2b9ab693be1..8a14620fab2 100644 --- a/src/backend/access/gist/gistvalidate.c +++ b/src/backend/access/gist/gistvalidate.c @@ -143,6 +143,10 @@ gistvalidate(Oid opclassoid) case GIST_OPTIONS_PROC: ok = check_amoptsproc_signature(procform->amproc); break; + case GIST_SORTSUPPORT_PROC: + ok = check_amproc_signature(procform->amproc, VOIDOID, true, + 1, 1, INTERNALOID); + break; default: ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), @@ -263,7 +267,7 @@ gistvalidate(Oid opclassoid) continue; /* got it */ if (i == GIST_DISTANCE_PROC || i == GIST_FETCH_PROC || i == GIST_COMPRESS_PROC || i == GIST_DECOMPRESS_PROC || - i == GIST_OPTIONS_PROC) + i == GIST_OPTIONS_PROC || i == GIST_SORTSUPPORT_PROC) continue; /* optional methods */ ereport(INFO, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |