author    | Tomas Vondra <tomas.vondra@postgresql.org> | 2025-03-03 16:53:03 +0100
committer | Tomas Vondra <tomas.vondra@postgresql.org> | 2025-03-03 16:53:06 +0100
commit    | 8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c (patch)
tree      | 8b5775ca7cdb77a61c4ada41b45579e50bc3cf35
parent    | 3f1db99bfabbb9d4afc41f362d9801512f4c7c65 (diff)
Allow parallel CREATE INDEX for GIN indexes
Allow using parallel workers to build a GIN index, similarly to BTREE
and BRIN. For large tables this may result in significant speedup when
the build is CPU-bound.
The work is divided so that each worker builds index entries on a subset
of the table, determined by the regular parallel scan used to read the
data. Each worker uses a local tuplesort to sort and merge the entries
for the same key. The TID lists do not overlap (for a given key), which
means the merge sort simply concatenates the two lists. The merged
entries are written into a shared tuplesort for the leader.
The leader needs to merge the sorted entries again, before writing them
into the index. But this way a significant part of the work happens in
the workers, and the leader is left with merging fewer large entries,
which is more efficient.
Most of the parallelism infrastructure is a simplified copy of the code
used by BTREE indexes, omitting the parts irrelevant for GIN indexes
(e.g. uniqueness checks).
Original patch by me, with reviews and substantial improvements by
Matthias van de Meent, certainly enough to make him a co-author.
Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
-rw-r--r-- | src/backend/access/gin/gininsert.c         | 1649
-rw-r--r-- | src/backend/access/gin/ginutil.c            |   30
-rw-r--r-- | src/backend/access/transam/parallel.c       |    4
-rw-r--r-- | src/backend/utils/sort/tuplesortvariants.c  |  198
-rw-r--r-- | src/include/access/gin.h                    |   15
-rw-r--r-- | src/include/access/gin_private.h            |    1
-rw-r--r-- | src/include/access/gin_tuple.h              |   44
-rw-r--r-- | src/include/utils/tuplesort.h               |    8
-rw-r--r-- | src/tools/pgindent/typedefs.list            |    4
9 files changed, 1937 insertions, 16 deletions
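For context, a parallel GIN build is requested the same way as the existing parallel btree and BRIN builds: the planner sizes the worker count from `max_parallel_maintenance_workers` and `maintenance_work_mem`. The sketch below is only illustrative (table, column, and index names are hypothetical); it shows how such a build would typically be triggered and observed.

```sql
-- Hypothetical table/index names; shown only to illustrate how a parallel
-- GIN build is triggered and observed.
SET max_parallel_maintenance_workers = 4;
SET maintenance_work_mem = '256MB';

CREATE INDEX docs_body_gin ON docs USING gin (to_tsvector('english', body));

-- While the build runs, the GIN-specific subphases added by this commit
-- (e.g. "scanning table", "sorting tuples (workers)", "merging tuples")
-- are reported through the standard progress view:
SELECT pid, phase, tuples_done, tuples_total
  FROM pg_stat_progress_create_index;
```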
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index d1b5e8f0dd1..e399d867e0f 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -15,14 +15,126 @@ #include "postgres.h" #include "access/gin_private.h" +#include "access/gin_tuple.h" +#include "access/parallel.h" +#include "access/table.h" #include "access/tableam.h" #include "access/xloginsert.h" +#include "catalog/index.h" +#include "catalog/pg_collation.h" +#include "commands/progress.h" #include "miscadmin.h" #include "nodes/execnodes.h" +#include "pgstat.h" #include "storage/bufmgr.h" #include "storage/predicate.h" +#include "tcop/tcopprot.h" /* pgrminclude ignore */ +#include "utils/datum.h" #include "utils/memutils.h" #include "utils/rel.h" +#include "utils/builtins.h" + + +/* Magic numbers for parallel state sharing */ +#define PARALLEL_KEY_GIN_SHARED UINT64CONST(0xB000000000000001) +#define PARALLEL_KEY_TUPLESORT UINT64CONST(0xB000000000000002) +#define PARALLEL_KEY_QUERY_TEXT UINT64CONST(0xB000000000000003) +#define PARALLEL_KEY_WAL_USAGE UINT64CONST(0xB000000000000004) +#define PARALLEL_KEY_BUFFER_USAGE UINT64CONST(0xB000000000000005) + +/* + * Status for index builds performed in parallel. This is allocated in a + * dynamic shared memory segment. + */ +typedef struct GinBuildShared +{ + /* + * These fields are not modified during the build. They primarily exist + * for the benefit of worker processes that need to create state + * corresponding to that used by the leader. + */ + Oid heaprelid; + Oid indexrelid; + bool isconcurrent; + int scantuplesortstates; + + /* + * workersdonecv is used to monitor the progress of workers. All parallel + * participants must indicate that they are done before leader can use + * results built by the workers (and before leader can write the data into + * the index). + */ + ConditionVariable workersdonecv; + + /* + * mutex protects all following fields + * + * These fields contain status information of interest to GIN index builds + * that must work just the same when an index is built in parallel. + */ + slock_t mutex; + + /* + * Mutable state that is maintained by workers, and reported back to + * leader at end of the scans. + * + * nparticipantsdone is number of worker processes finished. + * + * reltuples is the total number of input heap tuples. + * + * indtuples is the total number of tuples that made it into the index. + */ + int nparticipantsdone; + double reltuples; + double indtuples; + + /* + * ParallelTableScanDescData data follows. Can't directly embed here, as + * implementations of the parallel table scan desc interface might need + * stronger alignment. + */ +} GinBuildShared; + +/* + * Return pointer to a GinBuildShared's parallel table scan. + * + * c.f. shm_toc_allocate as to why BUFFERALIGN is used, rather than just + * MAXALIGN. + */ +#define ParallelTableScanFromGinBuildShared(shared) \ + (ParallelTableScanDesc) ((char *) (shared) + BUFFERALIGN(sizeof(GinBuildShared))) + +/* + * Status for leader in parallel index build. + */ +typedef struct GinLeader +{ + /* parallel context itself */ + ParallelContext *pcxt; + + /* + * nparticipanttuplesorts is the exact number of worker processes + * successfully launched, plus one leader process if it participates as a + * worker (only DISABLE_LEADER_PARTICIPATION builds avoid leader + * participating as a worker). + */ + int nparticipanttuplesorts; + + /* + * Leader process convenience pointers to shared state (leader avoids TOC + * lookups). 
+ * + * GinBuildShared is the shared state for entire build. sharedsort is the + * shared, tuplesort-managed state passed to each process tuplesort. + * snapshot is the snapshot used by the scan iff an MVCC snapshot is + * required. + */ + GinBuildShared *ginshared; + Sharedsort *sharedsort; + Snapshot snapshot; + WalUsage *walusage; + BufferUsage *bufferusage; +} GinLeader; typedef struct { @@ -32,9 +144,58 @@ typedef struct MemoryContext tmpCtx; MemoryContext funcCtx; BuildAccumulator accum; + ItemPointerData tid; + int work_mem; + + /* + * bs_leader is only present when a parallel index build is performed, and + * only in the leader process. + */ + GinLeader *bs_leader; + int bs_worker_id; + + /* used to pass information from workers to leader */ + double bs_numtuples; + double bs_reltuples; + + /* + * The sortstate is used by workers (including the leader). It has to be + * part of the build state, because that's the only thing passed to the + * build callback etc. + */ + Tuplesortstate *bs_sortstate; + + /* + * The sortstate used only within a single worker for the first merge pass + * happenning there. In principle it doesn't need to be part of the build + * state and we could pass it around directly, but it's more convenient + * this way. And it's part of the build state, after all. + */ + Tuplesortstate *bs_worker_sort; } GinBuildState; +/* parallel index builds */ +static void _gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, + bool isconcurrent, int request); +static void _gin_end_parallel(GinLeader *ginleader, GinBuildState *state); +static Size _gin_parallel_estimate_shared(Relation heap, Snapshot snapshot); +static double _gin_parallel_heapscan(GinBuildState *buildstate); +static double _gin_parallel_merge(GinBuildState *buildstate); +static void _gin_leader_participate_as_worker(GinBuildState *buildstate, + Relation heap, Relation index); +static void _gin_parallel_scan_and_build(GinBuildState *buildstate, + GinBuildShared *ginshared, + Sharedsort *sharedsort, + Relation heap, Relation index, + int sortmem, bool progress); + +static Datum _gin_parse_tuple(GinTuple *a, ItemPointerData **items); +static GinTuple *_gin_build_tuple(OffsetNumber attrnum, unsigned char category, + Datum key, int16 typlen, bool typbyval, + ItemPointerData *items, uint32 nitems, + Size *len); + /* * Adds array of item pointers to tuple's posting list, or * creates posting tree and tuple pointing to tree in case @@ -313,12 +474,114 @@ ginBuildCallback(Relation index, ItemPointer tid, Datum *values, MemoryContextSwitchTo(oldCtx); } +/* + * ginFlushBuildState + * Write all data from BuildAccumulator into the tuplesort. 
+ */ +static void +ginFlushBuildState(GinBuildState *buildstate, Relation index) +{ + ItemPointerData *list; + Datum key; + GinNullCategory category; + uint32 nlist; + OffsetNumber attnum; + TupleDesc tdesc = RelationGetDescr(index); + + ginBeginBAScan(&buildstate->accum); + while ((list = ginGetBAEntry(&buildstate->accum, + &attnum, &key, &category, &nlist)) != NULL) + { + /* information about the key */ + Form_pg_attribute attr = TupleDescAttr(tdesc, (attnum - 1)); + + /* GIN tuple and tuple length */ + GinTuple *tup; + Size tuplen; + + /* there could be many entries, so be willing to abort here */ + CHECK_FOR_INTERRUPTS(); + + tup = _gin_build_tuple(attnum, category, + key, attr->attlen, attr->attbyval, + list, nlist, &tuplen); + + tuplesort_putgintuple(buildstate->bs_worker_sort, tup, tuplen); + + pfree(tup); + } + + MemoryContextReset(buildstate->tmpCtx); + ginInitBA(&buildstate->accum); +} + +/* + * ginBuildCallbackParallel + * Callback for the parallel index build. + * + * This is similar to the serial build callback ginBuildCallback, but + * instead of writing the accumulated entries into the index, each worker + * writes them into a (local) tuplesort. + * + * The worker then sorts and combines these entries, before writing them + * into a shared tuplesort for the leader (see _gin_parallel_scan_and_build + * for the whole process). + */ +static void +ginBuildCallbackParallel(Relation index, ItemPointer tid, Datum *values, + bool *isnull, bool tupleIsAlive, void *state) +{ + GinBuildState *buildstate = (GinBuildState *) state; + MemoryContext oldCtx; + int i; + + oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx); + + /* + * if scan wrapped around - flush accumulated entries and start anew + * + * With parallel scans, we don't have a guarantee the scan does not start + * half-way through the relation (serial builds disable sync scans and + * always start from block 0, parallel scans require allow_sync=true). + * + * Building the posting lists assumes the TIDs are monotonic and never go + * back, and the wrap around would break that. We handle that by detecting + * the wraparound, and flushing all entries. This means we'll later see + * two separate entries with non-overlapping TID lists (which can be + * combined by merge sort). + * + * To detect a wraparound, we remember the last TID seen by each worker + * (for any key). If the next TID seen by the worker is lower, the scan + * must have wrapped around. + */ + if (ItemPointerCompare(tid, &buildstate->tid) < 0) + ginFlushBuildState(buildstate, index); + + /* remember the TID we're about to process */ + buildstate->tid = *tid; + + for (i = 0; i < buildstate->ginstate.origTupdesc->natts; i++) + ginHeapTupleBulkInsert(buildstate, (OffsetNumber) (i + 1), + values[i], isnull[i], tid); + + /* + * If we've maxed out our available memory, dump everything to the + * tuplesort. We use half the per-worker fraction of maintenance_work_mem, + * the other half is used for the tuplesort. 
+ */ + if (buildstate->accum.allocatedMemory >= buildstate->work_mem * (Size) 1024) + ginFlushBuildState(buildstate, index); + + MemoryContextSwitchTo(oldCtx); +} + IndexBuildResult * ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) { IndexBuildResult *result; double reltuples; GinBuildState buildstate; + GinBuildState *state = &buildstate; Buffer RootBuffer, MetaBuffer; ItemPointerData *list; @@ -336,6 +599,12 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.indtuples = 0; memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); + /* Initialize fields for parallel build too. */ + buildstate.bs_numtuples = 0; + buildstate.bs_reltuples = 0; + buildstate.bs_leader = NULL; + memset(&buildstate.tid, 0, sizeof(ItemPointerData)); + /* initialize the meta page */ MetaBuffer = GinNewBuffer(index); @@ -375,25 +644,96 @@ ginbuild(Relation heap, Relation index, IndexInfo *indexInfo) buildstate.accum.ginstate = &buildstate.ginstate; ginInitBA(&buildstate.accum); + /* Report table scan phase started */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN); + /* - * Do the heap scan. We disallow sync scan here because dataPlaceToPage - * prefers to receive tuples in TID order. + * Attempt to launch parallel worker scan when required + * + * XXX plan_create_index_workers makes the number of workers dependent on + * maintenance_work_mem, requiring 32MB for each worker. For GIN that's + * reasonable too, because we sort the data just like btree. It does + * ignore the memory used to accumulate data in memory (set by work_mem), + * but there is no way to communicate that to plan_create_index_workers. */ - reltuples = table_index_build_scan(heap, index, indexInfo, false, true, - ginBuildCallback, &buildstate, NULL); + if (indexInfo->ii_ParallelWorkers > 0) + _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent, + indexInfo->ii_ParallelWorkers); - /* dump remaining entries to the index */ - oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); - ginBeginBAScan(&buildstate.accum); - while ((list = ginGetBAEntry(&buildstate.accum, - &attnum, &key, &category, &nlist)) != NULL) + /* + * If parallel build requested and at least one worker process was + * successfully launched, set up coordination state, wait for workers to + * complete. Then read all tuples from the shared tuplesort and insert + * them into the index. + * + * In serial mode, simply scan the table and build the index one index + * tuple at a time. + */ + if (state->bs_leader) { - /* there could be many entries, so be willing to abort here */ - CHECK_FOR_INTERRUPTS(); - ginEntryInsert(&buildstate.ginstate, attnum, key, category, - list, nlist, &buildstate.buildStats); + SortCoordinate coordinate; + + coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = false; + coordinate->nParticipants = + state->bs_leader->nparticipanttuplesorts; + coordinate->sharedsort = state->bs_leader->sharedsort; + + /* + * Begin leader tuplesort. + * + * In cases where parallelism is involved, the leader receives the + * same share of maintenance_work_mem as a serial sort (it is + * generally treated in the same way as a serial sort once we return). + * Parallel worker Tuplesortstates will have received only a fraction + * of maintenance_work_mem, though. + * + * We rely on the lifetime of the Leader Tuplesortstate almost not + * overlapping with any worker Tuplesortstate's lifetime. 
There may + * be some small overlap, but that's okay because we rely on leader + * Tuplesortstate only allocating a small, fixed amount of memory + * here. When its tuplesort_performsort() is called (by our caller), + * and significant amounts of memory are likely to be used, all + * workers must have already freed almost all memory held by their + * Tuplesortstates (they are about to go away completely, too). The + * overall effect is that maintenance_work_mem always represents an + * absolute high watermark on the amount of memory used by a CREATE + * INDEX operation, regardless of the use of parallelism or any other + * factor. + */ + state->bs_sortstate = + tuplesort_begin_index_gin(heap, index, + maintenance_work_mem, coordinate, + TUPLESORT_NONE); + + /* scan the relation in parallel and merge per-worker results */ + reltuples = _gin_parallel_merge(state); + + _gin_end_parallel(state->bs_leader, state); + } + else /* no parallel index build */ + { + /* + * Do the heap scan. We disallow sync scan here because + * dataPlaceToPage prefers to receive tuples in TID order. + */ + reltuples = table_index_build_scan(heap, index, indexInfo, false, true, + ginBuildCallback, &buildstate, NULL); + + /* dump remaining entries to the index */ + oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); + ginBeginBAScan(&buildstate.accum); + while ((list = ginGetBAEntry(&buildstate.accum, + &attnum, &key, &category, &nlist)) != NULL) + { + /* there could be many entries, so be willing to abort here */ + CHECK_FOR_INTERRUPTS(); + ginEntryInsert(&buildstate.ginstate, attnum, key, category, + list, nlist, &buildstate.buildStats); + } + MemoryContextSwitchTo(oldCtx); } - MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.funcCtx); MemoryContextDelete(buildstate.tmpCtx); @@ -533,3 +873,1284 @@ gininsert(Relation index, Datum *values, bool *isnull, return false; } + +/* + * Create parallel context, and launch workers for leader. + * + * buildstate argument should be initialized (with the exception of the + * tuplesort states, which may later be created based on shared + * state initially set up here). + * + * isconcurrent indicates if operation is CREATE INDEX CONCURRENTLY. + * + * request is the target number of parallel worker processes to launch. + * + * Sets buildstate's GinLeader, which caller must use to shut down parallel + * mode by passing it to _gin_end_parallel() at the very end of its index + * build. If not even a single worker process can be launched, this is + * never set, and caller should proceed with a serial index build. + */ +static void +_gin_begin_parallel(GinBuildState *buildstate, Relation heap, Relation index, + bool isconcurrent, int request) +{ + ParallelContext *pcxt; + int scantuplesortstates; + Snapshot snapshot; + Size estginshared; + Size estsort; + GinBuildShared *ginshared; + Sharedsort *sharedsort; + GinLeader *ginleader = (GinLeader *) palloc0(sizeof(GinLeader)); + WalUsage *walusage; + BufferUsage *bufferusage; + bool leaderparticipates = true; + int querylen; + +#ifdef DISABLE_LEADER_PARTICIPATION + leaderparticipates = false; +#endif + + /* + * Enter parallel mode, and create context for parallel build of gin index + */ + EnterParallelMode(); + Assert(request > 0); + pcxt = CreateParallelContext("postgres", "_gin_parallel_build_main", + request); + + scantuplesortstates = leaderparticipates ? request + 1 : request; + + /* + * Prepare for scan of the base relation. 
In a normal index build, we use + * SnapshotAny because we must retrieve all tuples and do our own time + * qual checks (because we have to index RECENTLY_DEAD tuples). In a + * concurrent build, we take a regular MVCC snapshot and index whatever's + * live according to that. + */ + if (!isconcurrent) + snapshot = SnapshotAny; + else + snapshot = RegisterSnapshot(GetTransactionSnapshot()); + + /* + * Estimate size for our own PARALLEL_KEY_GIN_SHARED workspace. + */ + estginshared = _gin_parallel_estimate_shared(heap, snapshot); + shm_toc_estimate_chunk(&pcxt->estimator, estginshared); + estsort = tuplesort_estimate_shared(scantuplesortstates); + shm_toc_estimate_chunk(&pcxt->estimator, estsort); + + shm_toc_estimate_keys(&pcxt->estimator, 2); + + /* + * Estimate space for WalUsage and BufferUsage -- PARALLEL_KEY_WAL_USAGE + * and PARALLEL_KEY_BUFFER_USAGE. + * + * If there are no extensions loaded that care, we could skip this. We + * have no way of knowing whether anyone's looking at pgWalUsage or + * pgBufferUsage, so do it unconditionally. + */ + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(WalUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + shm_toc_estimate_chunk(&pcxt->estimator, + mul_size(sizeof(BufferUsage), pcxt->nworkers)); + shm_toc_estimate_keys(&pcxt->estimator, 1); + + /* Finally, estimate PARALLEL_KEY_QUERY_TEXT space */ + if (debug_query_string) + { + querylen = strlen(debug_query_string); + shm_toc_estimate_chunk(&pcxt->estimator, querylen + 1); + shm_toc_estimate_keys(&pcxt->estimator, 1); + } + else + querylen = 0; /* keep compiler quiet */ + + /* Everyone's had a chance to ask for space, so now create the DSM */ + InitializeParallelDSM(pcxt); + + /* If no DSM segment was available, back out (do serial build) */ + if (pcxt->seg == NULL) + { + if (IsMVCCSnapshot(snapshot)) + UnregisterSnapshot(snapshot); + DestroyParallelContext(pcxt); + ExitParallelMode(); + return; + } + + /* Store shared build state, for which we reserved space */ + ginshared = (GinBuildShared *) shm_toc_allocate(pcxt->toc, estginshared); + /* Initialize immutable state */ + ginshared->heaprelid = RelationGetRelid(heap); + ginshared->indexrelid = RelationGetRelid(index); + ginshared->isconcurrent = isconcurrent; + ginshared->scantuplesortstates = scantuplesortstates; + + ConditionVariableInit(&ginshared->workersdonecv); + SpinLockInit(&ginshared->mutex); + + /* Initialize mutable state */ + ginshared->nparticipantsdone = 0; + ginshared->reltuples = 0.0; + ginshared->indtuples = 0.0; + + table_parallelscan_initialize(heap, + ParallelTableScanFromGinBuildShared(ginshared), + snapshot); + + /* + * Store shared tuplesort-private state, for which we reserved space. + * Then, initialize opaque state using tuplesort routine. + */ + sharedsort = (Sharedsort *) shm_toc_allocate(pcxt->toc, estsort); + tuplesort_initialize_shared(sharedsort, scantuplesortstates, + pcxt->seg); + + shm_toc_insert(pcxt->toc, PARALLEL_KEY_GIN_SHARED, ginshared); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_TUPLESORT, sharedsort); + + /* Store query string for workers */ + if (debug_query_string) + { + char *sharedquery; + + sharedquery = (char *) shm_toc_allocate(pcxt->toc, querylen + 1); + memcpy(sharedquery, debug_query_string, querylen + 1); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_QUERY_TEXT, sharedquery); + } + + /* + * Allocate space for each worker's WalUsage and BufferUsage; no need to + * initialize. 
+ */ + walusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(WalUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_WAL_USAGE, walusage); + bufferusage = shm_toc_allocate(pcxt->toc, + mul_size(sizeof(BufferUsage), pcxt->nworkers)); + shm_toc_insert(pcxt->toc, PARALLEL_KEY_BUFFER_USAGE, bufferusage); + + /* Launch workers, saving status for leader/caller */ + LaunchParallelWorkers(pcxt); + ginleader->pcxt = pcxt; + ginleader->nparticipanttuplesorts = pcxt->nworkers_launched; + if (leaderparticipates) + ginleader->nparticipanttuplesorts++; + ginleader->ginshared = ginshared; + ginleader->sharedsort = sharedsort; + ginleader->snapshot = snapshot; + ginleader->walusage = walusage; + ginleader->bufferusage = bufferusage; + + /* If no workers were successfully launched, back out (do serial build) */ + if (pcxt->nworkers_launched == 0) + { + _gin_end_parallel(ginleader, NULL); + return; + } + + /* Save leader state now that it's clear build will be parallel */ + buildstate->bs_leader = ginleader; + + /* Join heap scan ourselves */ + if (leaderparticipates) + _gin_leader_participate_as_worker(buildstate, heap, index); + + /* + * Caller needs to wait for all launched workers when we return. Make + * sure that the failure-to-start case will not hang forever. + */ + WaitForParallelWorkersToAttach(pcxt); +} + +/* + * Shut down workers, destroy parallel context, and end parallel mode. + */ +static void +_gin_end_parallel(GinLeader *ginleader, GinBuildState *state) +{ + int i; + + /* Shutdown worker processes */ + WaitForParallelWorkersToFinish(ginleader->pcxt); + + /* + * Next, accumulate WAL usage. (This must wait for the workers to finish, + * or we might get incomplete data.) + */ + for (i = 0; i < ginleader->pcxt->nworkers_launched; i++) + InstrAccumParallelQuery(&ginleader->bufferusage[i], &ginleader->walusage[i]); + + /* Free last reference to MVCC snapshot, if one was used */ + if (IsMVCCSnapshot(ginleader->snapshot)) + UnregisterSnapshot(ginleader->snapshot); + DestroyParallelContext(ginleader->pcxt); + ExitParallelMode(); +} + +/* + * Within leader, wait for end of heap scan. + * + * When called, parallel heap scan started by _gin_begin_parallel() will + * already be underway within worker processes (when leader participates + * as a worker, we should end up here just as workers are finishing). + * + * Returns the total number of heap tuples scanned. + */ +static double +_gin_parallel_heapscan(GinBuildState *state) +{ + GinBuildShared *ginshared = state->bs_leader->ginshared; + int nparticipanttuplesorts; + + nparticipanttuplesorts = state->bs_leader->nparticipanttuplesorts; + for (;;) + { + SpinLockAcquire(&ginshared->mutex); + if (ginshared->nparticipantsdone == nparticipanttuplesorts) + { + /* copy the data into leader state */ + state->bs_reltuples = ginshared->reltuples; + state->bs_numtuples = ginshared->indtuples; + + SpinLockRelease(&ginshared->mutex); + break; + } + SpinLockRelease(&ginshared->mutex); + + ConditionVariableSleep(&ginshared->workersdonecv, + WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN); + } + + ConditionVariableCancelSleep(); + + return state->bs_reltuples; +} + +/* + * Buffer used to accumulate TIDs from multiple GinTuples for the same key + * (we read these from the tuplesort, sorted by the key). + * + * This is similar to BuildAccumulator in that it's used to collect TIDs + * in memory before inserting them into the index, but it's much simpler + * as it only deals with a single index key at a time. 
+ * + * When adding TIDs to the buffer, we make sure to keep them sorted, both + * during the initial table scan (and detecting when the scan wraps around), + * and during merging (where we do mergesort). + */ +typedef struct GinBuffer +{ + OffsetNumber attnum; + GinNullCategory category; + Datum key; /* 0 if no key (and keylen == 0) */ + Size keylen; /* number of bytes (not typlen) */ + + /* type info */ + int16 typlen; + bool typbyval; + + /* array of TID values */ + int nitems; + SortSupport ssup; /* for sorting/comparing keys */ + ItemPointerData *items; +} GinBuffer; + +/* + * Check that TID array contains valid values, and that it's sorted (if we + * expect it to be). + */ +static void +AssertCheckItemPointers(GinBuffer *buffer) +{ +#ifdef USE_ASSERT_CHECKING + /* we should not have a buffer with no TIDs to sort */ + Assert(buffer->items != NULL); + Assert(buffer->nitems > 0); + + for (int i = 0; i < buffer->nitems; i++) + { + Assert(ItemPointerIsValid(&buffer->items[i])); + + /* don't check ordering for the first TID item */ + if (i == 0) + continue; + + Assert(ItemPointerCompare(&buffer->items[i - 1], &buffer->items[i]) < 0); + } +#endif +} + +/* + * GinBuffer checks + * + * Make sure the nitems/items fields are consistent (either the array is empty + * or not empty, the fields need to agree). If there are items, check ordering. + */ +static void +AssertCheckGinBuffer(GinBuffer *buffer) +{ +#ifdef USE_ASSERT_CHECKING + /* if we have any items, the array must exist */ + Assert(!((buffer->nitems > 0) && (buffer->items == NULL))); + + /* + * The buffer may be empty, in which case we must not call the check of + * item pointers, because that assumes non-emptiness. + */ + if (buffer->nitems == 0) + return; + + /* Make sure the item pointers are valid and sorted. */ + AssertCheckItemPointers(buffer); +#endif +} + +/* + * GinBufferInit + * Initialize buffer to store tuples for a GIN index. + * + * Initialize the buffer used to accumulate TID for a single key at a time + * (we process the data sorted), so we know when we received all data for + * a given key. + * + * Initializes sort support procedures for all index attributes. + */ +static GinBuffer * +GinBufferInit(Relation index) +{ + GinBuffer *buffer = palloc0(sizeof(GinBuffer)); + int i, + nKeys; + TupleDesc desc = RelationGetDescr(index); + + nKeys = IndexRelationGetNumberOfKeyAttributes(index); + + buffer->ssup = palloc0(sizeof(SortSupportData) * nKeys); + + /* + * Lookup ordering operator for the index key data type, and initialize + * the sort support function. + */ + for (i = 0; i < nKeys; i++) + { + Oid cmpFunc; + SortSupport sortKey = &buffer->ssup[i]; + Form_pg_attribute att = TupleDescAttr(desc, i); + + sortKey->ssup_cxt = CurrentMemoryContext; + sortKey->ssup_collation = index->rd_indcollation[i]; + + if (!OidIsValid(sortKey->ssup_collation)) + sortKey->ssup_collation = DEFAULT_COLLATION_OID; + + sortKey->ssup_nulls_first = false; + sortKey->ssup_attno = i + 1; + sortKey->abbreviate = false; + + Assert(sortKey->ssup_attno != 0); + + /* + * If the compare proc isn't specified in the opclass definition, look + * up the index key type's default btree comparator. 
+ */ + cmpFunc = index_getprocid(index, i + 1, GIN_COMPARE_PROC); + if (cmpFunc == InvalidOid) + { + TypeCacheEntry *typentry; + + typentry = lookup_type_cache(att->atttypid, + TYPECACHE_CMP_PROC_FINFO); + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify a comparison function for type %s", + format_type_be(att->atttypid)))); + + cmpFunc = typentry->cmp_proc_finfo.fn_oid; + } + + PrepareSortSupportComparisonShim(cmpFunc, sortKey); + } + + return buffer; +} + +/* Is the buffer empty, i.e. has no TID values in the array? */ +static bool +GinBufferIsEmpty(GinBuffer *buffer) +{ + return (buffer->nitems == 0); +} + +/* + * GinBufferKeyEquals + * Can the buffer store TIDs for the provided GIN tuple (same key)? + * + * Compare if the tuple matches the already accumulated data in the GIN + * buffer. Compare scalar fields first, before the actual key. + * + * Returns true if the key matches, and the TID belonds to the buffer, or + * false if the key does not match. + */ +static bool +GinBufferKeyEquals(GinBuffer *buffer, GinTuple *tup) +{ + int r; + Datum tupkey; + + AssertCheckGinBuffer(buffer); + + if (tup->attrnum != buffer->attnum) + return false; + + /* same attribute should have the same type info */ + Assert(tup->typbyval == buffer->typbyval); + Assert(tup->typlen == buffer->typlen); + + if (tup->category != buffer->category) + return false; + + /* + * For NULL/empty keys, this means equality, for normal keys we need to + * compare the actual key value. + */ + if (buffer->category != GIN_CAT_NORM_KEY) + return true; + + /* + * For the tuple, get either the first sizeof(Datum) bytes for byval + * types, or a pointer to the beginning of the data array. + */ + tupkey = (buffer->typbyval) ? *(Datum *) tup->data : PointerGetDatum(tup->data); + + r = ApplySortComparator(buffer->key, false, + tupkey, false, + &buffer->ssup[buffer->attnum - 1]); + + return (r == 0); +} + +/* + * GinBufferStoreTuple + * Add data (especially TID list) from a GIN tuple to the buffer. + * + * The buffer is expected to be empty (in which case it's initialized), or + * having the same key. The TID values from the tuple are combined with the + * stored values using a merge sort. + * + * The tuples (for the same key) are expected to be sorted by first TID. But + * this does not guarantee the lists do not overlap, especially in the leader, + * because the workers process interleaving data. There should be no overlaps + * in a single worker - it could happen when the parallel scan wraps around, + * but we detect that and flush the data (see ginBuildCallbackParallel). + * + * By sorting the GinTuple not only by key, but also by the first TID, we make + * it more less likely the lists will overlap during merge. We merge them using + * mergesort, but it's cheaper to just append one list to the other. + * + * How often can the lists overlap? There should be no overlaps in workers, + * and in the leader we can see overlaps between lists built by different + * workers. But the workers merge the items as much as possible, so there + * should not be too many. 
+ */ +static void +GinBufferStoreTuple(GinBuffer *buffer, GinTuple *tup) +{ + ItemPointerData *items; + Datum key; + + AssertCheckGinBuffer(buffer); + + key = _gin_parse_tuple(tup, &items); + + /* if the buffer is empty, set the fields (and copy the key) */ + if (GinBufferIsEmpty(buffer)) + { + buffer->category = tup->category; + buffer->keylen = tup->keylen; + buffer->attnum = tup->attrnum; + + buffer->typlen = tup->typlen; + buffer->typbyval = tup->typbyval; + + if (tup->category == GIN_CAT_NORM_KEY) + buffer->key = datumCopy(key, buffer->typbyval, buffer->typlen); + else + buffer->key = (Datum) 0; + } + + /* add the new TIDs into the buffer, combine using merge-sort */ + { + int nnew; + ItemPointer new; + + new = ginMergeItemPointers(buffer->items, buffer->nitems, + items, tup->nitems, &nnew); + + Assert(nnew == buffer->nitems + tup->nitems); + + if (buffer->items) + pfree(buffer->items); + + buffer->items = new; + buffer->nitems = nnew; + + AssertCheckItemPointers(buffer); + } +} + +/* + * GinBufferReset + * Reset the buffer into a state as if it contains no data. + */ +static void +GinBufferReset(GinBuffer *buffer) +{ + Assert(!GinBufferIsEmpty(buffer)); + + /* release byref values, do nothing for by-val ones */ + if ((buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval) + pfree(DatumGetPointer(buffer->key)); + + /* + * Not required, but makes it more likely to trigger NULL derefefence if + * using the value incorrectly, etc. + */ + buffer->key = (Datum) 0; + + buffer->attnum = 0; + buffer->category = 0; + buffer->keylen = 0; + buffer->nitems = 0; + + buffer->typlen = 0; + buffer->typbyval = 0; +} + +/* + * GinBufferFree + * Release memory associated with the GinBuffer (including TID array). + */ +static void +GinBufferFree(GinBuffer *buffer) +{ + if (buffer->items) + pfree(buffer->items); + + /* release byref values, do nothing for by-val ones */ + if (!GinBufferIsEmpty(buffer) && + (buffer->category == GIN_CAT_NORM_KEY) && !buffer->typbyval) + pfree(DatumGetPointer(buffer->key)); + + pfree(buffer); +} + +/* + * GinBufferCanAddKey + * Check if a given GIN tuple can be added to the current buffer. + * + * Returns true if the buffer is either empty or for the same index key. + */ +static bool +GinBufferCanAddKey(GinBuffer *buffer, GinTuple *tup) +{ + /* empty buffer can accept data for any key */ + if (GinBufferIsEmpty(buffer)) + return true; + + /* otherwise just data for the same key */ + return GinBufferKeyEquals(buffer, tup); +} + +/* + * Within leader, wait for end of heap scan and merge per-worker results. + * + * After waiting for all workers to finish, merge the per-worker results into + * the complete index. The results from each worker are sorted by block number + * (start of the page range). While combinig the per-worker results we merge + * summaries for the same page range, and also fill-in empty summaries for + * ranges without any tuples. + * + * Returns the total number of heap tuples scanned. 
+ */ +static double +_gin_parallel_merge(GinBuildState *state) +{ + GinTuple *tup; + Size tuplen; + double reltuples = 0; + GinBuffer *buffer; + + /* GIN tuples from workers, merged by leader */ + double numtuples = 0; + + /* wait for workers to scan table and produce partial results */ + reltuples = _gin_parallel_heapscan(state); + + /* Execute the sort */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_GIN_PHASE_PERFORMSORT_2); + + /* do the actual sort in the leader */ + tuplesort_performsort(state->bs_sortstate); + + /* initialize buffer to combine entries for the same key */ + buffer = GinBufferInit(state->ginstate.index); + + /* + * Set the progress target for the next phase. Reset the block number + * values set by table_index_build_scan + */ + { + const int progress_index[] = { + PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_CREATEIDX_TUPLES_TOTAL, + PROGRESS_SCAN_BLOCKS_TOTAL, + PROGRESS_SCAN_BLOCKS_DONE + }; + const int64 progress_vals[] = { + PROGRESS_GIN_PHASE_MERGE_2, + state->bs_numtuples, + 0, 0 + }; + + pgstat_progress_update_multi_param(4, progress_index, progress_vals); + } + + /* + * Read the GIN tuples from the shared tuplesort, sorted by category and + * key. That probably gives us order matching how data is organized in the + * index. + * + * We don't insert the GIN tuples right away, but instead accumulate as + * many TIDs for the same key as possible, and then insert that at once. + * This way we don't need to decompress/recompress the posting lists, etc. + */ + while ((tup = tuplesort_getgintuple(state->bs_sortstate, &tuplen, true)) != NULL) + { + CHECK_FOR_INTERRUPTS(); + + /* + * If the buffer can accept the new GIN tuple, just store it there and + * we're done. If it's a different key (or maybe too much data) flush + * the current contents into the index first. + */ + if (!GinBufferCanAddKey(buffer, tup)) + { + /* + * Buffer is not empty and it's storing a different key - flush + * the data into the insert, and start a new entry for current + * GinTuple. + */ + AssertCheckItemPointers(buffer); + + ginEntryInsert(&state->ginstate, + buffer->attnum, buffer->key, buffer->category, + buffer->items, buffer->nitems, &state->buildStats); + + /* discard the existing data */ + GinBufferReset(buffer); + } + + /* + * Remember data for the current tuple (either remember the new key, + * or append if to the existing data). + */ + GinBufferStoreTuple(buffer, tup); + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++numtuples); + } + + /* flush data remaining in the buffer (for the last key) */ + if (!GinBufferIsEmpty(buffer)) + { + AssertCheckItemPointers(buffer); + + ginEntryInsert(&state->ginstate, + buffer->attnum, buffer->key, buffer->category, + buffer->items, buffer->nitems, &state->buildStats); + + /* discard the existing data */ + GinBufferReset(buffer); + + /* Report progress */ + pgstat_progress_update_param(PROGRESS_CREATEIDX_TUPLES_DONE, + ++numtuples); + } + + /* relase all the memory */ + GinBufferFree(buffer); + + tuplesort_end(state->bs_sortstate); + + return reltuples; +} + +/* + * Returns size of shared memory required to store state for a parallel + * gin index build based on the snapshot its parallel scan will use. + */ +static Size +_gin_parallel_estimate_shared(Relation heap, Snapshot snapshot) +{ + /* c.f. 
shm_toc_allocate as to why BUFFERALIGN is used */ + return add_size(BUFFERALIGN(sizeof(GinBuildShared)), + table_parallelscan_estimate(heap, snapshot)); +} + +/* + * Within leader, participate as a parallel worker. + */ +static void +_gin_leader_participate_as_worker(GinBuildState *buildstate, Relation heap, Relation index) +{ + GinLeader *ginleader = buildstate->bs_leader; + int sortmem; + + /* + * Might as well use reliable figure when doling out maintenance_work_mem + * (when requested number of workers were not launched, this will be + * somewhat higher than it is for other workers). + */ + sortmem = maintenance_work_mem / ginleader->nparticipanttuplesorts; + + /* Perform work common to all participants */ + _gin_parallel_scan_and_build(buildstate, ginleader->ginshared, + ginleader->sharedsort, heap, index, + sortmem, true); +} + +/* + * _gin_process_worker_data + * First phase of the key merging, happening in the worker. + * + * Depending on the number of distinct keys, the TID lists produced by the + * callback may be very short (due to frequent evictions in the callback). + * But combining many tiny lists is expensive, so we try to do as much as + * possible in the workers and only then pass the results to the leader. + * + * We read the tuples sorted by the key, and merge them into larger lists. + * At the moment there's no memory limit, so this will just produce one + * huge (sorted) list per key in each worker. Which means the leader will + * do a very limited number of mergesorts, which is good. + */ +static void +_gin_process_worker_data(GinBuildState *state, Tuplesortstate *worker_sort, + bool progress) +{ + GinTuple *tup; + Size tuplen; + + GinBuffer *buffer; + + /* initialize buffer to combine entries for the same key */ + buffer = GinBufferInit(state->ginstate.index); + + /* sort the raw per-worker data */ + if (progress) + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_GIN_PHASE_PERFORMSORT_1); + + tuplesort_performsort(state->bs_worker_sort); + + /* reset the number of GIN tuples produced by this worker */ + state->bs_numtuples = 0; + + if (progress) + pgstat_progress_update_param(PROGRESS_CREATEIDX_SUBPHASE, + PROGRESS_GIN_PHASE_MERGE_1); + + /* + * Read the GIN tuples from the shared tuplesort, sorted by the key, and + * merge them into larger chunks for the leader to combine. + */ + while ((tup = tuplesort_getgintuple(worker_sort, &tuplen, true)) != NULL) + { + + CHECK_FOR_INTERRUPTS(); + + /* + * If the buffer can accept the new GIN tuple, just store it there and + * we're done. If it's a different key (or maybe too much data) flush + * the current contents into the index first. + */ + if (!GinBufferCanAddKey(buffer, tup)) + { + GinTuple *ntup; + Size ntuplen; + + /* + * Buffer is not empty and it's storing a different key - flush + * the data into the insert, and start a new entry for current + * GinTuple. + */ + AssertCheckItemPointers(buffer); + + ntup = _gin_build_tuple(buffer->attnum, buffer->category, + buffer->key, buffer->typlen, buffer->typbyval, + buffer->items, buffer->nitems, &ntuplen); + + tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); + state->bs_numtuples++; + + pfree(ntup); + + /* discard the existing data */ + GinBufferReset(buffer); + } + + /* + * Remember data for the current tuple (either remember the new key, + * or append if to the existing data). 
+ */ + GinBufferStoreTuple(buffer, tup); + } + + /* flush data remaining in the buffer (for the last key) */ + if (!GinBufferIsEmpty(buffer)) + { + GinTuple *ntup; + Size ntuplen; + + AssertCheckItemPointers(buffer); + + ntup = _gin_build_tuple(buffer->attnum, buffer->category, + buffer->key, buffer->typlen, buffer->typbyval, + buffer->items, buffer->nitems, &ntuplen); + + tuplesort_putgintuple(state->bs_sortstate, ntup, ntuplen); + state->bs_numtuples++; + + pfree(ntup); + + /* discard the existing data */ + GinBufferReset(buffer); + } + + /* relase all the memory */ + GinBufferFree(buffer); + + tuplesort_end(worker_sort); +} + +/* + * Perform a worker's portion of a parallel GIN index build sort. + * + * This generates a tuplesort for the worker portion of the table. + * + * sortmem is the amount of working memory to use within each worker, + * expressed in KBs. + * + * When this returns, workers are done, and need only release resources. + * + * Before feeding data into a shared tuplesort (for the leader process), + * the workers process data in two phases. + * + * 1) A worker reads a portion of rows from the table, accumulates entries + * in memory, and flushes them into a private tuplesort (e.g. because of + * using too much memory). + * + * 2) The private tuplesort gets sorted (by key and TID), the worker reads + * the data again, and combines the entries as much as possible. This has + * to happen eventually, and this way it's done in workers in parallel. + * + * Finally, the combined entries are written into the shared tuplesort, so + * that the leader can process them. + * + * How well this works (compared to just writing entries into the shared + * tuplesort) depends on the data set. For large tables with many distinct + * keys this helps a lot. With many distinct keys it's likely the buffers has + * to be flushed often, generating many entries with the same key and short + * TID lists. These entries need to be sorted and merged at some point, + * before writing them to the index. The merging is quite expensive, it can + * easily be ~50% of a serial build, and doing as much of it in the workers + * means it's parallelized. The leader still has to merge results from the + * workers, but it's much more efficient to merge few large entries than + * many tiny ones. + * + * This also reduces the amount of data the workers pass to the leader through + * the shared tuplesort. OTOH the workers need more space for the private sort, + * possibly up to 2x of the data, if no entries be merged in a worker. But this + * is very unlikely, and the only consequence is inefficiency, so we ignore it. 
+ */ +static void +_gin_parallel_scan_and_build(GinBuildState *state, + GinBuildShared *ginshared, Sharedsort *sharedsort, + Relation heap, Relation index, + int sortmem, bool progress) +{ + SortCoordinate coordinate; + TableScanDesc scan; + double reltuples; + IndexInfo *indexInfo; + + /* Initialize local tuplesort coordination state */ + coordinate = palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = true; + coordinate->nParticipants = -1; + coordinate->sharedsort = sharedsort; + + /* remember how much space is allowed for the accumulated entries */ + state->work_mem = (sortmem / 2); + + /* Begin "partial" tuplesort */ + state->bs_sortstate = tuplesort_begin_index_gin(heap, index, + state->work_mem, + coordinate, + TUPLESORT_NONE); + + /* Local per-worker sort of raw-data */ + state->bs_worker_sort = tuplesort_begin_index_gin(heap, index, + state->work_mem, + NULL, + TUPLESORT_NONE); + + /* Join parallel scan */ + indexInfo = BuildIndexInfo(index); + indexInfo->ii_Concurrent = ginshared->isconcurrent; + + scan = table_beginscan_parallel(heap, + ParallelTableScanFromGinBuildShared(ginshared)); + + reltuples = table_index_build_scan(heap, index, indexInfo, true, progress, + ginBuildCallbackParallel, state, scan); + + /* write remaining accumulated entries */ + ginFlushBuildState(state, index); + + /* + * Do the first phase of in-worker processing - sort the data produced by + * the callback, and combine them into much larger chunks and place that + * into the shared tuplestore for leader to process. + */ + _gin_process_worker_data(state, state->bs_worker_sort, progress); + + /* sort the GIN tuples built by this worker */ + tuplesort_performsort(state->bs_sortstate); + + state->bs_reltuples += reltuples; + + /* + * Done. Record ambuild statistics. + */ + SpinLockAcquire(&ginshared->mutex); + ginshared->nparticipantsdone++; + ginshared->reltuples += state->bs_reltuples; + ginshared->indtuples += state->bs_numtuples; + SpinLockRelease(&ginshared->mutex); + + /* Notify leader */ + ConditionVariableSignal(&ginshared->workersdonecv); + + tuplesort_end(state->bs_sortstate); +} + +/* + * Perform work within a launched parallel process. + */ +void +_gin_parallel_build_main(dsm_segment *seg, shm_toc *toc) +{ + char *sharedquery; + GinBuildShared *ginshared; + Sharedsort *sharedsort; + GinBuildState buildstate; + Relation heapRel; + Relation indexRel; + LOCKMODE heapLockmode; + LOCKMODE indexLockmode; + WalUsage *walusage; + BufferUsage *bufferusage; + int sortmem; + + /* + * The only possible status flag that can be set to the parallel worker is + * PROC_IN_SAFE_IC. 
+ */ + Assert((MyProc->statusFlags == 0) || + (MyProc->statusFlags == PROC_IN_SAFE_IC)); + + /* Set debug_query_string for individual workers first */ + sharedquery = shm_toc_lookup(toc, PARALLEL_KEY_QUERY_TEXT, true); + debug_query_string = sharedquery; + + /* Report the query string from leader */ + pgstat_report_activity(STATE_RUNNING, debug_query_string); + + /* Look up gin shared state */ + ginshared = shm_toc_lookup(toc, PARALLEL_KEY_GIN_SHARED, false); + + /* Open relations using lock modes known to be obtained by index.c */ + if (!ginshared->isconcurrent) + { + heapLockmode = ShareLock; + indexLockmode = AccessExclusiveLock; + } + else + { + heapLockmode = ShareUpdateExclusiveLock; + indexLockmode = RowExclusiveLock; + } + + /* Open relations within worker */ + heapRel = table_open(ginshared->heaprelid, heapLockmode); + indexRel = index_open(ginshared->indexrelid, indexLockmode); + + /* initialize the GIN build state */ + initGinState(&buildstate.ginstate, indexRel); + buildstate.indtuples = 0; + memset(&buildstate.buildStats, 0, sizeof(GinStatsData)); + memset(&buildstate.tid, 0, sizeof(ItemPointerData)); + + /* + * create a temporary memory context that is used to hold data not yet + * dumped out to the index + */ + buildstate.tmpCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin build temporary context", + ALLOCSET_DEFAULT_SIZES); + + /* + * create a temporary memory context that is used for calling + * ginExtractEntries(), and can be reset after each tuple + */ + buildstate.funcCtx = AllocSetContextCreate(CurrentMemoryContext, + "Gin build temporary context for user-defined function", + ALLOCSET_DEFAULT_SIZES); + + buildstate.accum.ginstate = &buildstate.ginstate; + ginInitBA(&buildstate.accum); + + + /* Look up shared state private to tuplesort.c */ + sharedsort = shm_toc_lookup(toc, PARALLEL_KEY_TUPLESORT, false); + tuplesort_attach_shared(sharedsort, seg); + + /* Prepare to track buffer usage during parallel execution */ + InstrStartParallelQuery(); + + /* + * Might as well use reliable figure when doling out maintenance_work_mem + * (when requested number of workers were not launched, this will be + * somewhat higher than it is for other workers). + */ + sortmem = maintenance_work_mem / ginshared->scantuplesortstates; + + _gin_parallel_scan_and_build(&buildstate, ginshared, sharedsort, + heapRel, indexRel, sortmem, false); + + /* Report WAL/buffer usage during parallel execution */ + bufferusage = shm_toc_lookup(toc, PARALLEL_KEY_BUFFER_USAGE, false); + walusage = shm_toc_lookup(toc, PARALLEL_KEY_WAL_USAGE, false); + InstrEndParallelQuery(&bufferusage[ParallelWorkerNumber], + &walusage[ParallelWorkerNumber]); + + index_close(indexRel, indexLockmode); + table_close(heapRel, heapLockmode); +} + +/* + * _gin_build_tuple + * Serialize the state for an index key into a tuple for tuplesort. + * + * The tuple has a number of scalar fields (mostly matching the build state), + * and then a data array that stores the key first, and then the TID list. + * + * For by-reference data types, we store the actual data. For by-val types + * we simply copy the whole Datum, so that we don't have to care about stuff + * like endianess etc. We could make it a little bit smaller, but it's not + * worth it - it's a tiny fraction of the data, and we need to MAXALIGN the + * start of the TID list anyway. So we wouldn't save anything. 
+ */ +static GinTuple * +_gin_build_tuple(OffsetNumber attrnum, unsigned char category, + Datum key, int16 typlen, bool typbyval, + ItemPointerData *items, uint32 nitems, + Size *len) +{ + GinTuple *tuple; + char *ptr; + + Size tuplen; + int keylen; + + /* + * Calculate how long is the key value. Only keys with GIN_CAT_NORM_KEY + * have actual non-empty key. We include varlena headers and \0 bytes for + * strings, to make it easier to access the data in-line. + * + * For byval types we simply copy the whole Datum. We could store just the + * necessary bytes, but this is simpler to work with and not worth the + * extra complexity. Moreover we still need to do the MAXALIGN to allow + * direct access to items pointers. + * + * XXX Note that for byval types we store the whole datum, no matter what + * the typlen value is. + */ + if (category != GIN_CAT_NORM_KEY) + keylen = 0; + else if (typbyval) + keylen = sizeof(Datum); + else if (typlen > 0) + keylen = typlen; + else if (typlen == -1) + keylen = VARSIZE_ANY(key); + else if (typlen == -2) + keylen = strlen(DatumGetPointer(key)) + 1; + else + elog(ERROR, "unexpected typlen value (%d)", typlen); + + /* + * Determine GIN tuple length with all the data included. Be careful about + * alignment, to allow direct access to item pointers. + */ + tuplen = SHORTALIGN(offsetof(GinTuple, data) + keylen) + + (sizeof(ItemPointerData) * nitems); + + *len = tuplen; + + /* + * Allocate space for the whole GIN tuple. + * + * The palloc0 is needed - writetup_index_gin will write the whole tuple + * to disk, so we need to make sure the padding bytes are defined + * (otherwise valgrind would report this). + */ + tuple = palloc0(tuplen); + + tuple->tuplen = tuplen; + tuple->attrnum = attrnum; + tuple->category = category; + tuple->keylen = keylen; + tuple->nitems = nitems; + + /* key type info */ + tuple->typlen = typlen; + tuple->typbyval = typbyval; + + /* + * Copy the key and items into the tuple. First the key value, which we + * can simply copy right at the beginning of the data array. + */ + if (category == GIN_CAT_NORM_KEY) + { + if (typbyval) + { + memcpy(tuple->data, &key, sizeof(Datum)); + } + else if (typlen > 0) /* byref, fixed length */ + { + memcpy(tuple->data, DatumGetPointer(key), typlen); + } + else if (typlen == -1) + { + memcpy(tuple->data, DatumGetPointer(key), keylen); + } + else if (typlen == -2) + { + memcpy(tuple->data, DatumGetPointer(key), keylen); + } + } + + /* finally, copy the TIDs into the array */ + ptr = (char *) tuple + SHORTALIGN(offsetof(GinTuple, data) + keylen); + + memcpy(ptr, items, sizeof(ItemPointerData) * nitems); + + return tuple; +} + +/* + * _gin_parse_tuple + * Deserialize the tuple from the tuplestore representation. + * + * Most of the fields are actually directly accessible, the only thing that + * needs more care is the key and the TID list. + * + * For the key, this returns a regular Datum representing it. It's either the + * actual key value, or a pointer to the beginning of the data array (which is + * where the data was copied by _gin_build_tuple). + * + * The pointer to the TID list is returned through 'items' (which is simply + * a pointer to the data array). 
+ */ +static Datum +_gin_parse_tuple(GinTuple *a, ItemPointerData **items) +{ + Datum key; + + if (items) + { + char *ptr = (char *) a + SHORTALIGN(offsetof(GinTuple, data) + a->keylen); + + *items = (ItemPointerData *) ptr; + } + + if (a->category != GIN_CAT_NORM_KEY) + return (Datum) 0; + + if (a->typbyval) + { + memcpy(&key, a->data, a->keylen); + return key; + } + + return PointerGetDatum(a->data); +} + +/* + * _gin_compare_tuples + * Compare GIN tuples, used by tuplesort during parallel index build. + * + * The scalar fields (attrnum, category) are compared first, the key value is + * compared last. The comparisons are done using type-specific sort support + * functions. + * + * If the key value matches, we compare the first TID value in the TID list, + * which means the tuples are merged in an order in which they are most + * likely to be simply concatenated. (This "first" TID will also allow us + * to determine a point up to which the list is fully determined and can be + * written into the index to enforce a memory limit etc.) + */ +int +_gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup) +{ + int r; + Datum keya, + keyb; + + if (a->attrnum < b->attrnum) + return -1; + + if (a->attrnum > b->attrnum) + return 1; + + if (a->category < b->category) + return -1; + + if (a->category > b->category) + return 1; + + if (a->category == GIN_CAT_NORM_KEY) + { + keya = _gin_parse_tuple(a, NULL); + keyb = _gin_parse_tuple(b, NULL); + + r = ApplySortComparator(keya, false, + keyb, false, + &ssup[a->attrnum - 1]); + + /* if the key is the same, consider the first TID in the array */ + return (r != 0) ? r : ItemPointerCompare(GinTupleGetFirst(a), + GinTupleGetFirst(b)); + } + + return ItemPointerCompare(GinTupleGetFirst(a), + GinTupleGetFirst(b)); +} diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 5b643619754..0b67108bc34 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -20,6 +20,7 @@ #include "access/xloginsert.h" #include "catalog/pg_collation.h" #include "catalog/pg_type.h" +#include "commands/progress.h" #include "commands/vacuum.h" #include "miscadmin.h" #include "storage/indexfsm.h" @@ -55,7 +56,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amclusterable = false; amroutine->ampredlocks = true; amroutine->amcanparallel = false; - amroutine->amcanbuildparallel = false; + amroutine->amcanbuildparallel = true; amroutine->amcaninclude = false; amroutine->amusemaintenanceworkmem = true; amroutine->amsummarizing = false; @@ -74,7 +75,7 @@ ginhandler(PG_FUNCTION_ARGS) amroutine->amgettreeheight = NULL; amroutine->amoptions = ginoptions; amroutine->amproperty = NULL; - amroutine->ambuildphasename = NULL; + amroutine->ambuildphasename = ginbuildphasename; amroutine->amvalidate = ginvalidate; amroutine->amadjustmembers = ginadjustmembers; amroutine->ambeginscan = ginbeginscan; @@ -702,3 +703,28 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build) END_CRIT_SECTION(); } + +/* + * ginbuildphasename() -- Return name of index build phase. 
+ */ +char * +ginbuildphasename(int64 phasenum) +{ + switch (phasenum) + { + case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE: + return "initializing"; + case PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN: + return "scanning table"; + case PROGRESS_GIN_PHASE_PERFORMSORT_1: + return "sorting tuples (workers)"; + case PROGRESS_GIN_PHASE_MERGE_1: + return "merging tuples (workers)"; + case PROGRESS_GIN_PHASE_PERFORMSORT_2: + return "sorting tuples"; + case PROGRESS_GIN_PHASE_MERGE_2: + return "merging tuples"; + default: + return NULL; + } +} diff --git a/src/backend/access/transam/parallel.c b/src/backend/access/transam/parallel.c index 4ab5df92133..f6d81d6e1fc 100644 --- a/src/backend/access/transam/parallel.c +++ b/src/backend/access/transam/parallel.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "access/brin.h" +#include "access/gin.h" #include "access/nbtree.h" #include "access/parallel.h" #include "access/session.h" @@ -149,6 +150,9 @@ static const struct "_brin_parallel_build_main", _brin_parallel_build_main }, { + "_gin_parallel_build_main", _gin_parallel_build_main + }, + { "parallel_vacuum_main", parallel_vacuum_main } }; diff --git a/src/backend/utils/sort/tuplesortvariants.c b/src/backend/utils/sort/tuplesortvariants.c index 913c4ef455e..eb8601e2257 100644 --- a/src/backend/utils/sort/tuplesortvariants.c +++ b/src/backend/utils/sort/tuplesortvariants.c @@ -20,10 +20,12 @@ #include "postgres.h" #include "access/brin_tuple.h" +#include "access/gin_tuple.h" #include "access/hash.h" #include "access/htup_details.h" #include "access/nbtree.h" #include "catalog/index.h" +#include "catalog/pg_collation.h" #include "executor/executor.h" #include "pg_trace.h" #include "utils/datum.h" @@ -46,6 +48,8 @@ static void removeabbrev_index(Tuplesortstate *state, SortTuple *stups, int count); static void removeabbrev_index_brin(Tuplesortstate *state, SortTuple *stups, int count); +static void removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, + int count); static void removeabbrev_datum(Tuplesortstate *state, SortTuple *stups, int count); static int comparetup_heap(const SortTuple *a, const SortTuple *b, @@ -74,6 +78,8 @@ static int comparetup_index_hash_tiebreak(const SortTuple *a, const SortTuple *b Tuplesortstate *state); static int comparetup_index_brin(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); +static int comparetup_index_gin(const SortTuple *a, const SortTuple *b, + Tuplesortstate *state); static void writetup_index(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup); static void readtup_index(Tuplesortstate *state, SortTuple *stup, @@ -82,6 +88,10 @@ static void writetup_index_brin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup); static void readtup_index_brin(Tuplesortstate *state, SortTuple *stup, LogicalTape *tape, unsigned int len); +static void writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, + SortTuple *stup); +static void readtup_index_gin(Tuplesortstate *state, SortTuple *stup, + LogicalTape *tape, unsigned int len); static int comparetup_datum(const SortTuple *a, const SortTuple *b, Tuplesortstate *state); static int comparetup_datum_tiebreak(const SortTuple *a, const SortTuple *b, @@ -569,6 +579,77 @@ tuplesort_begin_index_brin(int workMem, } Tuplesortstate * +tuplesort_begin_index_gin(Relation heapRel, + Relation indexRel, + int workMem, SortCoordinate coordinate, + int sortopt) +{ + Tuplesortstate *state = tuplesort_begin_common(workMem, coordinate, + sortopt); + TuplesortPublic *base = TuplesortstateGetPublic(state); + 
MemoryContext oldcontext;
+ int i;
+ TupleDesc desc = RelationGetDescr(indexRel);
+
+ oldcontext = MemoryContextSwitchTo(base->maincontext);
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "begin index sort: workMem = %d, randomAccess = %c",
+ workMem,
+ sortopt & TUPLESORT_RANDOMACCESS ? 't' : 'f');
+#endif
+
+ /*
+ * Multi-column GIN indexes expand the row into a separate index entry for
+ * each attribute, and that's what we write into the tuplesort. But we still
+ * need to initialize sortsupport for all the attributes.
+ */
+ base->nKeys = IndexRelationGetNumberOfKeyAttributes(indexRel);
+
+ /* Prepare SortSupport data for each column */
+ base->sortKeys = (SortSupport) palloc0(base->nKeys *
+ sizeof(SortSupportData));
+
+ for (i = 0; i < base->nKeys; i++)
+ {
+ SortSupport sortKey = base->sortKeys + i;
+ Form_pg_attribute att = TupleDescAttr(desc, i);
+ TypeCacheEntry *typentry;
+
+ sortKey->ssup_cxt = CurrentMemoryContext;
+ sortKey->ssup_collation = indexRel->rd_indcollation[i];
+ sortKey->ssup_nulls_first = false;
+ sortKey->ssup_attno = i + 1;
+ sortKey->abbreviate = false;
+
+ Assert(sortKey->ssup_attno != 0);
+
+ if (!OidIsValid(sortKey->ssup_collation))
+ sortKey->ssup_collation = DEFAULT_COLLATION_OID;
+
+ /*
+ * Look for an ordering for the index key data type, and then the sort
+ * support function.
+ */
+ typentry = lookup_type_cache(att->atttypid, TYPECACHE_LT_OPR);
+ PrepareSortSupportFromOrderingOp(typentry->lt_opr, sortKey);
+ }
+
+ base->removeabbrev = removeabbrev_index_gin;
+ base->comparetup = comparetup_index_gin;
+ base->writetup = writetup_index_gin;
+ base->readtup = readtup_index_gin;
+ base->haveDatum1 = false;
+ base->arg = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Tuplesortstate *
 tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 bool nullsFirstFlag,
 int workMem, SortCoordinate coordinate, int sortopt)
@@ -803,6 +884,37 @@ tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size)
 MemoryContextSwitchTo(oldcontext);
 }
+void
+tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size)
+{
+ SortTuple stup;
+ GinTuple *ctup;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->tuplecontext);
+ Size tuplen;
+
+ /* copy the GinTuple into the right memory context */
+ ctup = palloc(size);
+ memcpy(ctup, tuple, size);
+
+ stup.tuple = ctup;
+ stup.datum1 = (Datum) 0;
+ stup.isnull1 = false;
+
+ /* GetMemoryChunkSpace is not supported for bump contexts */
+ if (TupleSortUseBumpTupleCxt(base->sortopt))
+ tuplen = MAXALIGN(size);
+ else
+ tuplen = GetMemoryChunkSpace(ctup);
+
+ tuplesort_puttuple_common(state, &stup,
+ base->sortKeys &&
+ base->sortKeys->abbrev_converter &&
+ !stup.isnull1, tuplen);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
 /*
 * Accept one Datum while collecting input data for sort.
 *
@@ -975,6 +1087,29 @@ tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward)
 return &btup->tuple;
 }
+GinTuple *
+tuplesort_getgintuple(Tuplesortstate *state, Size *len, bool forward)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ MemoryContext oldcontext = MemoryContextSwitchTo(base->sortcontext);
+ SortTuple stup;
+ GinTuple *tup;
+
+ if (!tuplesort_gettuple_common(state, forward, &stup))
+ stup.tuple = NULL;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ if (!stup.tuple)
+ return NULL;
+
+ tup = (GinTuple *) stup.tuple;
+
+ *len = tup->tuplen;
+
+ return tup;
+}
+
 /*
 * Fetch the next Datum in either forward or back direction.
 * Returns false if no more datums.
@@ -1764,6 +1899,69 @@ readtup_index_brin(Tuplesortstate *state, SortTuple *stup,
 }
 /*
+ * Routines specialized for GIN case
+ */
+
+static void
+removeabbrev_index_gin(Tuplesortstate *state, SortTuple *stups, int count)
+{
+ Assert(false);
+ elog(ERROR, "removeabbrev_index_gin not implemented");
+}
+
+static int
+comparetup_index_gin(const SortTuple *a, const SortTuple *b,
+ Tuplesortstate *state)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+
+ Assert(!TuplesortstateGetPublic(state)->haveDatum1);
+
+ return _gin_compare_tuples((GinTuple *) a->tuple,
+ (GinTuple *) b->tuple,
+ base->sortKeys);
+}
+
+static void
+writetup_index_gin(Tuplesortstate *state, LogicalTape *tape, SortTuple *stup)
+{
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ GinTuple *tuple = (GinTuple *) stup->tuple;
+ unsigned int tuplen = tuple->tuplen;
+
+ tuplen = tuplen + sizeof(tuplen);
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+ LogicalTapeWrite(tape, tuple, tuple->tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeWrite(tape, &tuplen, sizeof(tuplen));
+}
+
+static void
+readtup_index_gin(Tuplesortstate *state, SortTuple *stup,
+ LogicalTape *tape, unsigned int len)
+{
+ GinTuple *tuple;
+ TuplesortPublic *base = TuplesortstateGetPublic(state);
+ unsigned int tuplen = len - sizeof(unsigned int);
+
+ /*
+ * Allocate space for the GIN sort tuple, which already has the proper
+ * length included in the header.
+ */
+ tuple = (GinTuple *) tuplesort_readtup_alloc(state, tuplen);
+
+ tuple->tuplen = tuplen;
+
+ LogicalTapeReadExact(tape, tuple, tuplen);
+ if (base->sortopt & TUPLESORT_RANDOMACCESS) /* need trailing length word? */
+ LogicalTapeReadExact(tape, &tuplen, sizeof(tuplen));
+ stup->tuple = (void *) tuple;
+
+ /* no abbreviations (FIXME maybe use attrnum for this?) */
+ stup->datum1 = (Datum) 0;
+}
+
+/*
 * Routines specialized for DatumTuple case
 */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 9ed48dfde4b..2e1076a0499 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -12,6 +12,8 @@
 #include "access/xlogreader.h"
 #include "lib/stringinfo.h"
+#include "nodes/execnodes.h"
+#include "storage/shm_toc.h"
 #include "storage/block.h"
 #include "utils/relcache.h"
@@ -37,6 +39,17 @@
 #define GIN_SEARCH_MODE_EVERYTHING 3 /* for internal use only */
+/*
+ * Constant definitions for progress reporting. Phase numbers must match
+ * ginbuildphasename.
+ */
+/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 (see progress.h) */
+#define PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN 2
+#define PROGRESS_GIN_PHASE_PERFORMSORT_1 3
+#define PROGRESS_GIN_PHASE_MERGE_1 4
+#define PROGRESS_GIN_PHASE_PERFORMSORT_2 5
+#define PROGRESS_GIN_PHASE_MERGE_2 6
+
 /*
 * GinStatsData represents stats data for planner use
 */
 typedef struct GinStatsData
@@ -88,4 +101,6 @@ extern void ginGetStats(Relation index, GinStatsData *stats);
 extern void ginUpdateStats(Relation index, const GinStatsData *stats,
 bool is_build);
+extern void _gin_parallel_build_main(dsm_segment *seg, shm_toc *toc);
+
 #endif /* GIN_H */
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 50478db9820..95d8805b66f 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -109,6 +109,7 @@ extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
 GinNullCategory *category);
+extern char *ginbuildphasename(int64 phasenum);
 /* gininsert.c */
 extern IndexBuildResult *ginbuild(Relation heap, Relation index,
diff --git a/src/include/access/gin_tuple.h b/src/include/access/gin_tuple.h
new file mode 100644
index 00000000000..ce555031335
--- /dev/null
+++ b/src/include/access/gin_tuple.h
@@ -0,0 +1,44 @@
+/*--------------------------------------------------------------------------
+ * gin_tuple.h
+ *	  Declarations for GIN tuples used by tuplesort during parallel builds.
+ *
+ * Copyright (c) 2006-2024, PostgreSQL Global Development Group
+ *
+ * src/include/access/gin_tuple.h
+ *--------------------------------------------------------------------------
+ */
+#ifndef GIN_TUPLE_H
+#define GIN_TUPLE_H
+
+#include "access/ginblock.h"
+#include "storage/itemptr.h"
+#include "utils/sortsupport.h"
+
+/*
+ * Data for one key in a GIN index.
+ */
+typedef struct GinTuple
+{
+ int tuplen; /* length of the whole tuple */
+ OffsetNumber attrnum; /* attnum of index key */
+ uint16 keylen; /* bytes in data for key value */
+ int16 typlen; /* typlen for key */
+ bool typbyval; /* typbyval for key */
+ signed char category; /* category: normal or NULL?
*/ + int nitems; /* number of TIDs in the data */ + char data[FLEXIBLE_ARRAY_MEMBER]; +} GinTuple; + +static inline ItemPointer +GinTupleGetFirst(GinTuple *tup) +{ + GinPostingList *list; + + list = (GinPostingList *) SHORTALIGN(tup->data + tup->keylen); + + return &list->first; +} + +extern int _gin_compare_tuples(GinTuple *a, GinTuple *b, SortSupport ssup); + +#endif /* GIN_TUPLE_H */ diff --git a/src/include/utils/tuplesort.h b/src/include/utils/tuplesort.h index c63f1e5d6da..ef79f259f93 100644 --- a/src/include/utils/tuplesort.h +++ b/src/include/utils/tuplesort.h @@ -22,6 +22,7 @@ #define TUPLESORT_H #include "access/brin_tuple.h" +#include "access/gin_tuple.h" #include "access/itup.h" #include "executor/tuptable.h" #include "storage/dsm.h" @@ -443,6 +444,10 @@ extern Tuplesortstate *tuplesort_begin_index_gist(Relation heapRel, int sortopt); extern Tuplesortstate *tuplesort_begin_index_brin(int workMem, SortCoordinate coordinate, int sortopt); +extern Tuplesortstate *tuplesort_begin_index_gin(Relation heapRel, + Relation indexRel, + int workMem, SortCoordinate coordinate, + int sortopt); extern Tuplesortstate *tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation, bool nullsFirstFlag, @@ -456,6 +461,7 @@ extern void tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel, ItemPointer self, const Datum *values, const bool *isnull); extern void tuplesort_putbrintuple(Tuplesortstate *state, BrinTuple *tuple, Size size); +extern void tuplesort_putgintuple(Tuplesortstate *state, GinTuple *tuple, Size size); extern void tuplesort_putdatum(Tuplesortstate *state, Datum val, bool isNull); @@ -465,6 +471,8 @@ extern HeapTuple tuplesort_getheaptuple(Tuplesortstate *state, bool forward); extern IndexTuple tuplesort_getindextuple(Tuplesortstate *state, bool forward); extern BrinTuple *tuplesort_getbrintuple(Tuplesortstate *state, Size *len, bool forward); +extern GinTuple *tuplesort_getgintuple(Tuplesortstate *state, Size *len, + bool forward); extern bool tuplesort_getdatum(Tuplesortstate *state, bool forward, bool copy, Datum *val, bool *isNull, Datum *abbrev); diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 56989aa0b84..19ff271ba50 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1032,11 +1032,14 @@ GinBtreeData GinBtreeDataLeafInsertData GinBtreeEntryInsertData GinBtreeStack +GinBuffer +GinBuildShared GinBuildState GinChkVal GinEntries GinEntryAccumulator GinIndexStat +GinLeader GinMetaPageData GinNullCategory GinOptions @@ -1052,6 +1055,7 @@ GinScanOpaqueData GinState GinStatsData GinTernaryValue +GinTuple GinTupleCollector GinVacuumState GistBuildMode |
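
Editor's note: for readers unfamiliar with the tuplesort interfaces added by this patch, the following sketch shows how they fit together on the consumer side. It is NOT code from the patch: make_gin_tuple_for_key() is a hypothetical stand-in for the worker code that serializes one (key, TID list) pair into a GinTuple, error handling and progress reporting are omitted, and the real worker/leader logic in gininsert.c does considerably more (parallel heap scan, per-worker buffering, TID-list merging).

/*
 * Minimal sketch of the GIN tuplesort lifecycle (illustrative only).
 */
#include "postgres.h"

#include "access/gin_tuple.h"
#include "utils/rel.h"
#include "utils/tuplesort.h"

extern GinTuple *make_gin_tuple_for_key(void);	/* hypothetical helper */

static void
gin_sort_sketch(Relation heap, Relation index, int workMem)
{
	Tuplesortstate *sortstate;
	GinTuple   *gtup;
	Size		len;

	/* serial sort: no SortCoordinate, forward-only reads */
	sortstate = tuplesort_begin_index_gin(heap, index, workMem,
										  NULL, TUPLESORT_NONE);

	/* feed serialized GIN tuples; tuplesort copies them, so free our copy */
	while ((gtup = make_gin_tuple_for_key()) != NULL)
	{
		tuplesort_putgintuple(sortstate, gtup, gtup->tuplen);
		pfree(gtup);
	}

	tuplesort_performsort(sortstate);

	/*
	 * Tuples come back ordered by (attrnum, category, key, first TID), per
	 * _gin_compare_tuples().  Because the TID ranges for a given key do not
	 * overlap, consecutive tuples with equal keys can simply have their TID
	 * lists concatenated before being written into the index.
	 */
	while ((gtup = tuplesort_getgintuple(sortstate, &len, true)) != NULL)
	{
		ItemPointer first = GinTupleGetFirst(gtup);

		/* ... accumulate or flush entries for the current key here ... */
		(void) first;
	}

	tuplesort_end(sortstate);
}

Per the commit message, the parallel build uses the same calls but in two stages: each worker sorts into a local tuplesort, merges entries per key, and writes the result into a shared tuplesort (set up with a SortCoordinate instead of the NULL above), which the leader then reads and merges once more before writing the index. The first-TID tiebreak in _gin_compare_tuples() is what makes that final merge mostly a concatenation of non-overlapping TID lists.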