Allow parallel CREATE INDEX for GIN indexes

Allow using parallel workers to build a GIN index, similarly to BTREE and BRIN. For large tables this may result in significant speedup when the build is CPU-bound.

The work is divided so that each worker builds index entries on a subset of the table, determined by the regular parallel scan used to read the data. Each worker uses a local tuplesort to sort and merge the entries for the same key. The TID lists do not overlap (for a given key), which means the merge sort simply concatenates the two lists. The merged entries are written into a shared tuplesort for the leader.

The leader needs to merge the sorted entries again, before writing them into the index. But this way a significant part of the work happens in the workers, and the leader is left with merging fewer large entries, which is more efficient.

Most of the parallelism infrastructure is a simplified copy of the code used by BTREE indexes, omitting the parts irrelevant for GIN indexes (e.g. uniqueness checks).

Original patch by me, with reviews and substantial improvements by Matthias van de Meent, certainly enough to make him a co-author.

Author: Tomas Vondra, Matthias van de Meent
Reviewed-by: Matthias van de Meent, Andy Fan, Kirill Reshke
Discussion: https://postgr.es/m/6ab4003f-a8b8-4d75-a67f-f25ad98582dc%40enterprisedb.com
author: Tomas Vondra <tomas.vondra@postgresql.org> 2025-03-03 16:53:03 +0100
committer: Tomas Vondra <tomas.vondra@postgresql.org> 2025-03-03 16:53:06 +0100
commit: 8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c (patch)
tree: 8b5775ca7cdb77a61c4ada41b45579e50bc3cf35 /src/backend/access/gin/ginutil.c
parent: 3f1db99bfabbb9d4afc41f362d9801512f4c7c65 (diff)
download: postgresql-8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c.tar.gz
postgresql-8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c.zip
1 files changed, 28 insertions, 2 deletions
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 5b643619754..0b67108bc34 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -20,6 +20,7 @@
 #include "access/xloginsert.h"
 #include "catalog/pg_collation.h"
 #include "catalog/pg_type.h"
+#include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "storage/indexfsm.h"
@@ -55,7 +56,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->amclusterable = false;
 	amroutine->ampredlocks = true;
 	amroutine->amcanparallel = false;
-	amroutine->amcanbuildparallel = false;
+	amroutine->amcanbuildparallel = true;
 	amroutine->amcaninclude = false;
 	amroutine->amusemaintenanceworkmem = true;
 	amroutine->amsummarizing = false;
@@ -74,7 +75,7 @@ ginhandler(PG_FUNCTION_ARGS)
 	amroutine->amgettreeheight = NULL;
 	amroutine->amoptions = ginoptions;
 	amroutine->amproperty = NULL;
-	amroutine->ambuildphasename = NULL;
+	amroutine->ambuildphasename = ginbuildphasename;
 	amroutine->amvalidate = ginvalidate;
 	amroutine->amadjustmembers = ginadjustmembers;
 	amroutine->ambeginscan = ginbeginscan;
@@ -702,3 +703,28 @@ ginUpdateStats(Relation index, const GinStatsData *stats, bool is_build)
 
 	END_CRIT_SECTION();
 }
+
+/*
+ *	ginbuildphasename() -- Return the display name of the index build phase identified by phasenum, or NULL if phasenum matches none of the cases below.
+ */
+char *
+ginbuildphasename(int64 phasenum)
+{
+	switch (phasenum)
+	{
+		case PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE:
+			return "initializing";
+		case PROGRESS_GIN_PHASE_INDEXBUILD_TABLESCAN:
+			return "scanning table";
+		case PROGRESS_GIN_PHASE_PERFORMSORT_1:
+			return "sorting tuples (workers)";
+		case PROGRESS_GIN_PHASE_MERGE_1:
+			return "merging tuples (workers)";
+		case PROGRESS_GIN_PHASE_PERFORMSORT_2:
+			return "sorting tuples";
+		case PROGRESS_GIN_PHASE_MERGE_2:
+			return "merging tuples";
+		default:
+			return NULL;	/* phasenum is not one of the phases listed above */
+	}
+}
author	Tomas Vondra <tomas.vondra@postgresql.org>	2025-03-03 16:53:03 +0100
committer	Tomas Vondra <tomas.vondra@postgresql.org>	2025-03-03 16:53:06 +0100
commit	8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c (patch)
tree	8b5775ca7cdb77a61c4ada41b45579e50bc3cf35 /src/backend/access/gin/ginutil.c
parent	3f1db99bfabbb9d4afc41f362d9801512f4c7c65 (diff)
download	postgresql-8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c.tar.gz postgresql-8492feb98f6df3f0f03e84ed56f0d1cbb2ac514c.zip