From 7d80e93fb1f1059ba3691b2584bb16992d7d734e Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 24 Feb 2022 14:34:06 +0200
Subject: Fix data loss on crash after sorted GiST index build.

If a checkpoint happens during the index build, and the system crashes
after the checkpoint and the index build have finished, the data written
to the index before the checkpoint started could be lost. Because the
sorted build writes pages outside shared buffers, the checkpoint won't
have fsync'd them, and the WAL records that precede the checkpoint won't
be replayed at crash recovery either. Fix by calling smgrimmedsync() after
the index build, just like the B-tree index build does.

Backpatch to v14 where the sorted GiST index build was introduced.

Reported-by: Melanie Plageman
Discussion: https://www.postgresql.org/message-id/CAAKRu_ZJJynimxKj5xYBSziL62-iEtPE+fx-B=JzR=jUtP92mw@mail.gmail.com
---
 src/backend/access/gist/gistbuild.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'src/backend/access/gist/gistbuild.c')

diff --git a/src/backend/access/gist/gistbuild.c b/src/backend/access/gist/gistbuild.c
index f46a42197c9..ec28bfe89f0 100644
--- a/src/backend/access/gist/gistbuild.c
+++ b/src/backend/access/gist/gistbuild.c
@@ -461,6 +461,21 @@ gist_indexsortbuild(GISTBuildState *state)
 
 	pfree(pagestate->page);
 	pfree(pagestate);
+
+	/*
+	 * When we WAL-logged index pages, we must nonetheless fsync index files.
+	 * Since we're building outside shared buffers, a CHECKPOINT occurring
+	 * during the build has no way to flush the previously written data to
+	 * disk (indeed it won't know the index even exists).  A crash later on
+	 * would replay WAL from the checkpoint, therefore it wouldn't replay our
+	 * earlier WAL entries. If we do not fsync those pages here, they might
+	 * still not be on disk when the crash occurs.
+	 */
+	if (RelationNeedsWAL(state->indexrel))
+	{
+		RelationOpenSmgr(state->indexrel);
+		smgrimmedsync(state->indexrel->rd_smgr, MAIN_FORKNUM);
+	}
 }
 
 /*
-- 
cgit v1.2.3