aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2024-08-16 14:45:37 +0300
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2024-08-16 14:45:37 +0300
commit077ad4bd76b12cd4144e40ef5fba3f49528fa5e4 (patch)
tree1f0efb410939b8bfc9ef29484a4b359c07e54069
parent3943da46bc54006ec4849bc7541cf4e674b700eb (diff)
downloadpostgresql-077ad4bd76b12cd4144e40ef5fba3f49528fa5e4.tar.gz
postgresql-077ad4bd76b12cd4144e40ef5fba3f49528fa5e4.zip
Relax fsyncing at end of a bulk load that was not WAL-logged
And improve the comments. Backpatch to v17 where this was introduced. Reviewed-by: Noah Misch Discussion: https://www.postgresql.org/message-id/cac7d1b6-8358-40be-af0b-21bc9b27d34c@iki.fi
-rw-r--r--src/backend/storage/smgr/bulk_write.c72
1 files changed, 61 insertions, 11 deletions
diff --git a/src/backend/storage/smgr/bulk_write.c b/src/backend/storage/smgr/bulk_write.c
index 4a10ece4c39..1a5f3ce96e1 100644
--- a/src/backend/storage/smgr/bulk_write.c
+++ b/src/backend/storage/smgr/bulk_write.c
@@ -132,19 +132,69 @@ smgr_bulk_finish(BulkWriteState *bulkstate)
smgr_bulk_flush(bulkstate);
/*
- * When we wrote out the pages, we passed skipFsync=true to avoid the
- * overhead of registering all the writes with the checkpointer. Register
- * the whole relation now.
- *
- * There is one hole in that idea: If a checkpoint occurred while we were
- * writing the pages, it already missed fsyncing the pages we had written
- * before the checkpoint started. A crash later on would replay the WAL
- * starting from the checkpoint, therefore it wouldn't replay our earlier
- * WAL records. So if a checkpoint started after the bulk write, fsync
- * the files now.
+ * Fsync the relation, or register it for the next checkpoint, if
+ * necessary.
*/
- if (!SmgrIsTemp(bulkstate->smgr))
+ if (SmgrIsTemp(bulkstate->smgr))
{
+ /* Temporary relations don't need to be fsync'd, ever */
+ }
+ else if (!bulkstate->use_wal)
+ {
+ /*----------
+ * This is either an unlogged relation, or a permanent relation but we
+ * skipped WAL-logging because wal_level=minimal:
+ *
+ * A) Unlogged relation
+ *
+ * Unlogged relations will go away on crash, but they need to be
+ * fsync'd on a clean shutdown. It's sufficient to call
+ * smgrregistersync(), that ensures that the checkpointer will
+ * flush it at the shutdown checkpoint. (It will flush it on the
+ * next online checkpoint too, which is not strictly necessary.)
+ *
+ * Note that the init-fork of an unlogged relation is not
+ * considered unlogged for our purposes. It's treated like a
+ * regular permanent relation. The callers will pass use_wal=true
+ * for the init fork.
+ *
+ * B) Permanent relation, WAL-logging skipped because wal_level=minimal
+ *
+ * This is a new relation, and we didn't WAL-log the pages as we
+ * wrote, but they need to be fsync'd before commit.
+ *
+ * We don't need to do that here, however. The fsync() is done at
+ * commit, by smgrDoPendingSyncs() (*).
+ *
+ * (*) smgrDoPendingSyncs() might decide to WAL-log the whole
+ * relation at commit instead of fsyncing it, if the relation was
+ * very small, but it's smgrDoPendingSyncs() responsibility in any
+ * case.
+ *
+ * We cannot distinguish the two here, so conservatively assume it's
+ * an unlogged relation. A permanent relation with wal_level=minimal
+ * would require no actions, see above.
+ */
+ smgrregistersync(bulkstate->smgr, bulkstate->forknum);
+ }
+ else
+ {
+ /*
+ * Permanent relation, WAL-logged normally.
+ *
+ * We already WAL-logged all the pages, so they will be replayed from
+ * WAL on crash. However, when we wrote out the pages, we passed
+ * skipFsync=true to avoid the overhead of registering all the writes
+ * with the checkpointer. Register the whole relation now.
+ *
+ * There is one hole in that idea: If a checkpoint occurred while we
+ * were writing the pages, it already missed fsyncing the pages we had
+ * written before the checkpoint started. A crash later on would
+ * replay the WAL starting from the checkpoint, therefore it wouldn't
+ * replay our earlier WAL records. So if a checkpoint started after
+ * the bulk write, fsync the files now.
+ */
+
/*
* Prevent a checkpoint from starting between the GetRedoRecPtr() and
* smgrregistersync() calls.