aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorNathan Bossart <nathan@postgresql.org>2023-09-06 16:27:00 -0700
committerNathan Bossart <nathan@postgresql.org>2023-09-06 16:27:00 -0700
commitcccc6cdeb32f010f1cf777a9e9a85344a4317ab8 (patch)
tree989510ddcfe04a8503da11931e5c043dee189cfc /src
parent3ed19567198ddb34ab32be6ebdf132148287abdf (diff)
downloadpostgresql-cccc6cdeb32f010f1cf777a9e9a85344a4317ab8.tar.gz
postgresql-cccc6cdeb32f010f1cf777a9e9a85344a4317ab8.zip
Add support for syncfs() in frontend support functions.
This commit adds support for using syncfs() in fsync_pgdata() and fsync_dir_recurse() (which have been renamed to sync_pgdata() and sync_dir_recurse()). Like recovery_init_sync_method, sync_pgdata() calls syncfs() for the data directory, each tablespace, and pg_wal (if it is a symlink). For now, all of the frontend utilities that use these support functions are hard-coded to use fsync(), but a follow-up commit will allow specifying syncfs(). Co-authored-by: Justin Pryzby Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/20210930004340.GM831%40telsasoft.com
Diffstat (limited to 'src')
-rw-r--r--src/bin/initdb/initdb.c7
-rw-r--r--src/bin/pg_basebackup/pg_basebackup.c5
-rw-r--r--src/bin/pg_checksums/pg_checksums.c3
-rw-r--r--src/bin/pg_dump/pg_backup.h4
-rw-r--r--src/bin/pg_dump/pg_backup_archiver.c14
-rw-r--r--src/bin/pg_dump/pg_backup_archiver.h1
-rw-r--r--src/bin/pg_dump/pg_backup_directory.c2
-rw-r--r--src/bin/pg_dump/pg_dump.c3
-rw-r--r--src/bin/pg_rewind/file_ops.c8
-rw-r--r--src/bin/pg_rewind/pg_rewind.c1
-rw-r--r--src/bin/pg_rewind/pg_rewind.h2
-rw-r--r--src/common/file_utils.c191
-rw-r--r--src/include/common/file_utils.h5
13 files changed, 190 insertions, 56 deletions
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 905b979947f..51198e66655 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -165,6 +165,7 @@ static bool show_setting = false;
static bool data_checksums = false;
static char *xlog_dir = NULL;
static int wal_segment_size_mb = (DEFAULT_XLOG_SEG_SIZE) / (1024 * 1024);
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* internal vars */
@@ -3322,7 +3323,7 @@ main(int argc, char *argv[])
atexit(cleanup_directories_atexit);
- /* If we only need to fsync, just do it and exit */
+ /* If we only need to sync, just do it and exit */
if (sync_only)
{
setup_pgdata();
@@ -3333,7 +3334,7 @@ main(int argc, char *argv[])
fputs(_("syncing data to disk ... "), stdout);
fflush(stdout);
- fsync_pgdata(pg_data, PG_VERSION_NUM);
+ sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
check_ok();
return 0;
}
@@ -3396,7 +3397,7 @@ main(int argc, char *argv[])
{
fputs(_("syncing data to disk ... "), stdout);
fflush(stdout);
- fsync_pgdata(pg_data, PG_VERSION_NUM);
+ sync_pgdata(pg_data, PG_VERSION_NUM, sync_method);
check_ok();
}
else
diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c
index 1dc8efe0cb7..e9033af5c03 100644
--- a/src/bin/pg_basebackup/pg_basebackup.c
+++ b/src/bin/pg_basebackup/pg_basebackup.c
@@ -148,6 +148,7 @@ static bool verify_checksums = true;
static bool manifest = true;
static bool manifest_force_encode = false;
static char *manifest_checksums = NULL;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
static bool success = false;
static bool made_new_pgdata = false;
@@ -2199,11 +2200,11 @@ BaseBackup(char *compression_algorithm, char *compression_detail,
if (format == 't')
{
if (strcmp(basedir, "-") != 0)
- (void) fsync_dir_recurse(basedir);
+ (void) sync_dir_recurse(basedir, sync_method);
}
else
{
- (void) fsync_pgdata(basedir, serverVersion);
+ (void) sync_pgdata(basedir, serverVersion, sync_method);
}
}
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index 9011a19b4ef..836ee654059 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -44,6 +44,7 @@ static char *only_filenode = NULL;
static bool do_sync = true;
static bool verbose = false;
static bool showprogress = false;
+static DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
typedef enum
{
@@ -623,7 +624,7 @@ main(int argc, char *argv[])
if (do_sync)
{
pg_log_info("syncing data directory");
- fsync_pgdata(DataDir, PG_VERSION_NUM);
+ sync_pgdata(DataDir, PG_VERSION_NUM, sync_method);
}
pg_log_info("updating control file");
diff --git a/src/bin/pg_dump/pg_backup.h b/src/bin/pg_dump/pg_backup.h
index aba780ef4b1..3a57cdd97d4 100644
--- a/src/bin/pg_dump/pg_backup.h
+++ b/src/bin/pg_dump/pg_backup.h
@@ -24,6 +24,7 @@
#define PG_BACKUP_H
#include "common/compression.h"
+#include "common/file_utils.h"
#include "fe_utils/simple_list.h"
#include "libpq-fe.h"
@@ -307,7 +308,8 @@ extern Archive *OpenArchive(const char *FileSpec, const ArchiveFormat fmt);
extern Archive *CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode,
- SetupWorkerPtrType setupDumpWorker);
+ SetupWorkerPtrType setupDumpWorker,
+ DataDirSyncMethod sync_method);
/* The --list option */
extern void PrintTOCSummary(Archive *AHX);
diff --git a/src/bin/pg_dump/pg_backup_archiver.c b/src/bin/pg_dump/pg_backup_archiver.c
index 39ebcfec326..4d83381d840 100644
--- a/src/bin/pg_dump/pg_backup_archiver.c
+++ b/src/bin/pg_dump/pg_backup_archiver.c
@@ -66,7 +66,8 @@ typedef struct _parallelReadyList
static ArchiveHandle *_allocAH(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode,
- SetupWorkerPtrType setupWorkerPtr);
+ SetupWorkerPtrType setupWorkerPtr,
+ DataDirSyncMethod sync_method);
static void _getObjectDescription(PQExpBuffer buf, const TocEntry *te);
static void _printTocEntry(ArchiveHandle *AH, TocEntry *te, bool isData);
static char *sanitize_line(const char *str, bool want_hyphen);
@@ -238,11 +239,12 @@ Archive *
CreateArchive(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode,
- SetupWorkerPtrType setupDumpWorker)
+ SetupWorkerPtrType setupDumpWorker,
+ DataDirSyncMethod sync_method)
{
ArchiveHandle *AH = _allocAH(FileSpec, fmt, compression_spec,
- dosync, mode, setupDumpWorker);
+ dosync, mode, setupDumpWorker, sync_method);
return (Archive *) AH;
}
@@ -257,7 +259,8 @@ OpenArchive(const char *FileSpec, const ArchiveFormat fmt)
compression_spec.algorithm = PG_COMPRESSION_NONE;
AH = _allocAH(FileSpec, fmt, compression_spec, true,
- archModeRead, setupRestoreWorker);
+ archModeRead, setupRestoreWorker,
+ DATA_DIR_SYNC_METHOD_FSYNC);
return (Archive *) AH;
}
@@ -2233,7 +2236,7 @@ static ArchiveHandle *
_allocAH(const char *FileSpec, const ArchiveFormat fmt,
const pg_compress_specification compression_spec,
bool dosync, ArchiveMode mode,
- SetupWorkerPtrType setupWorkerPtr)
+ SetupWorkerPtrType setupWorkerPtr, DataDirSyncMethod sync_method)
{
ArchiveHandle *AH;
CompressFileHandle *CFH;
@@ -2287,6 +2290,7 @@ _allocAH(const char *FileSpec, const ArchiveFormat fmt,
AH->mode = mode;
AH->compression_spec = compression_spec;
AH->dosync = dosync;
+ AH->sync_method = sync_method;
memset(&(AH->sqlparse), 0, sizeof(AH->sqlparse));
diff --git a/src/bin/pg_dump/pg_backup_archiver.h b/src/bin/pg_dump/pg_backup_archiver.h
index 18b38c17abc..b07673933d4 100644
--- a/src/bin/pg_dump/pg_backup_archiver.h
+++ b/src/bin/pg_dump/pg_backup_archiver.h
@@ -312,6 +312,7 @@ struct _archiveHandle
pg_compress_specification compression_spec; /* Requested specification for
* compression */
bool dosync; /* data requested to be synced on sight */
+ DataDirSyncMethod sync_method;
ArchiveMode mode; /* File mode - r or w */
void *formatData; /* Header data specific to file format */
diff --git a/src/bin/pg_dump/pg_backup_directory.c b/src/bin/pg_dump/pg_backup_directory.c
index 7f2ac7c7fd1..679c60420bd 100644
--- a/src/bin/pg_dump/pg_backup_directory.c
+++ b/src/bin/pg_dump/pg_backup_directory.c
@@ -613,7 +613,7 @@ _CloseArchive(ArchiveHandle *AH)
* individually. Just recurse once through all the files generated.
*/
if (AH->dosync)
- fsync_dir_recurse(ctx->directory);
+ sync_dir_recurse(ctx->directory, AH->sync_method);
}
AH->FH = NULL;
}
diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index cebd2400fd1..280662bc332 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -357,6 +357,7 @@ main(int argc, char **argv)
char *compression_algorithm_str = "none";
char *error_detail = NULL;
bool user_compression_defined = false;
+ DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
static DumpOptions dopt;
@@ -777,7 +778,7 @@ main(int argc, char **argv)
/* Open the output file */
fout = CreateArchive(filename, archiveFormat, compression_spec,
- dosync, archiveMode, setupDumpWorker);
+ dosync, archiveMode, setupDumpWorker, sync_method);
/* Make dump options accessible right away */
SetArchiveOptions(fout, &dopt, NULL);
diff --git a/src/bin/pg_rewind/file_ops.c b/src/bin/pg_rewind/file_ops.c
index 25996b4da47..dd226859329 100644
--- a/src/bin/pg_rewind/file_ops.c
+++ b/src/bin/pg_rewind/file_ops.c
@@ -286,9 +286,9 @@ remove_target_symlink(const char *path)
*
* We do this once, for the whole data directory, for performance reasons. At
* the end of pg_rewind's run, the kernel is likely to already have flushed
- * most dirty buffers to disk. Additionally fsync_pgdata uses a two-pass
- * approach (only initiating writeback in the first pass), which often reduces
- * the overall amount of IO noticeably.
+ * most dirty buffers to disk. Additionally sync_pgdata uses a two-pass
+ * approach when fsync is specified (only initiating writeback in the first
+ * pass), which often reduces the overall amount of IO noticeably.
*/
void
sync_target_dir(void)
@@ -296,7 +296,7 @@ sync_target_dir(void)
if (!do_sync || dry_run)
return;
- fsync_pgdata(datadir_target, PG_VERSION_NUM);
+ sync_pgdata(datadir_target, PG_VERSION_NUM, sync_method);
}
diff --git a/src/bin/pg_rewind/pg_rewind.c b/src/bin/pg_rewind/pg_rewind.c
index 7f69f024412..bdfacf32632 100644
--- a/src/bin/pg_rewind/pg_rewind.c
+++ b/src/bin/pg_rewind/pg_rewind.c
@@ -74,6 +74,7 @@ bool showprogress = false;
bool dry_run = false;
bool do_sync = true;
bool restore_wal = false;
+DataDirSyncMethod sync_method = DATA_DIR_SYNC_METHOD_FSYNC;
/* Target history */
TimeLineHistoryEntry *targetHistory;
diff --git a/src/bin/pg_rewind/pg_rewind.h b/src/bin/pg_rewind/pg_rewind.h
index ef8bdc1fbb8..05729adfef3 100644
--- a/src/bin/pg_rewind/pg_rewind.h
+++ b/src/bin/pg_rewind/pg_rewind.h
@@ -13,6 +13,7 @@
#include "access/timeline.h"
#include "common/logging.h"
+#include "common/file_utils.h"
#include "datapagemap.h"
#include "libpq-fe.h"
#include "storage/block.h"
@@ -24,6 +25,7 @@ extern bool showprogress;
extern bool dry_run;
extern bool do_sync;
extern int WalSegSz;
+extern DataDirSyncMethod sync_method;
/* Target history */
extern TimeLineHistoryEntry *targetHistory;
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acbb..abe5129412d 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,19 +51,52 @@ static void walkdir(const char *path,
int (*action) (const char *fname, bool isdir),
bool process_symlinks);
+#ifdef HAVE_SYNCFS
+
/*
- * Issue fsync recursively on PGDATA and all its contents.
+ * do_syncfs -- Try to syncfs a file system
*
- * We fsync regular files and directories wherever they are, but we follow
+ * Reports errors trying to open the path. syncfs() errors are fatal.
+ */
+static void
+do_syncfs(const char *path)
+{
+ int fd;
+
+ fd = open(path, O_RDONLY, 0);
+
+ if (fd < 0)
+ {
+ pg_log_error("could not open file \"%s\": %m", path);
+ return;
+ }
+
+ if (syncfs(fd) < 0)
+ {
+ pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+ (void) close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ (void) close(fd);
+}
+
+#endif /* HAVE_SYNCFS */
+
+/*
+ * Synchronize PGDATA and all its contents.
+ *
+ * We sync regular files and directories wherever they are, but we follow
* symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
* Other symlinks are presumed to point at files we're not responsible for
- * fsyncing, and might not have privileges to write at all.
+ * syncing, and might not have privileges to write at all.
*
- * serverVersion indicates the version of the server to be fsync'd.
+ * serverVersion indicates the version of the server to be sync'd.
*/
void
-fsync_pgdata(const char *pg_data,
- int serverVersion)
+sync_pgdata(const char *pg_data,
+ int serverVersion,
+ DataDirSyncMethod sync_method)
{
bool xlog_is_symlink;
char pg_wal[MAXPGPATH];
@@ -89,49 +122,135 @@ fsync_pgdata(const char *pg_data,
xlog_is_symlink = true;
}
- /*
- * If possible, hint to the kernel that we're soon going to fsync the data
- * directory and its contents.
- */
+ switch (sync_method)
+ {
+ case DATA_DIR_SYNC_METHOD_SYNCFS:
+ {
+#ifndef HAVE_SYNCFS
+ pg_log_error("this build does not support sync method \"%s\"",
+ "syncfs");
+ exit(EXIT_FAILURE);
+#else
+ DIR *dir;
+ struct dirent *de;
+
+ /*
+ * On Linux, we don't have to open every single file one by
+ * one. We can use syncfs() to sync whole filesystems. We
+ * only expect filesystem boundaries to exist where we
+ * tolerate symlinks, namely pg_wal and the tablespaces, so we
+ * call syncfs() for each of those directories.
+ */
+
+ /* Sync the top level pgdata directory. */
+ do_syncfs(pg_data);
+
+ /* If any tablespaces are configured, sync each of those. */
+ dir = opendir(pg_tblspc);
+ if (dir == NULL)
+ pg_log_error("could not open directory \"%s\": %m",
+ pg_tblspc);
+ else
+ {
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ char subpath[MAXPGPATH * 2];
+
+ if (strcmp(de->d_name, ".") == 0 ||
+ strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(subpath, sizeof(subpath), "%s/%s",
+ pg_tblspc, de->d_name);
+ do_syncfs(subpath);
+ }
+
+ if (errno)
+ pg_log_error("could not read directory \"%s\": %m",
+ pg_tblspc);
+
+ (void) closedir(dir);
+ }
+
+ /* If pg_wal is a symlink, process that too. */
+ if (xlog_is_symlink)
+ do_syncfs(pg_wal);
+#endif /* HAVE_SYNCFS */
+ }
+ break;
+
+ case DATA_DIR_SYNC_METHOD_FSYNC:
+ {
+ /*
+ * If possible, hint to the kernel that we're soon going to
+ * fsync the data directory and its contents.
+ */
#ifdef PG_FLUSH_DATA_WORKS
- walkdir(pg_data, pre_sync_fname, false);
- if (xlog_is_symlink)
- walkdir(pg_wal, pre_sync_fname, false);
- walkdir(pg_tblspc, pre_sync_fname, true);
+ walkdir(pg_data, pre_sync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, pre_sync_fname, false);
+ walkdir(pg_tblspc, pre_sync_fname, true);
#endif
- /*
- * Now we do the fsync()s in the same order.
- *
- * The main call ignores symlinks, so in addition to specially processing
- * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
- * process_symlinks = true. Note that if there are any plain directories
- * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
- * so we don't worry about optimizing it.
- */
- walkdir(pg_data, fsync_fname, false);
- if (xlog_is_symlink)
- walkdir(pg_wal, fsync_fname, false);
- walkdir(pg_tblspc, fsync_fname, true);
+ /*
+ * Now we do the fsync()s in the same order.
+ *
+ * The main call ignores symlinks, so in addition to specially
+ * processing pg_wal if it's a symlink, pg_tblspc has to be
+ * visited separately with process_symlinks = true. Note that
+ * if there are any plain directories in pg_tblspc, they'll
+ * get fsync'd twice. That's not an expected case so we don't
+ * worry about optimizing it.
+ */
+ walkdir(pg_data, fsync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, fsync_fname, false);
+ walkdir(pg_tblspc, fsync_fname, true);
+ }
+ break;
+ }
}
/*
- * Issue fsync recursively on the given directory and all its contents.
+ * Synchronize the given directory and all its contents.
*
- * This is a convenient wrapper on top of walkdir().
+ * This is a convenient wrapper on top of walkdir() and do_syncfs().
*/
void
-fsync_dir_recurse(const char *dir)
+sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
{
- /*
- * If possible, hint to the kernel that we're soon going to fsync the data
- * directory and its contents.
- */
+ switch (sync_method)
+ {
+ case DATA_DIR_SYNC_METHOD_SYNCFS:
+ {
+#ifndef HAVE_SYNCFS
+ pg_log_error("this build does not support sync method \"%s\"",
+ "syncfs");
+ exit(EXIT_FAILURE);
+#else
+ /*
+ * On Linux, we don't have to open every single file one by
+ * one. We can use syncfs() to sync the whole filesystem.
+ */
+ do_syncfs(dir);
+#endif /* HAVE_SYNCFS */
+ }
+ break;
+
+ case DATA_DIR_SYNC_METHOD_FSYNC:
+ {
+ /*
+ * If possible, hint to the kernel that we're soon going to
+ * fsync the data directory and its contents.
+ */
#ifdef PG_FLUSH_DATA_WORKS
- walkdir(dir, pre_sync_fname, false);
+ walkdir(dir, pre_sync_fname, false);
#endif
- walkdir(dir, fsync_fname, false);
+ walkdir(dir, fsync_fname, false);
+ }
+ break;
+ }
}
/*
diff --git a/src/include/common/file_utils.h b/src/include/common/file_utils.h
index 7da21f15e6e..49d523e611f 100644
--- a/src/include/common/file_utils.h
+++ b/src/include/common/file_utils.h
@@ -34,8 +34,9 @@ struct iovec; /* avoid including port/pg_iovec.h here */
#ifdef FRONTEND
extern int fsync_fname(const char *fname, bool isdir);
-extern void fsync_pgdata(const char *pg_data, int serverVersion);
-extern void fsync_dir_recurse(const char *dir);
+extern void sync_pgdata(const char *pg_data, int serverVersion,
+ DataDirSyncMethod sync_method);
+extern void sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method);
extern int durable_rename(const char *oldfile, const char *newfile);
extern int fsync_parent_path(const char *fname);
#endif