aboutsummaryrefslogtreecommitdiff
path: root/src/common/file_utils.c
diff options
context:
space:
mode:
authorNathan Bossart <nathan@postgresql.org>2023-09-06 16:27:00 -0700
committerNathan Bossart <nathan@postgresql.org>2023-09-06 16:27:00 -0700
commitcccc6cdeb32f010f1cf777a9e9a85344a4317ab8 (patch)
tree989510ddcfe04a8503da11931e5c043dee189cfc /src/common/file_utils.c
parent3ed19567198ddb34ab32be6ebdf132148287abdf (diff)
downloadpostgresql-cccc6cdeb32f010f1cf777a9e9a85344a4317ab8.tar.gz
postgresql-cccc6cdeb32f010f1cf777a9e9a85344a4317ab8.zip
Add support for syncfs() in frontend support functions.
This commit adds support for using syncfs() in fsync_pgdata() and fsync_dir_recurse() (which have been renamed to sync_pgdata() and sync_dir_recurse()). Like recovery_init_sync_method, sync_pgdata() calls syncfs() for the data directory, each tablespace, and pg_wal (if it is a symlink). For now, all of the frontend utilities that use these support functions are hard-coded to use fsync(), but a follow-up commit will allow specifying syncfs(). Co-authored-by: Justin Pryzby Reviewed-by: Michael Paquier Discussion: https://postgr.es/m/20210930004340.GM831%40telsasoft.com
Diffstat (limited to 'src/common/file_utils.c')
-rw-r--r--src/common/file_utils.c191
1 files changed, 155 insertions, 36 deletions
diff --git a/src/common/file_utils.c b/src/common/file_utils.c
index 74833c4acbb..abe5129412d 100644
--- a/src/common/file_utils.c
+++ b/src/common/file_utils.c
@@ -51,19 +51,52 @@ static void walkdir(const char *path,
int (*action) (const char *fname, bool isdir),
bool process_symlinks);
+#ifdef HAVE_SYNCFS
+
/*
- * Issue fsync recursively on PGDATA and all its contents.
+ * do_syncfs -- Try to syncfs a file system
*
- * We fsync regular files and directories wherever they are, but we follow
+ * Reports errors trying to open the path. syncfs() errors are fatal.
+ */
+static void
+do_syncfs(const char *path)
+{
+ int fd;
+
+ fd = open(path, O_RDONLY, 0);
+
+ if (fd < 0)
+ {
+ pg_log_error("could not open file \"%s\": %m", path);
+ return;
+ }
+
+ if (syncfs(fd) < 0)
+ {
+ pg_log_error("could not synchronize file system for file \"%s\": %m", path);
+ (void) close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ (void) close(fd);
+}
+
+#endif /* HAVE_SYNCFS */
+
+/*
+ * Synchronize PGDATA and all its contents.
+ *
+ * We sync regular files and directories wherever they are, but we follow
* symlinks only for pg_wal (or pg_xlog) and immediately under pg_tblspc.
* Other symlinks are presumed to point at files we're not responsible for
- * fsyncing, and might not have privileges to write at all.
+ * syncing, and might not have privileges to write at all.
*
- * serverVersion indicates the version of the server to be fsync'd.
+ * serverVersion indicates the version of the server to be sync'd.
*/
void
-fsync_pgdata(const char *pg_data,
- int serverVersion)
+sync_pgdata(const char *pg_data,
+ int serverVersion,
+ DataDirSyncMethod sync_method)
{
bool xlog_is_symlink;
char pg_wal[MAXPGPATH];
@@ -89,49 +122,135 @@ fsync_pgdata(const char *pg_data,
xlog_is_symlink = true;
}
- /*
- * If possible, hint to the kernel that we're soon going to fsync the data
- * directory and its contents.
- */
+ switch (sync_method)
+ {
+ case DATA_DIR_SYNC_METHOD_SYNCFS:
+ {
+#ifndef HAVE_SYNCFS
+ pg_log_error("this build does not support sync method \"%s\"",
+ "syncfs");
+ exit(EXIT_FAILURE);
+#else
+ DIR *dir;
+ struct dirent *de;
+
+ /*
+ * On Linux, we don't have to open every single file one by
+ * one. We can use syncfs() to sync whole filesystems. We
+ * only expect filesystem boundaries to exist where we
+ * tolerate symlinks, namely pg_wal and the tablespaces, so we
+ * call syncfs() for each of those directories.
+ */
+
+ /* Sync the top level pgdata directory. */
+ do_syncfs(pg_data);
+
+ /* If any tablespaces are configured, sync each of those. */
+ dir = opendir(pg_tblspc);
+ if (dir == NULL)
+ pg_log_error("could not open directory \"%s\": %m",
+ pg_tblspc);
+ else
+ {
+ while (errno = 0, (de = readdir(dir)) != NULL)
+ {
+ char subpath[MAXPGPATH * 2];
+
+ if (strcmp(de->d_name, ".") == 0 ||
+ strcmp(de->d_name, "..") == 0)
+ continue;
+
+ snprintf(subpath, sizeof(subpath), "%s/%s",
+ pg_tblspc, de->d_name);
+ do_syncfs(subpath);
+ }
+
+ if (errno)
+ pg_log_error("could not read directory \"%s\": %m",
+ pg_tblspc);
+
+ (void) closedir(dir);
+ }
+
+ /* If pg_wal is a symlink, process that too. */
+ if (xlog_is_symlink)
+ do_syncfs(pg_wal);
+#endif /* HAVE_SYNCFS */
+ }
+ break;
+
+ case DATA_DIR_SYNC_METHOD_FSYNC:
+ {
+ /*
+ * If possible, hint to the kernel that we're soon going to
+ * fsync the data directory and its contents.
+ */
#ifdef PG_FLUSH_DATA_WORKS
- walkdir(pg_data, pre_sync_fname, false);
- if (xlog_is_symlink)
- walkdir(pg_wal, pre_sync_fname, false);
- walkdir(pg_tblspc, pre_sync_fname, true);
+ walkdir(pg_data, pre_sync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, pre_sync_fname, false);
+ walkdir(pg_tblspc, pre_sync_fname, true);
#endif
- /*
- * Now we do the fsync()s in the same order.
- *
- * The main call ignores symlinks, so in addition to specially processing
- * pg_wal if it's a symlink, pg_tblspc has to be visited separately with
- * process_symlinks = true. Note that if there are any plain directories
- * in pg_tblspc, they'll get fsync'd twice. That's not an expected case
- * so we don't worry about optimizing it.
- */
- walkdir(pg_data, fsync_fname, false);
- if (xlog_is_symlink)
- walkdir(pg_wal, fsync_fname, false);
- walkdir(pg_tblspc, fsync_fname, true);
+ /*
+ * Now we do the fsync()s in the same order.
+ *
+ * The main call ignores symlinks, so in addition to specially
+ * processing pg_wal if it's a symlink, pg_tblspc has to be
+ * visited separately with process_symlinks = true. Note that
+ * if there are any plain directories in pg_tblspc, they'll
+ * get fsync'd twice. That's not an expected case so we don't
+ * worry about optimizing it.
+ */
+ walkdir(pg_data, fsync_fname, false);
+ if (xlog_is_symlink)
+ walkdir(pg_wal, fsync_fname, false);
+ walkdir(pg_tblspc, fsync_fname, true);
+ }
+ break;
+ }
}
/*
- * Issue fsync recursively on the given directory and all its contents.
+ * Synchronize the given directory and all its contents.
*
- * This is a convenient wrapper on top of walkdir().
+ * This is a convenient wrapper on top of walkdir() and do_syncfs().
*/
void
-fsync_dir_recurse(const char *dir)
+sync_dir_recurse(const char *dir, DataDirSyncMethod sync_method)
{
- /*
- * If possible, hint to the kernel that we're soon going to fsync the data
- * directory and its contents.
- */
+ switch (sync_method)
+ {
+ case DATA_DIR_SYNC_METHOD_SYNCFS:
+ {
+#ifndef HAVE_SYNCFS
+ pg_log_error("this build does not support sync method \"%s\"",
+ "syncfs");
+ exit(EXIT_FAILURE);
+#else
+ /*
+ * On Linux, we don't have to open every single file one by
+ * one. We can use syncfs() to sync the whole filesystem.
+ */
+ do_syncfs(dir);
+#endif /* HAVE_SYNCFS */
+ }
+ break;
+
+ case DATA_DIR_SYNC_METHOD_FSYNC:
+ {
+ /*
+ * If possible, hint to the kernel that we're soon going to
+ * fsync the data directory and its contents.
+ */
#ifdef PG_FLUSH_DATA_WORKS
- walkdir(dir, pre_sync_fname, false);
+ walkdir(dir, pre_sync_fname, false);
#endif
- walkdir(dir, fsync_fname, false);
+ walkdir(dir, fsync_fname, false);
+ }
+ break;
+ }
}
/*