about | summary | refs | log | tree | commit | diff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
author Thomas Munro <tmunro@postgresql.org> 2023-04-08 11:04:49 +1200
committer Thomas Munro <tmunro@postgresql.org> 2023-04-08 16:35:07 +1200
commit d4e71df6d757fd21c363164a3a4d3b5681462662 (patch)
tree 27db4af292830160ecfe4789645f87d0e5a1daea /src/backend/access/transam/xlog.c
parent faeedbcefd40bfdf314e048c425b6d9208896d90 (diff)
download postgresql-d4e71df6d757fd21c363164a3a4d3b5681462662.tar.gz
postgresql-d4e71df6d757fd21c363164a3a4d3b5681462662.zip
Add io_direct setting (developer-only).

Provide a way to ask the kernel to use O_DIRECT (or local equivalent) where available for data and WAL files, to avoid or minimize kernel caching. This hurts performance currently and is not intended for end users yet. Later proposed work would introduce our own I/O clustering, read-ahead, etc. to replace the facilities the kernel disables with this option.

The only user-visible change, if the developer-only GUC is not used, is that this commit also removes the obscure logic that would activate O_DIRECT for the WAL when wal_sync_method=open_[data]sync and wal_level=minimal (which also requires max_wal_senders=0). Those are non-default and unlikely settings, and this behavior wasn't (correctly) documented. The same effect can be achieved with io_direct=wal.

Author: Thomas Munro <thomas.munro@gmail.com>
Author: Andres Freund <andres@anarazel.de>
Author: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Discussion: https://postgr.es/m/CA%2BhUKGK1X532hYqJ_MzFWt0n1zt8trz980D79WbjwnT-yYLZpg%40mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- src/backend/access/transam/xlog.c 37
1 file changed, 16 insertions, 21 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a5c74fdab8c..18e16ae5b3e 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2926,6 +2926,7 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
XLogSegNo max_segno;
int fd;
int save_errno;
+ int open_flags = O_RDWR | O_CREAT | O_EXCL | PG_BINARY;
Assert(logtli != 0);
@@ -2959,8 +2960,11 @@ XLogFileInitInternal(XLogSegNo logsegno, TimeLineID logtli,
unlink(tmppath);
+ if (io_direct_flags & IO_DIRECT_WAL_INIT)
+ open_flags |= PG_O_DIRECT;
+
/* do not use get_sync_bit() here --- want to fsync only at end of fill */
- fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY);
+ fd = BasicOpenFile(tmppath, open_flags);
if (fd < 0)
ereport(ERROR,
(errcode_for_file_access(),
@@ -3354,7 +3358,7 @@ XLogFileClose(void)
* use the cache to read the WAL segment.
*/
#if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED)
- if (!XLogIsNeeded())
+ if (!XLogIsNeeded() && (io_direct_flags & IO_DIRECT_WAL) == 0)
(void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED);
#endif
@@ -4445,7 +4449,6 @@ show_in_hot_standby(void)
return RecoveryInProgress() ? "on" : "off";
}
-
/*
* Read the control file, set respective GUCs.
*
@@ -8029,35 +8032,27 @@ xlog_redo(XLogReaderState *record)
}
/*
- * Return the (possible) sync flag used for opening a file, depending on the
- * value of the GUC wal_sync_method.
+ * Return the extra open flags used for opening a file, depending on the
+ * value of the GUCs wal_sync_method, fsync and io_direct.
*/
static int
get_sync_bit(int method)
{
int o_direct_flag = 0;
- /* If fsync is disabled, never open in sync mode */
- if (!enableFsync)
- return 0;
-
/*
- * Optimize writes by bypassing kernel cache with O_DIRECT when using
- * O_SYNC and O_DSYNC. But only if archiving and streaming are disabled,
- * otherwise the archive command or walsender process will read the WAL
- * soon after writing it, which is guaranteed to cause a physical read if
- * we bypassed the kernel cache. We also skip the
- * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the same
- * reason.
- *
- * Never use O_DIRECT in walreceiver process for similar reasons; the WAL
+ * Use O_DIRECT if requested, except in walreceiver process. The WAL
* written by walreceiver is normally read by the startup process soon
- * after it's written. Also, walreceiver performs unaligned writes, which
+ * after it's written. Also, walreceiver performs unaligned writes, which
* don't work with O_DIRECT, so it is required for correctness too.
*/
- if (!XLogIsNeeded() && !AmWalReceiverProcess())
+ if ((io_direct_flags & IO_DIRECT_WAL) && !AmWalReceiverProcess())
o_direct_flag = PG_O_DIRECT;
+ /* If fsync is disabled, never open in sync mode */
+ if (!enableFsync)
+ return o_direct_flag;
+
switch (method)
{
/*
@@ -8069,7 +8064,7 @@ get_sync_bit(int method)
case SYNC_METHOD_FSYNC:
case SYNC_METHOD_FSYNC_WRITETHROUGH:
case SYNC_METHOD_FDATASYNC:
- return 0;
+ return o_direct_flag;
#ifdef O_SYNC
case SYNC_METHOD_OPEN:
return O_SYNC | o_direct_flag;