diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2010-02-19 10:51:04 +0000 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2010-02-19 10:51:04 +0000 |
commit | ad458cfe81bcefd6d8bd17ff2e42c6599d441bd6 (patch) | |
tree | a4e6d3c89e74fbd65b03103d7a4a575f96f3a6fa /src/backend/access/transam/xlog.c | |
parent | 94f610b16342d7727774f6bb9245341cfa6f895c (diff) | |
download | postgresql-ad458cfe81bcefd6d8bd17ff2e42c6599d441bd6.tar.gz postgresql-ad458cfe81bcefd6d8bd17ff2e42c6599d441bd6.zip |
Don't use O_DIRECT when writing WAL files if archiving or streaming is
enabled. Bypassing the kernel cache is counter-productive in that case,
because the archiver/walsender process will read from the WAL file
soon after it's written, and if it's not cached, that read becomes
a physical read, eating into the I/O bandwidth available on the WAL drive.
Also, the walreceiver process does unaligned writes, so disable O_DIRECT
in the walreceiver process for that reason too.
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 32 |
1 files changed, 24 insertions, 8 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 106d39b7601..046d80fa95d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.376 2010/02/19 01:04:03 itagaki Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.377 2010/02/19 10:51:03 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -2686,13 +2686,10 @@ XLogFileClose(void) * WAL segment files will not be re-read in normal operation, so we advise * the OS to release any cached pages. But do not do so if WAL archiving * or streaming is active, because archiver and walsender process could use - * the cache to read the WAL segment. Also, don't bother with it if we - * are using O_DIRECT, since the kernel is presumably not caching in that - * case. + * the cache to read the WAL segment. */ #if defined(USE_POSIX_FADVISE) && defined(POSIX_FADV_DONTNEED) - if (!XLogIsNeeded() && - (get_sync_bit(sync_method) & PG_O_DIRECT) == 0) + if (!XLogIsNeeded()) (void) posix_fadvise(openLogFile, 0, 0, POSIX_FADV_DONTNEED); #endif @@ -7652,10 +7649,29 @@ xlog_outrec(StringInfo buf, XLogRecord *record) static int get_sync_bit(int method) { + int o_direct_flag = 0; + /* If fsync is disabled, never open in sync mode */ if (!enableFsync) return 0; + /* + * Optimize writes by bypassing kernel cache with O_DIRECT when using + * O_SYNC, O_DSYNC or O_FSYNC. But only if archiving and streaming are + * disabled, otherwise the archive command or walsender process will + * read the WAL soon after writing it, which is guaranteed to cause a + * physical read if we bypassed the kernel cache. We also skip the + * posix_fadvise(POSIX_FADV_DONTNEED) call in XLogFileClose() for the + * same reason. 
+ * + * Never use O_DIRECT in walreceiver process for similar reasons; the WAL + * written by walreceiver is normally read by the startup process soon + * after its written. Also, walreceiver performs unaligned writes, which + * don't work with O_DIRECT, so it is required for correctness too. + */ + if (!XLogIsNeeded() && !am_walreceiver) + o_direct_flag = PG_O_DIRECT; + switch (method) { /* @@ -7670,11 +7686,11 @@ get_sync_bit(int method) return 0; #ifdef OPEN_SYNC_FLAG case SYNC_METHOD_OPEN: - return OPEN_SYNC_FLAG; + return OPEN_SYNC_FLAG | o_direct_flag; #endif #ifdef OPEN_DATASYNC_FLAG case SYNC_METHOD_OPEN_DSYNC: - return OPEN_DATASYNC_FLAG; + return OPEN_DATASYNC_FLAG | o_direct_flag; #endif default: /* can't happen (unless we are out of sync with option array) */ |