diff options
author | Robert Haas <rhaas@postgresql.org> | 2012-07-02 10:26:31 -0400 |
---|---|---|
committer | Robert Haas <rhaas@postgresql.org> | 2012-07-02 10:26:31 -0400 |
commit | f11e8be3e812cdbbc139c1b4e49141378b118dee (patch) | |
tree | 162f29144f66e3bd9de31556170b0ece9be4cae0 /src | |
parent | f83b59997d29f06c3d67e7eb9a1f2c9cd017d665 (diff) | |
download | postgresql-f11e8be3e812cdbbc139c1b4e49141378b118dee.tar.gz postgresql-f11e8be3e812cdbbc139c1b4e49141378b118dee.zip |
Make commit_delay much smarter.
Instead of letting every backend participating in a group commit wait
independently, have the first one that becomes ready to flush WAL wait
for the configured delay, and let all the others wait just long enough
for that first process to complete its flush. This greatly increases
the chances of being able to configure a commit_delay setting that
actually improves performance.
As a side consequence of this change, commit_delay now affects all WAL
flushes, rather than just commits. There was some discussion on
pgsql-hackers about whether to rename the GUC to, say, wal_flush_delay,
but in the absence of consensus I am leaving it alone for now.
Peter Geoghegan, with some changes, mostly to the documentation, by me.
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/transam/xact.c | 19 |
-rw-r--r-- | src/backend/access/transam/xlog.c | 59 |
2 files changed, 38 insertions, 40 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 86b1afa80d9..49def6abbb6 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -68,9 +68,6 @@ bool XactDeferrable;
 
 int			synchronous_commit = SYNCHRONOUS_COMMIT_ON;
 
-int			CommitDelay = 0;	/* precommit delay in microseconds */
-int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
-
 /*
  * MyXactAccessedTempRel is set when a temporary relation is accessed.
  * We don't allow PREPARE TRANSACTION in that case. (This is global
@@ -1123,22 +1120,6 @@ RecordTransactionCommit(void)
 	if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) ||
 		forceSyncCommit || nrels > 0)
 	{
-		/*
-		 * Synchronous commit case:
-		 *
-		 * Sleep before flush! So we can flush more than one commit records
-		 * per single fsync. (The idea is some other backend may do the
-		 * XLogFlush while we're sleeping. This needs work still, because on
-		 * most Unixen, the minimum select() delay is 10msec or more, which is
-		 * way too long.)
-		 *
-		 * We do not sleep if enableFsync is not turned on, nor if there are
-		 * fewer than CommitSiblings other backends with active transactions.
-		 */
-		if (CommitDelay > 0 && enableFsync &&
-			MinimumActiveBackends(CommitSiblings))
-			pg_usleep(CommitDelay);
-
 		XLogFlush(XactLastRecEnd);
 
 		/*
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index a43e2eeaf30..6ee50d01d52 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -80,6 +80,8 @@ bool fullPageWrites = true;
 bool		log_checkpoints = false;
 int			sync_method = DEFAULT_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_MINIMAL;
+int			CommitDelay = 0;	/* precommit delay in microseconds */
+int			CommitSiblings = 5; /* # concurrent xacts needed to sleep */
 
 #ifdef WAL_DEBUG
 bool		XLOG_DEBUG = false;
@@ -2098,34 +2100,49 @@ XLogFlush(XLogRecPtr record)
 			 */
 			continue;
 		}
-		/* Got the lock */
+
+		/* Got the lock; recheck whether request is satisfied */
 		LogwrtResult = XLogCtl->LogwrtResult;
-		if (!XLByteLE(record, LogwrtResult.Flush))
+		if (XLByteLE(record, LogwrtResult.Flush))
+			break;
+
+		/*
+		 * Sleep before flush! By adding a delay here, we may give further
+		 * backends the opportunity to join the backlog of group commit
+		 * followers; this can significantly improve transaction throughput, at
+		 * the risk of increasing transaction latency.
+		 *
+		 * We do not sleep if enableFsync is not turned on, nor if there are
+		 * fewer than CommitSiblings other backends with active transactions.
+		 */
+		if (CommitDelay > 0 && enableFsync &&
+			MinimumActiveBackends(CommitSiblings))
+			pg_usleep(CommitDelay);
+
+		/* try to write/flush later additions to XLOG as well */
+		if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE))
 		{
-			/* try to write/flush later additions to XLOG as well */
-			if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE))
-			{
-				XLogCtlInsert *Insert = &XLogCtl->Insert;
-				uint32		freespace = INSERT_FREESPACE(Insert);
+			XLogCtlInsert *Insert = &XLogCtl->Insert;
+			uint32		freespace = INSERT_FREESPACE(Insert);
 
-				if (freespace == 0) /* buffer is full */
-					WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
-				else
-				{
-					WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
-					WriteRqstPtr -= freespace;
-				}
-				LWLockRelease(WALInsertLock);
-				WriteRqst.Write = WriteRqstPtr;
-				WriteRqst.Flush = WriteRqstPtr;
-			}
+			if (freespace == 0) /* buffer is full */
+				WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
 			else
 			{
-				WriteRqst.Write = WriteRqstPtr;
-				WriteRqst.Flush = record;
+				WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx];
+				WriteRqstPtr -= freespace;
 			}
-			XLogWrite(WriteRqst, false, false);
+			LWLockRelease(WALInsertLock);
+			WriteRqst.Write = WriteRqstPtr;
+			WriteRqst.Flush = WriteRqstPtr;
 		}
+		else
+		{
+			WriteRqst.Write = WriteRqstPtr;
+			WriteRqst.Flush = record;
+		}
+		XLogWrite(WriteRqst, false, false);
+
 		LWLockRelease(WALWriteLock);
 		/* done */
 		break;