diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/transam/xact.c | 19 | ||||
-rw-r--r-- | src/backend/access/transam/xlog.c | 59 |
2 files changed, 38 insertions, 40 deletions
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index 86b1afa80d9..49def6abbb6 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -68,9 +68,6 @@ bool XactDeferrable; int synchronous_commit = SYNCHRONOUS_COMMIT_ON; -int CommitDelay = 0; /* precommit delay in microseconds */ -int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ - /* * MyXactAccessedTempRel is set when a temporary relation is accessed. * We don't allow PREPARE TRANSACTION in that case. (This is global @@ -1123,22 +1120,6 @@ RecordTransactionCommit(void) if ((wrote_xlog && synchronous_commit > SYNCHRONOUS_COMMIT_OFF) || forceSyncCommit || nrels > 0) { - /* - * Synchronous commit case: - * - * Sleep before flush! So we can flush more than one commit records - * per single fsync. (The idea is some other backend may do the - * XLogFlush while we're sleeping. This needs work still, because on - * most Unixen, the minimum select() delay is 10msec or more, which is - * way too long.) - * - * We do not sleep if enableFsync is not turned on, nor if there are - * fewer than CommitSiblings other backends with active transactions. - */ - if (CommitDelay > 0 && enableFsync && - MinimumActiveBackends(CommitSiblings)) - pg_usleep(CommitDelay); - XLogFlush(XactLastRecEnd); /* diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index a43e2eeaf30..6ee50d01d52 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -80,6 +80,8 @@ bool fullPageWrites = true; bool log_checkpoints = false; int sync_method = DEFAULT_SYNC_METHOD; int wal_level = WAL_LEVEL_MINIMAL; +int CommitDelay = 0; /* precommit delay in microseconds */ +int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -2098,34 +2100,49 @@ XLogFlush(XLogRecPtr record) */ continue; } - /* Got the lock */ + + /* Got the lock; recheck whether request is satisfied */ LogwrtResult = XLogCtl->LogwrtResult; - if (!XLByteLE(record, LogwrtResult.Flush)) + if (XLByteLE(record, LogwrtResult.Flush)) + break; + + /* + * Sleep before flush! By adding a delay here, we may give further + * backends the opportunity to join the backlog of group commit + * followers; this can significantly improve transaction throughput, at + * the risk of increasing transaction latency. + * + * We do not sleep if enableFsync is not turned on, nor if there are + * fewer than CommitSiblings other backends with active transactions. + */ + if (CommitDelay > 0 && enableFsync && + MinimumActiveBackends(CommitSiblings)) + pg_usleep(CommitDelay); + + /* try to write/flush later additions to XLOG as well */ + if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE)) { - /* try to write/flush later additions to XLOG as well */ - if (LWLockConditionalAcquire(WALInsertLock, LW_EXCLUSIVE)) - { - XLogCtlInsert *Insert = &XLogCtl->Insert; - uint32 freespace = INSERT_FREESPACE(Insert); + XLogCtlInsert *Insert = &XLogCtl->Insert; + uint32 freespace = INSERT_FREESPACE(Insert); - if (freespace == 0) /* buffer is full */ - WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; - else - { - WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; - WriteRqstPtr -= freespace; - } - LWLockRelease(WALInsertLock); - WriteRqst.Write = WriteRqstPtr; - WriteRqst.Flush = WriteRqstPtr; - } + if (freespace == 0) /* buffer is full */ + WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; else { - WriteRqst.Write = WriteRqstPtr; - WriteRqst.Flush = record; + WriteRqstPtr = XLogCtl->xlblocks[Insert->curridx]; + WriteRqstPtr -= freespace; } - XLogWrite(WriteRqst, false, false); + LWLockRelease(WALInsertLock); + WriteRqst.Write = WriteRqstPtr; + WriteRqst.Flush = WriteRqstPtr; } + else + { + WriteRqst.Write = WriteRqstPtr; + WriteRqst.Flush = record; + } + XLogWrite(WriteRqst, false, false); + LWLockRelease(WALWriteLock); /* done */ break; |