aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam
diff options
context:
space:
mode:
author: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-01-30 16:40:58 +0200
committer: Heikki Linnakangas <heikki.linnakangas@iki.fi> 2012-01-30 16:53:48 +0200
commit: 9b38d46d9f5517dab67dda1dd0459683fc9cda9f (patch)
tree: f305cd2fc3d24b8ac487a45583946d4ec709bd13 /src/backend/access/transam
parent: ba1868ba3138b2119f8290969b9a3936fbc297ce (diff)
download: postgresql-9b38d46d9f5517dab67dda1dd0459683fc9cda9f.tar.gz
download: postgresql-9b38d46d9f5517dab67dda1dd0459683fc9cda9f.zip
Make group commit more effective.
When a backend needs to flush the WAL, and someone else is already flushing the WAL, wait until it releases the WALWriteLock and check if we still need to do the flush or if the other backend already did the work for us, before acquiring WALWriteLock. This helps group commit, because when the WAL flush finishes, all the backends that were waiting for it can be woken up in one go, and they can all concurrently observe that they're done, rather than waking them up one by one in a cascading fashion. This is based on a new LWLock function, LWLockWaitUntilFree(), which has peculiar semantics. If the lock is immediately free, it grabs the lock and returns true. If it's not free, it waits until it is released, but then returns false without grabbing the lock. This is used in XLogFlush(), so that when the lock is acquired, the backend flushes the WAL, but if it's not, the backend first checks the current flush location before retrying. Original patch and benchmarking by Peter Geoghegan and Simon Riggs, although this patch as committed ended up being very different from that.
Diffstat (limited to 'src/backend/access/transam')
-rw-r--r-- src/backend/access/transam/twophase.c | 2
-rw-r--r-- src/backend/access/transam/xlog.c | 36
2 files changed, 30 insertions, 8 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 69af75c6b64..6e84cd0a216 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -327,7 +327,7 @@ MarkAsPreparing(TransactionId xid, const char *gid,
proc->databaseId = databaseid;
proc->roleId = owner;
proc->lwWaiting = false;
- proc->lwExclusive = false;
+ proc->lwWaitMode = 0;
proc->lwWaitLink = NULL;
proc->waitLock = NULL;
proc->waitProcLock = NULL;
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 4b273a8318f..cce87a3cd30 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2118,23 +2118,43 @@ XLogFlush(XLogRecPtr record)
/* initialize to given target; may increase below */
WriteRqstPtr = record;
- /* read LogwrtResult and update local state */
+ /*
+ * Now wait until we get the write lock, or someone else does the
+ * flush for us.
+ */
+ for (;;)
{
/* use volatile pointer to prevent code rearrangement */
volatile XLogCtlData *xlogctl = XLogCtl;
+ /* read LogwrtResult and update local state */
SpinLockAcquire(&xlogctl->info_lck);
if (XLByteLT(WriteRqstPtr, xlogctl->LogwrtRqst.Write))
WriteRqstPtr = xlogctl->LogwrtRqst.Write;
LogwrtResult = xlogctl->LogwrtResult;
SpinLockRelease(&xlogctl->info_lck);
- }
- /* done already? */
- if (!XLByteLE(record, LogwrtResult.Flush))
- {
- /* now wait for the write lock */
- LWLockAcquire(WALWriteLock, LW_EXCLUSIVE);
+ /* done already? */
+ if (XLByteLE(record, LogwrtResult.Flush))
+ break;
+
+ /*
+ * Try to get the write lock. If we can't get it immediately, wait
+ * until it's released, and recheck if we still need to do the flush
+ * or if the backend that held the lock did it for us already. This
+ * helps to maintain a good rate of group committing when the system
+ * is bottlenecked by the speed of fsyncing.
+ */
+ if (!LWLockWaitUntilFree(WALWriteLock, LW_EXCLUSIVE))
+ {
+ /*
+ * The lock is now free, but we didn't acquire it yet. Before we
+ * do, loop back to check if someone else flushed the record for
+ * us already.
+ */
+ continue;
+ }
+ /* Got the lock */
LogwrtResult = XLogCtl->Write.LogwrtResult;
if (!XLByteLE(record, LogwrtResult.Flush))
{
@@ -2163,6 +2183,8 @@ XLogFlush(XLogRecPtr record)
XLogWrite(WriteRqst, false, false);
}
LWLockRelease(WALWriteLock);
+ /* done */
+ break;
}
END_CRIT_SECTION();