aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/clog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/clog.c')
-rw-r--r--src/backend/access/transam/clog.c243
1 files changed, 180 insertions, 63 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 34f079cbb14..a787b374dac 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -3,12 +3,13 @@
* clog.c
* PostgreSQL transaction-commit-log manager
*
- * This module replaces the old "pg_log" access code, which treated pg_log
- * essentially like a relation, in that it went through the regular buffer
- * manager. The problem with that was that there wasn't any good way to
- * recycle storage space for transactions so old that they'll never be
- * looked up again. Now we use specialized access code so that the commit
- * log can be broken into relatively small, independent segments.
+ * This module stores two bits per transaction regarding its commit/abort
+ * status; the statuses of four transactions fit in a byte.
+ *
+ * This would be a pretty simple abstraction on top of slru.c, except that
+ * for performance reasons we allow multiple transactions that are
+ * committing concurrently to form a queue, so that a single process can
+ * update the status for all of them within a single lock acquisition run.
*
* XLOG interactions: this module generates an XLOG record whenever a new
* CLOG page is initialized to zeroes. Other writes of CLOG come from
@@ -43,6 +44,7 @@
#include "pgstat.h"
#include "storage/proc.h"
#include "storage/sync.h"
+#include "utils/guc_hooks.h"
/*
* Defines for CLOG page sizes. A page is the same BLCKSZ as is used
@@ -62,6 +64,15 @@
#define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE)
#define CLOG_XACT_BITMASK ((1 << CLOG_BITS_PER_XACT) - 1)
+/*
+ * Because space used in CLOG by each transaction is so small, we place a
+ * smaller limit on the number of CLOG buffers than SLRU allows. No other
+ * SLRU needs this.
+ */
+#define CLOG_MAX_ALLOWED_BUFFERS \
+ Min(SLRU_MAX_ALLOWED_BUFFERS, \
+ (((MaxTransactionId / 2) + (CLOG_XACTS_PER_PAGE - 1)) / CLOG_XACTS_PER_PAGE))
+
/*
* Although we return an int64 the actual value can't currently exceed
@@ -284,15 +295,20 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
XLogRecPtr lsn, int64 pageno,
bool all_xact_same_page)
{
+ LWLock *lock;
+
/* Can't use group update when PGPROC overflows. */
StaticAssertDecl(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS,
"group clog threshold less than PGPROC cached subxids");
+ /* Get the SLRU bank lock for the page we are going to access. */
+ lock = SimpleLruGetBankLock(XactCtl, pageno);
+
/*
- * When there is contention on XactSLRULock, we try to group multiple
- * updates; a single leader process will perform transaction status
- * updates for multiple backends so that the number of times XactSLRULock
- * needs to be acquired is reduced.
+ * When there is contention on the SLRU bank lock we need, we try to group
+ * multiple updates; a single leader process will perform transaction
+ * status updates for multiple backends so that the number of times the
+ * bank lock needs to be acquired is reduced.
*
* For this optimization to be safe, the XID and subxids in MyProc must be
* the same as the ones for which we're setting the status. Check that
@@ -310,17 +326,17 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
nsubxids * sizeof(TransactionId)) == 0))
{
/*
- * If we can immediately acquire XactSLRULock, we update the status of
- * our own XID and release the lock. If not, try use group XID
- * update. If that doesn't work out, fall back to waiting for the
- * lock to perform an update for this transaction only.
+ * If we can immediately acquire the lock, we update the status of our
+ * own XID and release the lock. If not, try to use group XID update. If
+ * that doesn't work out, fall back to waiting for the lock to perform
+ * an update for this transaction only.
*/
- if (LWLockConditionalAcquire(XactSLRULock, LW_EXCLUSIVE))
+ if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE))
{
/* Got the lock without waiting! Do the update. */
TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
lsn, pageno);
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
return;
}
else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno))
@@ -333,10 +349,10 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
}
/* Group update not applicable, or couldn't accept this page number. */
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status,
lsn, pageno);
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
}
/*
@@ -355,7 +371,8 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
Assert(status == TRANSACTION_STATUS_COMMITTED ||
status == TRANSACTION_STATUS_ABORTED ||
(status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
- Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE));
+ Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(XactCtl, pageno),
+ LW_EXCLUSIVE));
/*
* If we're doing an async commit (ie, lsn is valid), then we must wait
@@ -406,14 +423,15 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids,
}
/*
- * When we cannot immediately acquire XactSLRULock in exclusive mode at
+ * Subroutine for TransactionIdSetPageStatus, q.v.
+ *
+ * When we cannot immediately acquire the SLRU bank lock in exclusive mode at
* commit time, add ourselves to a list of processes that need their XIDs
* status update. The first process to add itself to the list will acquire
- * XactSLRULock in exclusive mode and set transaction status as required
- * on behalf of all group members. This avoids a great deal of contention
- * around XactSLRULock when many processes are trying to commit at once,
- * since the lock need not be repeatedly handed off from one committing
- * process to the next.
+ * the lock in exclusive mode and set transaction status as required on behalf
+ * of all group members. This avoids a great deal of contention when many
+ * processes are trying to commit at once, since the lock need not be
+ * repeatedly handed off from one committing process to the next.
*
* Returns true when transaction status has been updated in clog; returns
* false if we decided against applying the optimization because the page
@@ -425,16 +443,17 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
{
volatile PROC_HDR *procglobal = ProcGlobal;
PGPROC *proc = MyProc;
- int pgprocno = MyProcNumber;
uint32 nextidx;
uint32 wakeidx;
+ int prevpageno;
+ LWLock *prevlock = NULL;
/* We should definitely have an XID whose status needs to be updated. */
Assert(TransactionIdIsValid(xid));
/*
- * Add ourselves to the list of processes needing a group XID status
- * update.
+ * Prepare to add ourselves to the list of processes needing a group XID
+ * status update.
*/
proc->clogGroupMember = true;
proc->clogGroupMemberXid = xid;
@@ -442,6 +461,29 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
proc->clogGroupMemberPage = pageno;
proc->clogGroupMemberLsn = lsn;
+ /*
+ * We put ourselves in the queue by writing MyProcNumber to
+ * ProcGlobal->clogGroupFirst. However, if there's already a process
+ * listed there, we compare our pageno with that of that process; if it
+ * differs, we cannot participate in the group, so we return for caller to
+ * update pg_xact in the normal way.
+ *
+ * If we're not the first process in the list, we must follow the leader.
+ * We do this by storing the data we want updated in our PGPROC entry
+ * where the leader can find it, then going to sleep.
+ *
+ * If no process is already in the list, we're the leader; our first step
+ * is to lock the SLRU bank to which our page belongs, then we close out
+ * the group by resetting the list pointer from ProcGlobal->clogGroupFirst
+ * (this lets other processes set up other groups later); finally we do
+ * the SLRU updates, release the SLRU bank lock, and wake up the sleeping
+ * processes.
+ *
+ * If another group starts to update a page in a different SLRU bank, they
+ * can proceed concurrently, since the bank lock they're going to use is
+ * different from ours. If another group starts to update a page in the
+ * same bank as ours, they wait until we release the lock.
+ */
nextidx = pg_atomic_read_u32(&procglobal->clogGroupFirst);
while (true)
@@ -453,10 +495,11 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
* There is a race condition here, which is that after doing the below
* check and before adding this proc's clog update to a group, the
* group leader might have already finished the group update for this
- * page and becomes group leader of another group. This will lead to a
- * situation where a single group can have different clog page
- * updates. This isn't likely and will still work, just maybe a bit
- * less efficiently.
+ * page and becomes group leader of another group, updating a
+ * different page. This will lead to a situation where a single group
+ * can have different clog page updates. This isn't likely and will
+ * still work, just less efficiently -- we handle this case by
+ * switching to a different bank lock in the loop below.
*/
if (nextidx != INVALID_PGPROCNO &&
GetPGProcByNumber(nextidx)->clogGroupMemberPage != proc->clogGroupMemberPage)
@@ -474,7 +517,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst,
&nextidx,
- (uint32) pgprocno))
+ (uint32) MyProcNumber))
break;
}
@@ -508,13 +551,21 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
return true;
}
- /* We are the leader. Acquire the lock on behalf of everyone. */
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ /*
+ * By here, we know we're the leader process. Acquire the SLRU bank lock
+ * that corresponds to the page we originally wanted to modify.
+ */
+ prevpageno = proc->clogGroupMemberPage;
+ prevlock = SimpleLruGetBankLock(XactCtl, prevpageno);
+ LWLockAcquire(prevlock, LW_EXCLUSIVE);
/*
* Now that we've got the lock, clear the list of processes waiting for
* group XID status update, saving a pointer to the head of the list.
- * Trying to pop elements one at a time could lead to an ABA problem.
+ * (Trying to pop elements one at a time could lead to an ABA problem.)
+ *
+ * At this point, any process trying to add itself to the list would start
+ * a separate group.
*/
nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst,
INVALID_PGPROCNO);
@@ -526,6 +577,31 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
while (nextidx != INVALID_PGPROCNO)
{
PGPROC *nextproc = &ProcGlobal->allProcs[nextidx];
+ int thispageno = nextproc->clogGroupMemberPage;
+
+ /*
+ * If the page to update belongs to a different bank than the previous
+ * one, exchange bank lock to the new one. This should be quite rare,
+ * as described above.
+ *
+ * (We could try to optimize this by waking up the processes for which
+ * we have already updated the status while we exchange the lock, but
+ * the code doesn't do that at present. I think it'd require
+ * additional bookkeeping, making the common path slower in order to
+ * improve an infrequent case.)
+ */
+ if (thispageno != prevpageno)
+ {
+ LWLock *lock = SimpleLruGetBankLock(XactCtl, thispageno);
+
+ if (prevlock != lock)
+ {
+ LWLockRelease(prevlock);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
+ }
+ prevlock = lock;
+ prevpageno = thispageno;
+ }
/*
* Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs
@@ -545,12 +621,17 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
}
/* We're done with the lock now. */
- LWLockRelease(XactSLRULock);
+ if (prevlock != NULL)
+ LWLockRelease(prevlock);
/*
* Now that we've released the lock, go back and wake everybody up. We
* don't do this under the lock so as to keep lock hold times to a
* minimum.
+ *
+ * (Perhaps we could do this in two passes, the first setting
+ * clogGroupNext to invalid while saving the semaphores to an array, then
+ * a single write barrier, then another pass unlocking the semaphores.)
*/
while (wakeidx != INVALID_PGPROCNO)
{
@@ -574,7 +655,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status,
/*
* Sets the commit status of a single transaction.
*
- * Must be called with XactSLRULock held
+ * Caller must hold the corresponding SLRU bank lock; it is still held at exit.
*/
static void
TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
@@ -585,6 +666,11 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i
char byteval;
char curval;
+ Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(xid));
+ Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(XactCtl,
+ XactCtl->shared->page_number[slotno]),
+ LW_EXCLUSIVE));
+
byteptr = XactCtl->shared->page_buffer[slotno] + byteno;
curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
@@ -666,7 +752,7 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
lsnindex = GetLSNIndex(slotno, xid);
*lsn = XactCtl->shared->group_lsn[lsnindex];
- LWLockRelease(XactSLRULock);
+ LWLockRelease(SimpleLruGetBankLock(XactCtl, pageno));
return status;
}
@@ -674,23 +760,18 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
/*
* Number of shared CLOG buffers.
*
- * On larger multi-processor systems, it is possible to have many CLOG page
- * requests in flight at one time which could lead to disk access for CLOG
- * page if the required page is not found in memory. Testing revealed that we
- * can get the best performance by having 128 CLOG buffers, more than that it
- * doesn't improve performance.
- *
- * Unconditionally keeping the number of CLOG buffers to 128 did not seem like
- * a good idea, because it would increase the minimum amount of shared memory
- * required to start, which could be a problem for people running very small
- * configurations. The following formula seems to represent a reasonable
- * compromise: people with very low values for shared_buffers will get fewer
- * CLOG buffers as well, and everyone else will get 128.
+ * If asked to autotune, use 2MB for every 1GB of shared buffers, up to 8MB.
+ * Otherwise just clamp the configured amount to between 16 and the maximum
+ * allowed.
*/
-Size
+static int
CLOGShmemBuffers(void)
{
- return Min(128, Max(4, NBuffers / 512));
+ /* auto-tune based on shared buffers */
+ if (transaction_buffers == 0)
+ return SimpleLruAutotuneBuffers(512, 1024);
+
+ return Min(Max(16, transaction_buffers), CLOG_MAX_ALLOWED_BUFFERS);
}
/*
@@ -705,14 +786,44 @@ CLOGShmemSize(void)
void
CLOGShmemInit(void)
{
+ /* If auto-tuning is requested, now is the time to do it */
+ if (transaction_buffers == 0)
+ {
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%d", CLOGShmemBuffers());
+ SetConfigOption("transaction_buffers", buf, PGC_POSTMASTER,
+ PGC_S_DYNAMIC_DEFAULT);
+
+ /*
+ * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT.
+ * However, if the DBA explicitly set transaction_buffers = 0 in the
+ * config file, then PGC_S_DYNAMIC_DEFAULT will fail to override that
+ * and we must force the matter with PGC_S_OVERRIDE.
+ */
+ if (transaction_buffers == 0) /* failed to apply it? */
+ SetConfigOption("transaction_buffers", buf, PGC_POSTMASTER,
+ PGC_S_OVERRIDE);
+ }
+ Assert(transaction_buffers != 0);
+
XactCtl->PagePrecedes = CLOGPagePrecedes;
SimpleLruInit(XactCtl, "transaction", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE,
- XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER,
- SYNC_HANDLER_CLOG, false);
+ "pg_xact", LWTRANCHE_XACT_BUFFER,
+ LWTRANCHE_XACT_SLRU, SYNC_HANDLER_CLOG, false);
SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE);
}
/*
+ * GUC check_hook for transaction_buffers
+ */
+bool
+check_transaction_buffers(int *newval, void **extra, GucSource source)
+{
+ return check_slru_buffers("transaction_buffers", newval);
+}
+
+/*
* This func must be called ONCE on system install. It creates
* the initial CLOG segment. (The CLOG directory is assumed to
* have been created by initdb, and CLOGShmemInit must have been
@@ -722,8 +833,9 @@ void
BootStrapCLOG(void)
{
int slotno;
+ LWLock *lock = SimpleLruGetBankLock(XactCtl, 0);
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
/* Create and zero the first page of the commit log */
slotno = ZeroCLOGPage(0, false);
@@ -732,7 +844,7 @@ BootStrapCLOG(void)
SimpleLruWritePage(XactCtl, slotno);
Assert(!XactCtl->shared->page_dirty[slotno]);
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
}
/*
@@ -781,8 +893,9 @@ TrimCLOG(void)
{
TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid);
int64 pageno = TransactionIdToPage(xid);
+ LWLock *lock = SimpleLruGetBankLock(XactCtl, pageno);
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
/*
* Zero out the remainder of the current clog page. Under normal
@@ -814,7 +927,7 @@ TrimCLOG(void)
XactCtl->shared->page_dirty[slotno] = true;
}
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
}
/*
@@ -846,6 +959,7 @@ void
ExtendCLOG(TransactionId newestXact)
{
int64 pageno;
+ LWLock *lock;
/*
* No work except at first XID of a page. But beware: just after
@@ -856,13 +970,14 @@ ExtendCLOG(TransactionId newestXact)
return;
pageno = TransactionIdToPage(newestXact);
+ lock = SimpleLruGetBankLock(XactCtl, pageno);
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
/* Zero the page and make an XLOG entry about it */
ZeroCLOGPage(pageno, true);
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
}
@@ -1000,16 +1115,18 @@ clog_redo(XLogReaderState *record)
{
int64 pageno;
int slotno;
+ LWLock *lock;
memcpy(&pageno, XLogRecGetData(record), sizeof(pageno));
- LWLockAcquire(XactSLRULock, LW_EXCLUSIVE);
+ lock = SimpleLruGetBankLock(XactCtl, pageno);
+ LWLockAcquire(lock, LW_EXCLUSIVE);
slotno = ZeroCLOGPage(pageno, false);
SimpleLruWritePage(XactCtl, slotno);
Assert(!XactCtl->shared->page_dirty[slotno]);
- LWLockRelease(XactSLRULock);
+ LWLockRelease(lock);
}
else if (info == CLOG_TRUNCATE)
{