diff options
Diffstat (limited to 'src/backend/access/transam/clog.c')
-rw-r--r-- | src/backend/access/transam/clog.c | 243 |
1 files changed, 180 insertions, 63 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 34f079cbb14..a787b374dac 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -3,12 +3,13 @@ * clog.c * PostgreSQL transaction-commit-log manager * - * This module replaces the old "pg_log" access code, which treated pg_log - * essentially like a relation, in that it went through the regular buffer - * manager. The problem with that was that there wasn't any good way to - * recycle storage space for transactions so old that they'll never be - * looked up again. Now we use specialized access code so that the commit - * log can be broken into relatively small, independent segments. + * This module stores two bits per transaction regarding its commit/abort + * status; the status for four transactions fit in a byte. + * + * This would be a pretty simple abstraction on top of slru.c, except that + * for performance reasons we allow multiple transactions that are + * committing concurrently to form a queue, so that a single process can + * update the status for all of them within a single lock acquisition run. * * XLOG interactions: this module generates an XLOG record whenever a new * CLOG page is initialized to zeroes. Other writes of CLOG come from @@ -43,6 +44,7 @@ #include "pgstat.h" #include "storage/proc.h" #include "storage/sync.h" +#include "utils/guc_hooks.h" /* * Defines for CLOG page sizes. A page is the same BLCKSZ as is used @@ -62,6 +64,15 @@ #define CLOG_XACTS_PER_PAGE (BLCKSZ * CLOG_XACTS_PER_BYTE) #define CLOG_XACT_BITMASK ((1 << CLOG_BITS_PER_XACT) - 1) +/* + * Because space used in CLOG by each transaction is so small, we place a + * smaller limit on the number of CLOG buffers than SLRU allows. No other + * SLRU needs this. + */ +#define CLOG_MAX_ALLOWED_BUFFERS \ + Min(SLRU_MAX_ALLOWED_BUFFERS, \ + (((MaxTransactionId / 2) + (CLOG_XACTS_PER_PAGE - 1)) / CLOG_XACTS_PER_PAGE)) + /* * Although we return an int64 the actual value can't currently exceed @@ -284,15 +295,20 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, XLogRecPtr lsn, int64 pageno, bool all_xact_same_page) { + LWLock *lock; + /* Can't use group update when PGPROC overflows. */ StaticAssertDecl(THRESHOLD_SUBTRANS_CLOG_OPT <= PGPROC_MAX_CACHED_SUBXIDS, "group clog threshold less than PGPROC cached subxids"); + /* Get the SLRU bank lock for the page we are going to access. */ + lock = SimpleLruGetBankLock(XactCtl, pageno); + /* - * When there is contention on XactSLRULock, we try to group multiple - * updates; a single leader process will perform transaction status - * updates for multiple backends so that the number of times XactSLRULock - * needs to be acquired is reduced. + * When there is contention on the SLRU bank lock we need, we try to group + * multiple updates; a single leader process will perform transaction + * status updates for multiple backends so that the number of times the + * bank lock needs to be acquired is reduced. * * For this optimization to be safe, the XID and subxids in MyProc must be * the same as the ones for which we're setting the status. Check that @@ -310,17 +326,17 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, nsubxids * sizeof(TransactionId)) == 0)) { /* - * If we can immediately acquire XactSLRULock, we update the status of - * our own XID and release the lock. If not, try use group XID - * update. If that doesn't work out, fall back to waiting for the - * lock to perform an update for this transaction only. + * If we can immediately acquire the lock, we update the status of our + * own XID and release the lock. If not, try use group XID update. If + * that doesn't work out, fall back to waiting for the lock to perform + * an update for this transaction only. */ - if (LWLockConditionalAcquire(XactSLRULock, LW_EXCLUSIVE)) + if (LWLockConditionalAcquire(lock, LW_EXCLUSIVE)) { /* Got the lock without waiting! Do the update. */ TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status, lsn, pageno); - LWLockRelease(XactSLRULock); + LWLockRelease(lock); return; } else if (TransactionGroupUpdateXidStatus(xid, status, lsn, pageno)) @@ -333,10 +349,10 @@ TransactionIdSetPageStatus(TransactionId xid, int nsubxids, } /* Group update not applicable, or couldn't accept this page number. */ - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); TransactionIdSetPageStatusInternal(xid, nsubxids, subxids, status, lsn, pageno); - LWLockRelease(XactSLRULock); + LWLockRelease(lock); } /* @@ -355,7 +371,8 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, Assert(status == TRANSACTION_STATUS_COMMITTED || status == TRANSACTION_STATUS_ABORTED || (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid))); - Assert(LWLockHeldByMeInMode(XactSLRULock, LW_EXCLUSIVE)); + Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(XactCtl, pageno), + LW_EXCLUSIVE)); /* * If we're doing an async commit (ie, lsn is valid), then we must wait @@ -406,14 +423,15 @@ TransactionIdSetPageStatusInternal(TransactionId xid, int nsubxids, } /* - * When we cannot immediately acquire XactSLRULock in exclusive mode at + * Subroutine for TransactionIdSetPageStatus, q.v. + * + * When we cannot immediately acquire the SLRU bank lock in exclusive mode at * commit time, add ourselves to a list of processes that need their XIDs * status update. The first process to add itself to the list will acquire - * XactSLRULock in exclusive mode and set transaction status as required - * on behalf of all group members. This avoids a great deal of contention - * around XactSLRULock when many processes are trying to commit at once, - * since the lock need not be repeatedly handed off from one committing - * process to the next. + * the lock in exclusive mode and set transaction status as required on behalf + * of all group members. This avoids a great deal of contention when many + * processes are trying to commit at once, since the lock need not be + * repeatedly handed off from one committing process to the next. * * Returns true when transaction status has been updated in clog; returns * false if we decided against applying the optimization because the page @@ -425,16 +443,17 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, { volatile PROC_HDR *procglobal = ProcGlobal; PGPROC *proc = MyProc; - int pgprocno = MyProcNumber; uint32 nextidx; uint32 wakeidx; + int prevpageno; + LWLock *prevlock = NULL; /* We should definitely have an XID whose status needs to be updated. */ Assert(TransactionIdIsValid(xid)); /* - * Add ourselves to the list of processes needing a group XID status - * update. + * Prepare to add ourselves to the list of processes needing a group XID + * status update. */ proc->clogGroupMember = true; proc->clogGroupMemberXid = xid; @@ -442,6 +461,29 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, proc->clogGroupMemberPage = pageno; proc->clogGroupMemberLsn = lsn; + /* + * We put ourselves in the queue by writing MyProcNumber to + * ProcGlobal->clogGroupFirst. However, if there's already a process + * listed there, we compare our pageno with that of that process; if it + * differs, we cannot participate in the group, so we return for caller to + * update pg_xact in the normal way. + * + * If we're not the first process in the list, we must follow the leader. + * We do this by storing the data we want updated in our PGPROC entry + * where the leader can find it, then going to sleep. + * + * If no process is already in the list, we're the leader; our first step + * is to lock the SLRU bank to which our page belongs, then we close out + * the group by resetting the list pointer from ProcGlobal->clogGroupFirst + * (this lets other processes set up other groups later); finally we do + * the SLRU updates, release the SLRU bank lock, and wake up the sleeping + * processes. + * + * If another group starts to update a page in a different SLRU bank, they + * can proceed concurrently, since the bank lock they're going to use is + * different from ours. If another group starts to update a page in the + * same bank as ours, they wait until we release the lock. + */ nextidx = pg_atomic_read_u32(&procglobal->clogGroupFirst); while (true) @@ -453,10 +495,11 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, * There is a race condition here, which is that after doing the below * check and before adding this proc's clog update to a group, the * group leader might have already finished the group update for this - * page and becomes group leader of another group. This will lead to a - * situation where a single group can have different clog page - * updates. This isn't likely and will still work, just maybe a bit - * less efficiently. + * page and becomes group leader of another group, updating a + * different page. This will lead to a situation where a single group + * can have different clog page updates. This isn't likely and will + * still work, just less efficiently -- we handle this case by + * switching to a different bank lock in the loop below. */ if (nextidx != INVALID_PGPROCNO && GetPGProcByNumber(nextidx)->clogGroupMemberPage != proc->clogGroupMemberPage) @@ -474,7 +517,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, if (pg_atomic_compare_exchange_u32(&procglobal->clogGroupFirst, &nextidx, - (uint32) pgprocno)) + (uint32) MyProcNumber)) break; } @@ -508,13 +551,21 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, return true; } - /* We are the leader. Acquire the lock on behalf of everyone. */ - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + /* + * By here, we know we're the leader process. Acquire the SLRU bank lock + * that corresponds to the page we originally wanted to modify. + */ + prevpageno = proc->clogGroupMemberPage; + prevlock = SimpleLruGetBankLock(XactCtl, prevpageno); + LWLockAcquire(prevlock, LW_EXCLUSIVE); /* * Now that we've got the lock, clear the list of processes waiting for * group XID status update, saving a pointer to the head of the list. - * Trying to pop elements one at a time could lead to an ABA problem. + * (Trying to pop elements one at a time could lead to an ABA problem.) + * + * At this point, any processes trying to do this would create a separate + * group. */ nextidx = pg_atomic_exchange_u32(&procglobal->clogGroupFirst, INVALID_PGPROCNO); @@ -526,6 +577,31 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, while (nextidx != INVALID_PGPROCNO) { PGPROC *nextproc = &ProcGlobal->allProcs[nextidx]; + int thispageno = nextproc->clogGroupMemberPage; + + /* + * If the page to update belongs to a different bank than the previous + * one, exchange bank lock to the new one. This should be quite rare, + * as described above. + * + * (We could try to optimize this by waking up the processes for which + * we have already updated the status while we exchange the lock, but + * the code doesn't do that at present. I think it'd require + * additional bookkeeping, making the common path slower in order to + * improve an infrequent case.) + */ + if (thispageno != prevpageno) + { + LWLock *lock = SimpleLruGetBankLock(XactCtl, thispageno); + + if (prevlock != lock) + { + LWLockRelease(prevlock); + LWLockAcquire(lock, LW_EXCLUSIVE); + } + prevlock = lock; + prevpageno = thispageno; + } /* * Transactions with more than THRESHOLD_SUBTRANS_CLOG_OPT sub-XIDs @@ -545,12 +621,17 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, } /* We're done with the lock now. */ - LWLockRelease(XactSLRULock); + if (prevlock != NULL) + LWLockRelease(prevlock); /* * Now that we've released the lock, go back and wake everybody up. We * don't do this under the lock so as to keep lock hold times to a * minimum. + * + * (Perhaps we could do this in two passes, the first setting + * clogGroupNext to invalid while saving the semaphores to an array, then + * a single write barrier, then another pass unlocking the semaphores.) */ while (wakeidx != INVALID_PGPROCNO) { @@ -574,7 +655,7 @@ TransactionGroupUpdateXidStatus(TransactionId xid, XidStatus status, /* * Sets the commit status of a single transaction. * - * Must be called with XactSLRULock held + * Caller must hold the corresponding SLRU bank lock, will be held at exit. */ static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno) @@ -585,6 +666,11 @@ TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, i char byteval; char curval; + Assert(XactCtl->shared->page_number[slotno] == TransactionIdToPage(xid)); + Assert(LWLockHeldByMeInMode(SimpleLruGetBankLock(XactCtl, + XactCtl->shared->page_number[slotno]), + LW_EXCLUSIVE)); + byteptr = XactCtl->shared->page_buffer[slotno] + byteno; curval = (*byteptr >> bshift) & CLOG_XACT_BITMASK; @@ -666,7 +752,7 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) lsnindex = GetLSNIndex(slotno, xid); *lsn = XactCtl->shared->group_lsn[lsnindex]; - LWLockRelease(XactSLRULock); + LWLockRelease(SimpleLruGetBankLock(XactCtl, pageno)); return status; } @@ -674,23 +760,18 @@ TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) /* * Number of shared CLOG buffers. * - * On larger multi-processor systems, it is possible to have many CLOG page - * requests in flight at one time which could lead to disk access for CLOG - * page if the required page is not found in memory. Testing revealed that we - * can get the best performance by having 128 CLOG buffers, more than that it - * doesn't improve performance. - * - * Unconditionally keeping the number of CLOG buffers to 128 did not seem like - * a good idea, because it would increase the minimum amount of shared memory - * required to start, which could be a problem for people running very small - * configurations. The following formula seems to represent a reasonable - * compromise: people with very low values for shared_buffers will get fewer - * CLOG buffers as well, and everyone else will get 128. + * If asked to autotune, use 2MB for every 1GB of shared buffers, up to 8MB. + * Otherwise just cap the configured amount to be between 16 and the maximum + * allowed. */ -Size +static int CLOGShmemBuffers(void) { - return Min(128, Max(4, NBuffers / 512)); + /* auto-tune based on shared buffers */ + if (transaction_buffers == 0) + return SimpleLruAutotuneBuffers(512, 1024); + + return Min(Max(16, transaction_buffers), CLOG_MAX_ALLOWED_BUFFERS); } /* @@ -705,14 +786,44 @@ CLOGShmemSize(void) void CLOGShmemInit(void) { + /* If auto-tuning is requested, now is the time to do it */ + if (transaction_buffers == 0) + { + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", CLOGShmemBuffers()); + SetConfigOption("transaction_buffers", buf, PGC_POSTMASTER, + PGC_S_DYNAMIC_DEFAULT); + + /* + * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT. + * However, if the DBA explicitly set transaction_buffers = 0 in the + * config file, then PGC_S_DYNAMIC_DEFAULT will fail to override that + * and we must force the matter with PGC_S_OVERRIDE. + */ + if (transaction_buffers == 0) /* failed to apply it? */ + SetConfigOption("transaction_buffers", buf, PGC_POSTMASTER, + PGC_S_OVERRIDE); + } + Assert(transaction_buffers != 0); + XactCtl->PagePrecedes = CLOGPagePrecedes; SimpleLruInit(XactCtl, "transaction", CLOGShmemBuffers(), CLOG_LSNS_PER_PAGE, - XactSLRULock, "pg_xact", LWTRANCHE_XACT_BUFFER, - SYNC_HANDLER_CLOG, false); + "pg_xact", LWTRANCHE_XACT_BUFFER, + LWTRANCHE_XACT_SLRU, SYNC_HANDLER_CLOG, false); SlruPagePrecedesUnitTests(XactCtl, CLOG_XACTS_PER_PAGE); } /* + * GUC check_hook for transaction_buffers + */ +bool +check_transaction_buffers(int *newval, void **extra, GucSource source) +{ + return check_slru_buffers("transaction_buffers", newval); +} + +/* * This func must be called ONCE on system install. It creates * the initial CLOG segment. (The CLOG directory is assumed to * have been created by initdb, and CLOGShmemInit must have been @@ -722,8 +833,9 @@ void BootStrapCLOG(void) { int slotno; + LWLock *lock = SimpleLruGetBankLock(XactCtl, 0); - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); /* Create and zero the first page of the commit log */ slotno = ZeroCLOGPage(0, false); @@ -732,7 +844,7 @@ BootStrapCLOG(void) SimpleLruWritePage(XactCtl, slotno); Assert(!XactCtl->shared->page_dirty[slotno]); - LWLockRelease(XactSLRULock); + LWLockRelease(lock); } /* @@ -781,8 +893,9 @@ TrimCLOG(void) { TransactionId xid = XidFromFullTransactionId(TransamVariables->nextXid); int64 pageno = TransactionIdToPage(xid); + LWLock *lock = SimpleLruGetBankLock(XactCtl, pageno); - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); /* * Zero out the remainder of the current clog page. Under normal @@ -814,7 +927,7 @@ TrimCLOG(void) XactCtl->shared->page_dirty[slotno] = true; } - LWLockRelease(XactSLRULock); + LWLockRelease(lock); } /* @@ -846,6 +959,7 @@ void ExtendCLOG(TransactionId newestXact) { int64 pageno; + LWLock *lock; /* * No work except at first XID of a page. But beware: just after @@ -856,13 +970,14 @@ ExtendCLOG(TransactionId newestXact) return; pageno = TransactionIdToPage(newestXact); + lock = SimpleLruGetBankLock(XactCtl, pageno); - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); /* Zero the page and make an XLOG entry about it */ ZeroCLOGPage(pageno, true); - LWLockRelease(XactSLRULock); + LWLockRelease(lock); } @@ -1000,16 +1115,18 @@ clog_redo(XLogReaderState *record) { int64 pageno; int slotno; + LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - LWLockAcquire(XactSLRULock, LW_EXCLUSIVE); + lock = SimpleLruGetBankLock(XactCtl, pageno); + LWLockAcquire(lock, LW_EXCLUSIVE); slotno = ZeroCLOGPage(pageno, false); SimpleLruWritePage(XactCtl, slotno); Assert(!XactCtl->shared->page_dirty[slotno]); - LWLockRelease(XactSLRULock); + LWLockRelease(lock); } else if (info == CLOG_TRUNCATE) { |