From 53c2a97a92665be6bd7d70bd62ae6158fe4db96e Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Wed, 28 Feb 2024 17:05:31 +0100 Subject: Improve performance of subsystems on top of SLRU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit More precisely, what we do here is make the SLRU cache sizes configurable with new GUCs, so that sites with high concurrency and big ranges of transactions in flight (resp. multixacts/subtransactions) can benefit from bigger caches. In order for this to work with good performance, two additional changes are made: 1. the cache is divided into "banks" (to borrow terminology from CPU caches), and algorithms such as eviction buffer search only affect one specific bank. This forestalls the problem that linear searching for a specific buffer across the whole cache takes too long: we only have to search the specific bank, whose size is small. This work is authored by Andrey Borodin. 2. the locking regime for the SLRU banks is changed, so that each bank uses a separate LWLock. This allows for increased scalability. This work is authored by Dilip Kumar. (A part of this was previously committed as d172b717c6f4.) Special care is taken so that the algorithms that can potentially traverse more than one bank release one bank's lock before acquiring the next. This should happen rarely, but particularly clog.c's group commit feature needed code adjustment to cope with this. I (Álvaro) also added lots of comments to make sure the design is sound. The new GUCs match the names introduced by bcdfa5f2e2f2 in the pg_stat_slru view. The default values for these parameters are similar to the previous sizes of each SLRU. commit_ts, clog and subtrans accept value 0, which means to adjust by dividing shared_buffers by 512 (so 2MB for every 1GB of shared_buffers), with a cap of 8MB. (A new slru.c function SimpleLruAutotuneBuffers() was added to support this.) 
The cap was previously 1MB for clog, so for sites with more than 512MB of shared memory the total memory used increases, which is likely a good tradeoff. However, other SLRUs (notably multixact ones) retain smaller sizes and don't support a configured value of 0. These values based on shared_buffers may need to be revisited, but that's an easy change. There was some resistance to adding these new GUCs: it would be better to adjust to memory pressure automatically somehow, for example by stealing memory from shared_buffers (where the caches can grow and shrink naturally). However, doing that seems to be a much larger project and one which has made virtually no progress in several years, and because this is such a pain point for so many users, here we take the pragmatic approach. Author: Andrey Borodin Author: Dilip Kumar Reviewed-by: Amul Sul, Gilles Darold, Anastasia Lubennikova, Ivan Lazarev, Robert Haas, Thomas Munro, Tomas Vondra, Yura Sokolov, Васильев Дмитрий (Dmitry Vasiliev). Discussion: https://postgr.es/m/2BEC2B3F-9B61-4C1D-9FB5-5FAB0F05EF86@yandex-team.ru Discussion: https://postgr.es/m/CAFiTN-vzDvNz=ExGXz6gdyjtzGixKSqs0mKHMmaQ8sOSEFZ33A@mail.gmail.com --- src/backend/access/transam/commit_ts.c | 88 ++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 21 deletions(-) (limited to 'src/backend/access/transam/commit_ts.c') diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index d965db89c75..5c35a18348c 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -33,6 +33,7 @@ #include "pg_trace.h" #include "storage/shmem.h" #include "utils/builtins.h" +#include "utils/guc_hooks.h" #include "utils/snapmgr.h" #include "utils/timestamp.h" @@ -225,10 +226,11 @@ SetXidCommitTsInPage(TransactionId xid, int nsubxids, TransactionId *subxids, TimestampTz ts, RepOriginId nodeid, int64 pageno) { + LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno); int slotno; int 
i; - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); slotno = SimpleLruReadPage(CommitTsCtl, pageno, true, xid); @@ -238,13 +240,13 @@ SetXidCommitTsInPage(TransactionId xid, int nsubxids, CommitTsCtl->shared->page_dirty[slotno] = true; - LWLockRelease(CommitTsSLRULock); + LWLockRelease(lock); } /* * Sets the commit timestamp of a single transaction. * - * Must be called with CommitTsSLRULock held + * Caller must hold the correct SLRU bank lock, will be held at exit */ static void TransactionIdSetCommitTs(TransactionId xid, TimestampTz ts, @@ -345,7 +347,7 @@ TransactionIdGetCommitTsData(TransactionId xid, TimestampTz *ts, if (nodeid) *nodeid = entry.nodeid; - LWLockRelease(CommitTsSLRULock); + LWLockRelease(SimpleLruGetBankLock(CommitTsCtl, pageno)); return *ts != 0; } @@ -499,14 +501,18 @@ pg_xact_commit_timestamp_origin(PG_FUNCTION_ARGS) /* * Number of shared CommitTS buffers. * - * We use a very similar logic as for the number of CLOG buffers (except we - * scale up twice as fast with shared buffers, and the maximum is twice as - * high); see comments in CLOGShmemBuffers. + * If asked to autotune, use 2MB for every 1GB of shared buffers, up to 8MB. + * Otherwise just cap the configured amount to be between 16 and the maximum + * allowed. 
*/ -Size +static int CommitTsShmemBuffers(void) { - return Min(256, Max(4, NBuffers / 256)); + /* auto-tune based on shared buffers */ + if (commit_timestamp_buffers == 0) + return SimpleLruAutotuneBuffers(512, 1024); + + return Min(Max(16, commit_timestamp_buffers), SLRU_MAX_ALLOWED_BUFFERS); } /* @@ -528,10 +534,31 @@ CommitTsShmemInit(void) { bool found; + /* If auto-tuning is requested, now is the time to do it */ + if (commit_timestamp_buffers == 0) + { + char buf[32]; + + snprintf(buf, sizeof(buf), "%d", CommitTsShmemBuffers()); + SetConfigOption("commit_timestamp_buffers", buf, PGC_POSTMASTER, + PGC_S_DYNAMIC_DEFAULT); + + /* + * We prefer to report this value's source as PGC_S_DYNAMIC_DEFAULT. + * However, if the DBA explicitly set commit_timestamp_buffers = 0 in + * the config file, then PGC_S_DYNAMIC_DEFAULT will fail to override + * that and we must force the matter with PGC_S_OVERRIDE. + */ + if (commit_timestamp_buffers == 0) /* failed to apply it? */ + SetConfigOption("commit_timestamp_buffers", buf, PGC_POSTMASTER, + PGC_S_OVERRIDE); + } + Assert(commit_timestamp_buffers != 0); + CommitTsCtl->PagePrecedes = CommitTsPagePrecedes; SimpleLruInit(CommitTsCtl, "commit_timestamp", CommitTsShmemBuffers(), 0, - CommitTsSLRULock, "pg_commit_ts", - LWTRANCHE_COMMITTS_BUFFER, + "pg_commit_ts", LWTRANCHE_COMMITTS_BUFFER, + LWTRANCHE_COMMITTS_SLRU, SYNC_HANDLER_COMMIT_TS, false); SlruPagePrecedesUnitTests(CommitTsCtl, COMMIT_TS_XACTS_PER_PAGE); @@ -553,6 +580,15 @@ CommitTsShmemInit(void) Assert(found); } +/* + * GUC check_hook for commit_timestamp_buffers + */ +bool +check_commit_ts_buffers(int *newval, void **extra, GucSource source) +{ + return check_slru_buffers("commit_timestamp_buffers", newval); +} + /* * This function must be called ONCE on system install. 
* @@ -715,13 +751,14 @@ ActivateCommitTs(void) /* Create the current segment file, if necessary */ if (!SimpleLruDoesPhysicalPageExist(CommitTsCtl, pageno)) { + LWLock *lock = SimpleLruGetBankLock(CommitTsCtl, pageno); int slotno; - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); + LWLockAcquire(lock, LW_EXCLUSIVE); slotno = ZeroCommitTsPage(pageno, false); SimpleLruWritePage(CommitTsCtl, slotno); Assert(!CommitTsCtl->shared->page_dirty[slotno]); - LWLockRelease(CommitTsSLRULock); + LWLockRelease(lock); } /* Change the activation status in shared memory. */ @@ -760,8 +797,6 @@ DeactivateCommitTs(void) TransamVariables->oldestCommitTsXid = InvalidTransactionId; TransamVariables->newestCommitTsXid = InvalidTransactionId; - LWLockRelease(CommitTsLock); - /* * Remove *all* files. This is necessary so that there are no leftover * files; in the case where this feature is later enabled after running @@ -769,10 +804,16 @@ DeactivateCommitTs(void) * (We can probably tolerate out-of-sequence files, as they are going to * be overwritten anyway when we wrap around, but it seems better to be * tidy.) + * + * Note that we do this with CommitTsLock acquired in exclusive mode. This + * is very heavy-handed, but since this routine can only be called in the + * replica and should happen very rarely, we don't worry too much about + * it. Note also that no process should be consulting this SLRU if we + * have just deactivated it. */ - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); (void) SlruScanDirectory(CommitTsCtl, SlruScanDirCbDeleteAll, NULL); - LWLockRelease(CommitTsSLRULock); + + LWLockRelease(CommitTsLock); } /* @@ -804,6 +845,7 @@ void ExtendCommitTs(TransactionId newestXact) { int64 pageno; + LWLock *lock; /* * Nothing to do if module not enabled. 
Note we do an unlocked read of @@ -824,12 +866,14 @@ ExtendCommitTs(TransactionId newestXact) pageno = TransactionIdToCTsPage(newestXact); - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); + lock = SimpleLruGetBankLock(CommitTsCtl, pageno); + + LWLockAcquire(lock, LW_EXCLUSIVE); /* Zero the page and make an XLOG entry about it */ ZeroCommitTsPage(pageno, !InRecovery); - LWLockRelease(CommitTsSLRULock); + LWLockRelease(lock); } /* @@ -983,16 +1027,18 @@ commit_ts_redo(XLogReaderState *record) { int64 pageno; int slotno; + LWLock *lock; memcpy(&pageno, XLogRecGetData(record), sizeof(pageno)); - LWLockAcquire(CommitTsSLRULock, LW_EXCLUSIVE); + lock = SimpleLruGetBankLock(CommitTsCtl, pageno); + LWLockAcquire(lock, LW_EXCLUSIVE); slotno = ZeroCommitTsPage(pageno, false); SimpleLruWritePage(CommitTsCtl, slotno); Assert(!CommitTsCtl->shared->page_dirty[slotno]); - LWLockRelease(CommitTsSLRULock); + LWLockRelease(lock); } else if (info == COMMIT_TS_TRUNCATE) { -- cgit v1.2.3