diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/postmaster/bgwriter.c | 196 | ||||
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 58 | ||||
-rw-r--r-- | src/backend/storage/buffer/freelist.c | 45 | ||||
-rw-r--r-- | src/backend/storage/lmgr/proc.c | 4 | ||||
-rw-r--r-- | src/include/postmaster/bgwriter.h | 6 | ||||
-rw-r--r-- | src/include/storage/buf_internals.h | 3 | ||||
-rw-r--r-- | src/include/storage/proc.h | 2 |
7 files changed, 145 insertions, 169 deletions
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c index 59c539a4e0e..f72672ef3be 100644 --- a/src/backend/postmaster/bgwriter.c +++ b/src/backend/postmaster/bgwriter.c @@ -21,10 +21,7 @@ * * If the bgwriter exits unexpectedly, the postmaster treats that the same * as a backend crash: shared memory may be corrupted, so remaining backends - * should be killed by SIGQUIT and then a recovery cycle started. (Even if - * shared memory isn't corrupted, we have lost information about which - * files need to be fsync'd for the next checkpoint, and so a system - * restart needs to be forced.) + * should be killed by SIGQUIT and then a recovery cycle started. * * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group @@ -48,6 +45,7 @@ #include "pgstat.h" #include "postmaster/bgwriter.h" #include "storage/bufmgr.h" +#include "storage/buf_internals.h" #include "storage/ipc.h" #include "storage/lwlock.h" #include "storage/pmsignal.h" @@ -66,9 +64,10 @@ int BgWriterDelay = 200; /* - * Time to sleep between bgwriter rounds, when it has no work to do. + * Multiplier to apply to BgWriterDelay when we decide to hibernate. + * (Perhaps this needs to be configurable?) */ -#define BGWRITER_HIBERNATE_MS 10000 +#define HIBERNATE_FACTOR 50 /* * Flags set by interrupt handlers for later service in the main loop. @@ -81,10 +80,6 @@ static volatile sig_atomic_t shutdown_requested = false; */ static bool am_bg_writer = false; -/* Prototypes for private functions */ - -static void BgWriterNap(bool hibernating); - /* Signal handlers */ static void bg_quickdie(SIGNAL_ARGS); @@ -104,7 +99,7 @@ BackgroundWriterMain(void) { sigjmp_buf local_sigjmp_buf; MemoryContext bgwriter_context; - bool hibernating; + bool prev_hibernate; am_bg_writer = true; @@ -126,7 +121,7 @@ BackgroundWriterMain(void) * handler is still needed for latch wakeups. */ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */ - pqsignal(SIGINT, SIG_IGN); /* as of 9.2 no longer requests checkpoint */ + pqsignal(SIGINT, SIG_IGN); pqsignal(SIGTERM, ReqShutdownHandler); /* shutdown */ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */ pqsignal(SIGALRM, SIG_IGN); @@ -147,12 +142,6 @@ BackgroundWriterMain(void) sigdelset(&BlockSig, SIGQUIT); /* - * Advertise our latch that backends can use to wake us up while we're - * sleeping. - */ - ProcGlobal->bgwriterLatch = &MyProc->procLatch; - - /* * Create a resource owner to keep track of our resources (currently only * buffer pins). */ @@ -247,25 +236,25 @@ BackgroundWriterMain(void) ThisTimeLineID = GetRecoveryTargetTLI(); /* + * Reset hibernation state after any error. + */ + prev_hibernate = false; + + /* * Loop forever */ - hibernating = false; for (;;) { - bool lapped; + bool can_hibernate; + int rc; - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. - */ - if (!PostmasterIsAlive()) - exit(1); + /* Clear any already-pending wakeups */ + ResetLatch(&MyProc->procLatch); if (got_SIGHUP) { got_SIGHUP = false; ProcessConfigFile(PGC_SIGHUP); - /* update global shmem state for sync rep */ } if (shutdown_requested) { @@ -281,126 +270,69 @@ BackgroundWriterMain(void) /* * Do one cycle of dirty-buffer writing. */ - if (hibernating && bgwriter_lru_maxpages > 0) - ResetLatch(&MyProc->procLatch); - lapped = BgBufferSync(); - - if (lapped && !hibernating) - { - /* - * BgBufferSync did nothing. Since there doesn't seem to be any - * work for the bgwriter to do, go into slower-paced - * "hibernation" mode, where we sleep for much longer times than - * bgwriter_delay says. Fewer wakeups saves electricity. If a - * backend starts dirtying pages again, it will wake us up by - * setting our latch. - * - * The latch is kept set during productive cycles where buffers - * are written, and only reset before going into a longer sleep. - * That ensures that when there's a constant trickle of activity, - * the SetLatch() calls that backends have to do will see the - * latch as already set, and are not slowed down by having to - * actually set the latch and signal us. - */ - hibernating = true; - - /* - * Take one more short nap and perform one more bgwriter cycle - - * someone might've dirtied a buffer just after we finished the - * previous bgwriter cycle, while the latch was still set. If - * we still find nothing to do after this cycle, the next sleep - * will be longer. - */ - BgWriterNap(false); - continue; - } - else if (!lapped && hibernating) - { - /* - * Woken up from hibernation. Set the latch just in case it's - * not set yet (usually we wake up from hibernation because a - * backend already set the latch, but not necessarily). - */ - SetLatch(&MyProc->procLatch); - hibernating = false; - } + can_hibernate = BgBufferSync(); /* - * Take a short or long nap, depending on whether there was any work - * to do. + * Send off activity statistics to the stats collector */ - BgWriterNap(hibernating); - } -} + pgstat_send_bgwriter(); -/* - * BgWriterNap -- Nap for the configured time or until a signal is received. - * - * If 'hibernating' is false, sleeps for bgwriter_delay milliseconds. - * Otherwise sleeps longer, but also wakes up if the process latch is set. - */ -static void -BgWriterNap(bool hibernating) -{ - long udelay; - - /* - * Send off activity statistics to the stats collector - */ - pgstat_send_bgwriter(); - - /* - * If there was no work to do in the previous bgwriter cycle, take a - * longer nap. - */ - if (hibernating) - { /* - * We wake on a buffer being dirtied. It's possible that some - * useful work will become available for the bgwriter to do without - * a buffer actually being dirtied, like when a dirty buffer's usage - * count is decremented to zero or it's unpinned. This corner case - * is judged as too marginal to justify adding additional SetLatch() - * calls in very hot code paths, cheap though those calls may be. + * Sleep until we are signaled or BgWriterDelay has elapsed. * - * We still wake up periodically, so that BufferAlloc stats are - * updated reasonably promptly. + * Note: the feedback control loop in BgBufferSync() expects that we + * will call it every BgWriterDelay msec. While it's not critical for + * correctness that that be exact, the feedback loop might misbehave + * if we stray too far from that. Hence, avoid loading this process + * down with latch events that are likely to happen frequently during + * normal operation. */ - int res = WaitLatch(&MyProc->procLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - BGWRITER_HIBERNATE_MS); + rc = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + BgWriterDelay /* ms */); /* - * Only do a quick return if timeout was reached (or postmaster died) - * to ensure that no less than BgWriterDelay ms has passed between - * BgBufferSyncs - WaitLatch() might have returned instantaneously. + * If no latch event and BgBufferSync says nothing's happening, extend + * the sleep in "hibernation" mode, where we sleep for much longer + * than bgwriter_delay says. Fewer wakeups save electricity. When a + * backend starts using buffers again, it will wake us up by setting + * our latch. Because the extra sleep will persist only as long as no + * buffer allocations happen, this should not distort the behavior of + * BgBufferSync's control loop too badly; essentially, it will think + * that the system-wide idle interval didn't exist. + * + * There is a race condition here, in that a backend might allocate a + * buffer between the time BgBufferSync saw the alloc count as zero + * and the time we call StrategyNotifyBgWriter. While it's not + * critical that we not hibernate anyway, we try to reduce the odds of + * that by only hibernating when BgBufferSync says nothing's happening + * for two consecutive cycles. Also, we mitigate any possible + * consequences of a missed wakeup by not hibernating forever. */ - if (res & (WL_TIMEOUT | WL_POSTMASTER_DEATH)) - return; - } + if (rc == WL_TIMEOUT && can_hibernate && prev_hibernate) + { + /* Ask for notification at next buffer allocation */ + StrategyNotifyBgWriter(&MyProc->procLatch); + /* Sleep ... */ + rc = WaitLatch(&MyProc->procLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + BgWriterDelay * HIBERNATE_FACTOR); + /* Reset the notification request in case we timed out */ + StrategyNotifyBgWriter(NULL); + } - /* - * Nap for the configured time. - * - * On some platforms, signals won't interrupt the sleep. To ensure we - * respond reasonably promptly when someone signals us, break down the - * sleep into 1-second increments, and check for interrupts after each - * nap. - */ - udelay = BgWriterDelay * 1000L; + /* + * Emergency bailout if postmaster has died. This is to avoid the + * necessity for manual cleanup of all postmaster children. + */ + if (rc & WL_POSTMASTER_DEATH) + exit(1); - while (udelay > 999999L) - { - if (got_SIGHUP || shutdown_requested) - break; - pg_usleep(1000000L); - udelay -= 1000000L; + prev_hibernate = can_hibernate; } - - if (!(got_SIGHUP || shutdown_requested)) - pg_usleep(udelay); } + /* -------------------------------- * signal handler routines * -------------------------------- diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 1889941eda1..a1b588b95c1 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -968,7 +968,6 @@ void MarkBufferDirty(Buffer buffer) { volatile BufferDesc *bufHdr; - bool dirtied = false; if (!BufferIsValid(buffer)) elog(ERROR, "bad buffer ID: %d", buffer); @@ -989,26 +988,20 @@ MarkBufferDirty(Buffer buffer) Assert(bufHdr->refcount > 0); - if (!(bufHdr->flags & BM_DIRTY)) - dirtied = true; - - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - - UnlockBufHdr(bufHdr); - /* - * If the buffer was not dirty already, do vacuum accounting, and - * nudge bgwriter. + * If the buffer was not dirty already, do vacuum accounting. */ - if (dirtied) + if (!(bufHdr->flags & BM_DIRTY)) { VacuumPageDirty++; pgBufferUsage.shared_blks_dirtied++; if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; - if (ProcGlobal->bgwriterLatch) - SetLatch(ProcGlobal->bgwriterLatch); } + + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + + UnlockBufHdr(bufHdr); } /* @@ -1331,9 +1324,11 @@ BufferSync(int flags) * * This is called periodically by the background writer process. * - * Returns true if the clocksweep has been "lapped", so that there's nothing - * to do. Also returns true if there's nothing to do because bgwriter was - * effectively disabled by setting bgwriter_lru_maxpages to 0. + * Returns true if it's appropriate for the bgwriter process to go into + * low-power hibernation mode. (This happens if the strategy clock sweep + * has been "lapped" and no buffer allocations have occurred recently, + * or if the bgwriter has been effectively disabled by setting + * bgwriter_lru_maxpages to 0.) */ bool BgBufferSync(void) @@ -1375,6 +1370,10 @@ BgBufferSync(void) int num_written; int reusable_buffers; + /* Variables for final smoothed_density update */ + long new_strategy_delta; + uint32 new_recent_alloc; + /* * Find out where the freelist clock sweep currently is, and how many * buffer allocations have happened since our last call. @@ -1598,21 +1597,23 @@ BgBufferSync(void) * which is helpful because a long memory isn't as desirable on the * density estimates. */ - strategy_delta = bufs_to_lap - num_to_scan; - recent_alloc = reusable_buffers - reusable_buffers_est; - if (strategy_delta > 0 && recent_alloc > 0) + new_strategy_delta = bufs_to_lap - num_to_scan; + new_recent_alloc = reusable_buffers - reusable_buffers_est; + if (new_strategy_delta > 0 && new_recent_alloc > 0) { - scans_per_alloc = (float) strategy_delta / (float) recent_alloc; + scans_per_alloc = (float) new_strategy_delta / (float) new_recent_alloc; smoothed_density += (scans_per_alloc - smoothed_density) / smoothing_samples; #ifdef BGW_DEBUG elog(DEBUG2, "bgwriter: cleaner density alloc=%u scan=%ld density=%.2f new smoothed=%.2f", - recent_alloc, strategy_delta, scans_per_alloc, smoothed_density); + new_recent_alloc, new_strategy_delta, + scans_per_alloc, smoothed_density); #endif } - return (bufs_to_lap == 0); + /* Return true if OK to hibernate */ + return (bufs_to_lap == 0 && recent_alloc == 0); } /* @@ -2385,24 +2386,17 @@ SetBufferCommitInfoNeedsSave(Buffer buffer) if ((bufHdr->flags & (BM_DIRTY | BM_JUST_DIRTIED)) != (BM_DIRTY | BM_JUST_DIRTIED)) { - bool dirtied = false; - LockBufHdr(bufHdr); Assert(bufHdr->refcount > 0); if (!(bufHdr->flags & BM_DIRTY)) - dirtied = true; - bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); - UnlockBufHdr(bufHdr); - - if (dirtied) { + /* Do vacuum cost accounting */ VacuumPageDirty++; if (VacuumCostActive) VacuumCostBalance += VacuumCostPageDirty; - /* The bgwriter may need to be woken. */ - if (ProcGlobal->bgwriterLatch) - SetLatch(ProcGlobal->bgwriterLatch); } + bufHdr->flags |= (BM_DIRTY | BM_JUST_DIRTIED); + UnlockBufHdr(bufHdr); } } diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 3e62448386d..76a4beca699 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -41,6 +41,11 @@ typedef struct */ uint32 completePasses; /* Complete cycles of the clock sweep */ uint32 numBufferAllocs; /* Buffers allocated since last reset */ + + /* + * Notification latch, or NULL if none. See StrategyNotifyBgWriter. + */ + Latch *bgwriterLatch; } BufferStrategyControl; /* Pointers to shared state */ @@ -107,6 +112,7 @@ volatile BufferDesc * StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) { volatile BufferDesc *buf; + Latch *bgwriterLatch; int trycounter; /* @@ -135,6 +141,21 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held) StrategyControl->numBufferAllocs++; /* + * If bgwriterLatch is set, we need to waken the bgwriter, but we should + * not do so while holding BufFreelistLock; so release and re-grab. This + * is annoyingly tedious, but it happens at most once per bgwriter cycle, + * so the performance hit is minimal. + */ + bgwriterLatch = StrategyControl->bgwriterLatch; + if (bgwriterLatch) + { + StrategyControl->bgwriterLatch = NULL; + LWLockRelease(BufFreelistLock); + SetLatch(bgwriterLatch); + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + } + + /* * Try to get a buffer from the freelist. Note that the freeNext fields * are considered to be protected by the BufFreelistLock not the * individual buffer spinlocks, so it's OK to manipulate them without @@ -269,6 +290,27 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc) return result; } +/* + * StrategyNotifyBgWriter -- set or clear allocation notification latch + * + * If bgwriterLatch isn't NULL, the next invocation of StrategyGetBuffer will + * set that latch. Pass NULL to clear the pending notification before it + * happens. This feature is used by the bgwriter process to wake itself up + * from hibernation, and is not meant for anybody else to use. + */ +void +StrategyNotifyBgWriter(Latch *bgwriterLatch) +{ + /* + * We acquire the BufFreelistLock just to ensure that the store appears + * atomic to StrategyGetBuffer. The bgwriter should call this rather + * infrequently, so there's no performance penalty from being safe. + */ + LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE); + StrategyControl->bgwriterLatch = bgwriterLatch; + LWLockRelease(BufFreelistLock); +} + /* * StrategyShmemSize @@ -344,6 +386,9 @@ StrategyInitialize(bool init) /* Clear statistics */ StrategyControl->completePasses = 0; StrategyControl->numBufferAllocs = 0; + + /* No pending notification */ + StrategyControl->bgwriterLatch = NULL; } else Assert(!init); diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 8e309f8a0b4..031e91d14c5 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -186,7 +186,6 @@ InitProcGlobal(void) ProcGlobal->startupProc = NULL; ProcGlobal->startupProcPid = 0; ProcGlobal->startupBufferPinWaitBufId = -1; - ProcGlobal->bgwriterLatch = NULL; ProcGlobal->walwriterLatch = NULL; ProcGlobal->checkpointerLatch = NULL; @@ -627,6 +626,9 @@ HaveNFreeProcs(int n) return (n <= 0); } +/* + * Check if the current process is awaiting a lock. + */ bool IsWaitingForLock(void) { diff --git a/src/include/postmaster/bgwriter.h b/src/include/postmaster/bgwriter.h index 540c4e9cc0e..d17dd174875 100644 --- a/src/include/postmaster/bgwriter.h +++ b/src/include/postmaster/bgwriter.h @@ -1,7 +1,10 @@ /*------------------------------------------------------------------------- * * bgwriter.h - * Exports from postmaster/bgwriter.c. + * Exports from postmaster/bgwriter.c and postmaster/checkpointer.c. + * + * The bgwriter process used to handle checkpointing duties too. Now + * there is a separate process, but we did not bother to split this header. * * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group * @@ -32,7 +35,6 @@ extern bool ForwardFsyncRequest(RelFileNodeBackend rnode, ForkNumber forknum, BlockNumber segno); extern void AbsorbFsyncRequests(void); -/* These were previously called BgWriterShmem... */ extern Size CheckpointerShmemSize(void); extern void CheckpointerShmemInit(void); diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h index e43719ed737..4129ce52442 100644 --- a/src/include/storage/buf_internals.h +++ b/src/include/storage/buf_internals.h @@ -16,6 +16,7 @@ #define BUFMGR_INTERNALS_H #include "storage/buf.h" +#include "storage/latch.h" #include "storage/lwlock.h" #include "storage/shmem.h" #include "storage/smgr.h" @@ -188,6 +189,8 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf); extern int StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc); +extern void StrategyNotifyBgWriter(Latch *bgwriterLatch); + extern Size StrategyShmemSize(void); extern void StrategyInitialize(bool init); diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index c4808f44a08..7552e188f37 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -188,8 +188,6 @@ typedef struct PROC_HDR PGPROC *freeProcs; /* Head of list of autovacuum's free PGPROC structures */ PGPROC *autovacFreeProcs; - /* BGWriter process's latch */ - Latch *bgwriterLatch; /* WALWriter process's latch */ Latch *walwriterLatch; /* Checkpointer process's latch */ |