diff options
author | Fujii Masao <fujii@postgresql.org> | 2020-03-24 12:46:48 +0900 |
---|---|---|
committer | Fujii Masao <fujii@postgresql.org> | 2020-03-24 12:46:48 +0900 |
commit | 496ee647ecd2917369ffcf1eaa0b2cdca07c8730 (patch) | |
tree | 8b2b8aac0573b977442ad471b69702da8d203afc /src/backend/access/transam/xlog.c | |
parent | e09ad07b21a244c3cbcdbe3048e9ab0834ac6d41 (diff) | |
download | postgresql-496ee647ecd2917369ffcf1eaa0b2cdca07c8730.tar.gz postgresql-496ee647ecd2917369ffcf1eaa0b2cdca07c8730.zip |
Prefer standby promotion over recovery pause.
Previously if a promotion was triggered while recovery was paused,
the paused state continued. Also recovery could be paused by executing
pg_wal_replay_pause() even while a promotion was ongoing. That is,
recovery pause had higher priority than a standby promotion.
But this behavior was not desirable because most users basically wanted
the recovery to complete as soon as possible and the server to become
the master when they requested a promotion.
This commit changes recovery so that it prefers a promotion over
recovery pause. That is, if a promotion is triggered while recovery
is paused, the paused state ends and a promotion continues. Also
this commit makes recovery pause functions like pg_wal_replay_pause()
throw an error if they are executed while a promotion is ongoing.
Internally, this commit adds a new internal function PromoteIsTriggered()
that returns true if a promotion is triggered. Since the name of
this function and the existing function IsPromoteTriggered() are
confusingly similar, the commit changes the name of IsPromoteTriggered()
to IsPromoteSignaled(), as a more appropriate name.
Author: Fujii Masao
Reviewed-by: Atsushi Torikoshi, Sergei Kornilov
Discussion: https://postgr.es/m/00c194b2-dbbb-2e8a-5b39-13f14048ef0a@oss.nttdata.com
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 67 |
1 files changed, 60 insertions, 7 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 64860f12f56..7621fc05e24 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -230,6 +230,12 @@ static bool LocalRecoveryInProgress = true; static bool LocalHotStandbyActive = false; /* + * Local copy of SharedPromoteIsTriggered variable. False actually means "not + * known, need to check the shared state". + */ +static bool LocalPromoteIsTriggered = false; + +/* * Local state for XLogInsertAllowed(): * 1: unconditionally allowed to insert XLOG * 0: unconditionally not allowed to insert XLOG @@ -655,6 +661,12 @@ typedef struct XLogCtlData bool SharedHotStandbyActive; /* + * SharedPromoteIsTriggered indicates if a standby promotion has been + * triggered. Protected by info_lck. + */ + bool SharedPromoteIsTriggered; + + /* * WalWriterSleeping indicates whether the WAL writer is currently in * low-power mode (and hence should be nudged if an async commit occurs). * Protected by info_lck. 
@@ -912,6 +924,7 @@ static void InitControlFile(uint64 sysidentifier); static void WriteControlFile(void); static void ReadControlFile(void); static char *str_time(pg_time_t tnow); +static void SetPromoteIsTriggered(void); static bool CheckForStandbyTrigger(void); #ifdef WAL_DEBUG @@ -5112,6 +5125,7 @@ XLOGShmemInit(void) XLogCtl->XLogCacheBlck = XLOGbuffers - 1; XLogCtl->SharedRecoveryInProgress = true; XLogCtl->SharedHotStandbyActive = false; + XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->WalWriterSleeping = false; SpinLockInit(&XLogCtl->Insert.insertpos_lck); @@ -5940,16 +5954,22 @@ recoveryPausesHere(void) if (!LocalHotStandbyActive) return; + /* Don't pause after standby promotion has been triggered */ + if (LocalPromoteIsTriggered) + return; + ereport(LOG, (errmsg("recovery has paused"), errhint("Execute pg_wal_replay_resume() to continue."))); while (RecoveryIsPaused()) { + HandleStartupProcInterrupts(); + if (CheckForStandbyTrigger()) + return; pgstat_report_wait_start(WAIT_EVENT_RECOVERY_PAUSE); pg_usleep(1000000L); /* 1000 ms */ pgstat_report_wait_end(); - HandleStartupProcInterrupts(); } } @@ -12278,6 +12298,40 @@ emode_for_corrupt_record(int emode, XLogRecPtr RecPtr) } /* + * Has a standby promotion already been triggered? + * + * Unlike CheckForStandbyTrigger(), this works in any process + * that's connected to shared memory. + */ +bool +PromoteIsTriggered(void) +{ + /* + * We check shared state each time only until a standby promotion is + * triggered. We can't trigger a promotion again, so there's no need to + * keep checking after the shared variable has once been seen true. 
+ */ + if (LocalPromoteIsTriggered) + return true; + + SpinLockAcquire(&XLogCtl->info_lck); + LocalPromoteIsTriggered = XLogCtl->SharedPromoteIsTriggered; + SpinLockRelease(&XLogCtl->info_lck); + + return LocalPromoteIsTriggered; +} + +static void +SetPromoteIsTriggered(void) +{ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedPromoteIsTriggered = true; + SpinLockRelease(&XLogCtl->info_lck); + + LocalPromoteIsTriggered = true; +} + +/* * Check to see whether the user-specified trigger file exists and whether a * promote request has arrived. If either condition holds, return true. */ @@ -12285,12 +12339,11 @@ static bool CheckForStandbyTrigger(void) { struct stat stat_buf; - static bool triggered = false; - if (triggered) + if (LocalPromoteIsTriggered) return true; - if (IsPromoteTriggered()) + if (IsPromoteSignaled()) { /* * In 9.1 and 9.2 the postmaster unlinked the promote file inside the @@ -12313,8 +12366,8 @@ CheckForStandbyTrigger(void) ereport(LOG, (errmsg("received promote request"))); - ResetPromoteTriggered(); - triggered = true; + ResetPromoteSignaled(); + SetPromoteIsTriggered(); return true; } @@ -12326,7 +12379,7 @@ CheckForStandbyTrigger(void) ereport(LOG, (errmsg("promote trigger file found: %s", PromoteTriggerFile))); unlink(PromoteTriggerFile); - triggered = true; + SetPromoteIsTriggered(); fast_promote = true; return true; } |