diff options
author | Michael Paquier <michael@paquier.xyz> | 2020-04-24 08:48:28 +0900 |
---|---|---|
committer | Michael Paquier <michael@paquier.xyz> | 2020-04-24 08:48:28 +0900 |
commit | 4e87c4836ab9059cdec17b0a288db3622a42ac18 (patch) | |
tree | e11fc75c0384b5487be7e5e1a02533f47eb41365 /src/backend/access/transam/xlog.c | |
parent | 3436c5e28374d4e0587634fda09faf4a38a9d848 (diff) | |
download | postgresql-4e87c4836ab9059cdec17b0a288db3622a42ac18.tar.gz postgresql-4e87c4836ab9059cdec17b0a288db3622a42ac18.zip |
Fix handling of WAL segments ready to be archived during crash recovery
Commit 78ea8b5 fixed an issue related to the recycling of WAL segments
on standbys depending on archive_mode. However, it introduced a
regression in the handling of WAL segments ready to be archived during
crash recovery, causing those files to be recycled without being
archived.
This commit fixes the regression by tracking in shared memory whether a
live cluster is in crash recovery or archive recovery, as the handling
of WAL segments ready to be archived differs between the two cases (those
WAL segments should not be removed during crash recovery), and by using
this new shared memory state to decide whether a segment can be recycled.
Previously, it was not possible to know whether a cluster was in crash
recovery or archive recovery, as the shared state tracked only whether
recovery was in progress at all, leading to the problem.
A set of TAP tests is added to close the gap here, making sure that WAL
segments ready to be archived are correctly handled when a cluster is in
archive or crash recovery with archive_mode set to "on" or "always", for
both standby and primary.
Reported-by: Benoît Lobréau
Author: Jehan-Guillaume de Rorthais
Reviewed-by: Kyotaro Horiguchi, Fujii Masao, Michael Paquier
Discussion: https://postgr.es/m/20200331172229.40ee00dc@firost
Backpatch-through: 9.5
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 57 |
1 files changed, 48 insertions, 9 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 11e32733c48..009a071276b 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -221,8 +221,9 @@ static TimeLineID receiveTLI = 0; static bool lastFullPageWrites; /* - * Local copy of SharedRecoveryInProgress variable. True actually means "not - * known, need to check the shared state". + * Local copy of the state tracked by SharedRecoveryState in shared memory. + * It is false if SharedRecoveryState is RECOVERY_STATE_DONE. True actually + * means "not known, need to check the shared state". */ static bool LocalRecoveryInProgress = true; @@ -653,10 +654,10 @@ typedef struct XLogCtlData TimeLineID PrevTimeLineID; /* - * SharedRecoveryInProgress indicates if we're still in crash or archive + * SharedRecoveryState indicates if we're still in crash or archive * recovery. Protected by info_lck. */ - bool SharedRecoveryInProgress; + RecoveryState SharedRecoveryState; /* * SharedHotStandbyActive indicates if we allow hot standby queries to be @@ -4434,6 +4435,16 @@ ReadRecord(XLogReaderState *xlogreader, int emode, updateMinRecoveryPoint = true; UpdateControlFile(); + + /* + * We update SharedRecoveryState while holding the lock on + * ControlFileLock so both states are consistent in shared + * memory. + */ + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE; + SpinLockRelease(&XLogCtl->info_lck); + LWLockRelease(ControlFileLock); CheckRecoveryConsistency(); @@ -5166,7 +5177,7 @@ XLOGShmemInit(void) * in additional info.) 
*/ XLogCtl->XLogCacheBlck = XLOGbuffers - 1; - XLogCtl->SharedRecoveryInProgress = true; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; XLogCtl->SharedHotStandbyActive = false; XLogCtl->SharedPromoteIsTriggered = false; XLogCtl->WalWriterSleeping = false; @@ -6871,7 +6882,13 @@ StartupXLOG(void) */ dbstate_at_startup = ControlFile->state; if (InArchiveRecovery) + { ControlFile->state = DB_IN_ARCHIVE_RECOVERY; + + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERY_STATE_ARCHIVE; + SpinLockRelease(&XLogCtl->info_lck); + } else { ereport(LOG, @@ -6884,6 +6901,10 @@ StartupXLOG(void) ControlFile->checkPointCopy.ThisTimeLineID, recoveryTargetTLI))); ControlFile->state = DB_IN_CRASH_RECOVERY; + + SpinLockAcquire(&XLogCtl->info_lck); + XLogCtl->SharedRecoveryState = RECOVERY_STATE_CRASH; + SpinLockRelease(&XLogCtl->info_lck); } ControlFile->checkPoint = checkPointLoc; ControlFile->checkPointCopy = checkPoint; @@ -7911,7 +7932,7 @@ StartupXLOG(void) ControlFile->time = (pg_time_t) time(NULL); SpinLockAcquire(&XLogCtl->info_lck); - XLogCtl->SharedRecoveryInProgress = false; + XLogCtl->SharedRecoveryState = RECOVERY_STATE_DONE; SpinLockRelease(&XLogCtl->info_lck); UpdateControlFile(); @@ -8057,7 +8078,7 @@ RecoveryInProgress(void) */ volatile XLogCtlData *xlogctl = XLogCtl; - LocalRecoveryInProgress = xlogctl->SharedRecoveryInProgress; + LocalRecoveryInProgress = (xlogctl->SharedRecoveryState != RECOVERY_STATE_DONE); /* * Initialize TimeLineID and RedoRecPtr when we discover that recovery @@ -8069,8 +8090,8 @@ RecoveryInProgress(void) { /* * If we just exited recovery, make sure we read TimeLineID and - * RedoRecPtr after SharedRecoveryInProgress (for machines with - * weak memory ordering). + * RedoRecPtr after SharedRecoveryState (for machines with weak + * memory ordering). */ pg_memory_barrier(); InitXLOGAccess(); @@ -8087,6 +8108,24 @@ RecoveryInProgress(void) } /* + * Returns current recovery state from shared memory. 
+ * + * This returned state is kept consistent with the contents of the control + * file. See details about the possible values of RecoveryState in xlog.h. + */ +RecoveryState +GetRecoveryState(void) +{ + RecoveryState retval; + + SpinLockAcquire(&XLogCtl->info_lck); + retval = XLogCtl->SharedRecoveryState; + SpinLockRelease(&XLogCtl->info_lck); + + return retval; +} + +/* * Is HotStandby active yet? This is only important in special backends * since normal backends won't ever be able to connect until this returns * true. Postmaster knows this by way of signal, not via shared memory. |