aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r--src/backend/access/transam/xlog.c120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2f5d6030660..cc8b83fa8d6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -95,6 +95,8 @@ bool EnableHotStandby = false;
bool fullPageWrites = true;
bool wal_log_hints = false;
bool wal_compression = false;
+char *wal_consistency_checking_string = NULL;
+bool *wal_consistency_checking = NULL;
bool log_checkpoints = false;
int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
@@ -245,6 +247,10 @@ bool InArchiveRecovery = false;
/* Was the last xlog file restored from archive, or local? */
static bool restoredFromArchive = false;
+/* Buffers of size BLCKSZ dedicated to WAL consistency checks */
+static char *replay_image_masked = NULL;
+static char *master_image_masked = NULL;
+
/* options taken from recovery.conf for archive recovery */
char *recoveryRestoreCommand = NULL;
static char *recoveryEndCommand = NULL;
@@ -903,6 +909,7 @@ static char *GetXLogBuffer(XLogRecPtr ptr);
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
+static void checkXLogConsistency(XLogReaderState *record);
static void WALInsertLockAcquire(void);
static void WALInsertLockAcquireExclusive(void);
@@ -1315,6 +1322,103 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
}
/*
+ * Checks, for each block referenced by the given WAL record, whether the
+ * current buffer page matches the backup image stored in the record. If
+ * the record's resource manager defines rm_mask, both pages are masked
+ * before comparison (to ignore areas such as hint bits or the unused space
+ * between pd_lower and pd_upper). Reports FATAL on a mismatch. This
+ * function should be called once WAL replay has completed for the record.
+ */
+static void
+checkXLogConsistency(XLogReaderState *record)
+{
+ RmgrId rmid = XLogRecGetRmid(record);
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blkno;
+ int block_id;
+
+ /* Records with no block references have nothing to check. */
+ if (!XLogRecHasAnyBlockRefs(record))
+ return;
+
+ Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
+
+ for (block_id = 0; block_id <= record->max_block_id; block_id++)
+ {
+ Buffer buf;
+ Page page;
+
+ if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+ {
+ /*
+ * WAL record doesn't contain a block reference with the given id.
+ * Do nothing.
+ */
+ continue;
+ }
+
+ Assert(XLogRecHasBlockImage(record, block_id));
+
+ /*
+ * Read the block as it stands now that the record has been replayed;
+ * skip this block if no valid buffer is returned.
+ */
+ buf = XLogReadBufferExtended(rnode, forknum, blkno,
+ RBM_NORMAL_NO_LOG);
+ if (!BufferIsValid(buf))
+ continue;
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+
+ /*
+ * Take a copy of the local page where WAL has been applied, to have a
+ * comparison base before masking it.
+ */
+ memcpy(replay_image_masked, page, BLCKSZ);
+
+ /* The buffer is no longer needed now that a copy has been taken. */
+ UnlockReleaseBuffer(buf);
+
+ /*
+ * If the block LSN is already ahead of this WAL record, we can't
+ * expect contents to match. This can happen if recovery is restarted.
+ */
+ if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
+ continue;
+
+ /*
+ * Read the contents from the backup copy, stored in WAL record and
+ * store it in a temporary page. There is no need to allocate a new
+ * page here, a local buffer is fine to hold its contents and a mask
+ * can be directly applied on it.
+ */
+ if (!RestoreBlockImage(record, block_id, master_image_masked))
+ elog(ERROR, "failed to restore block image");
+
+ /*
+ * If a masking function is defined for this resource manager, mask
+ * both the master and replay images.
+ */
+ if (RmgrTable[rmid].rm_mask != NULL)
+ {
+ RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
+ RmgrTable[rmid].rm_mask(master_image_masked, blkno);
+ }
+
+ /* Time to compare the master and replay images. */
+ if (memcmp(replay_image_masked, master_image_masked, BLCKSZ) != 0)
+ {
+ elog(FATAL,
+ "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forknum, blkno);
+ }
+ }
+}
+
+/*
* Subroutine of XLogInsertRecord. Copies a WAL record to an already-reserved
* area in the WAL.
*/
@@ -6200,6 +6304,13 @@ StartupXLOG(void)
errdetail("Failed while allocating an XLog reading processor.")));
xlogreader->system_identifier = ControlFile->system_identifier;
+ /*
+ * Allocate the two BLCKSZ-sized pages used by WAL consistency checks;
+ * these had better be aligned.
+ */
+ replay_image_masked = (char *) palloc(BLCKSZ);
+ master_image_masked = (char *) palloc(BLCKSZ);
+
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
@@ -7000,6 +7111,15 @@ StartupXLOG(void)
/* Now apply the WAL record itself */
RmgrTable[record->xl_rmid].rm_redo(xlogreader);
+ /*
+ * After redo, check whether the backup pages associated with
+ * the WAL record are consistent with the existing pages. This
+ * check is done only if consistency check is enabled for this
+ * record.
+ */
+ if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
+ checkXLogConsistency(xlogreader);
+
/* Pop the error context stack */
error_context_stack = errcallback.previous;