aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2017-02-08 15:45:30 -0500
committerRobert Haas <rhaas@postgresql.org>2017-02-08 15:45:30 -0500
commita507b86900f695aacc8d52b7d2cfcb65f58862a2 (patch)
tree1cc25aaddb8613d2744e803e3ea970fd50d1e309 /src/backend/access/transam/xlog.c
parent115cb31597fac8a17202d1e41da8baf33fcb60cf (diff)
downloadpostgresql-a507b86900f695aacc8d52b7d2cfcb65f58862a2.tar.gz
postgresql-a507b86900f695aacc8d52b7d2cfcb65f58862a2.zip
Add WAL consistency checking facility.
When the new GUC wal_consistency_checking is set to a non-empty value, it triggers recording of additional full-page images, which are compared on the standby against the results of applying the WAL record (without regard to those full-page images). Allowable differences such as hints are masked out, and the resulting pages are compared; any difference results in a FATAL error on the standby. Kuntal Ghosh, based on earlier patches by Michael Paquier and Heikki Linnakangas. Extensively reviewed and revised by Michael Paquier and by me, with additional reviews and comments from Amit Kapila, Álvaro Herrera, Simon Riggs, and Peter Eisentraut.
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r--src/backend/access/transam/xlog.c120
1 files changed, 120 insertions, 0 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2f5d6030660..cc8b83fa8d6 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -95,6 +95,8 @@ bool EnableHotStandby = false;
bool fullPageWrites = true;
bool wal_log_hints = false;
bool wal_compression = false;
+char *wal_consistency_checking_string = NULL;
+bool *wal_consistency_checking = NULL;
bool log_checkpoints = false;
int sync_method = DEFAULT_SYNC_METHOD;
int wal_level = WAL_LEVEL_MINIMAL;
@@ -245,6 +247,10 @@ bool InArchiveRecovery = false;
/* Was the last xlog file restored from archive, or local? */
static bool restoredFromArchive = false;
+/* Buffers dedicated to consistency checks of size BLCKSZ */
+static char *replay_image_masked = NULL;
+static char *master_image_masked = NULL;
+
/* options taken from recovery.conf for archive recovery */
char *recoveryRestoreCommand = NULL;
static char *recoveryEndCommand = NULL;
@@ -903,6 +909,7 @@ static char *GetXLogBuffer(XLogRecPtr ptr);
static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos);
static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos);
static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr);
+static void checkXLogConsistency(XLogReaderState *record);
static void WALInsertLockAcquire(void);
static void WALInsertLockAcquireExclusive(void);
@@ -1315,6 +1322,103 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
}
/*
+ * Checks whether the current buffer page and backup page stored in the
+ * WAL record are consistent or not. Before comparing the two pages, a
+ * masking can be applied to the pages to ignore certain areas like hint bits,
+ * unused space between pd_lower and pd_upper among other things. This
+ * function should be called once WAL replay has been completed for a
+ * given record.
+ */
+static void
+checkXLogConsistency(XLogReaderState *record)
+{
+ RmgrId rmid = XLogRecGetRmid(record);
+ RelFileNode rnode;
+ ForkNumber forknum;
+ BlockNumber blkno;
+ int block_id;
+
+ /* Records with no backup blocks have no need for consistency checks. */
+ if (!XLogRecHasAnyBlockRefs(record))
+ return;
+
+ Assert((XLogRecGetInfo(record) & XLR_CHECK_CONSISTENCY) != 0);
+
+ for (block_id = 0; block_id <= record->max_block_id; block_id++)
+ {
+ Buffer buf;
+ Page page;
+
+ if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
+ {
+ /*
+ * WAL record doesn't contain a block reference with the given id.
+ * Do nothing.
+ */
+ continue;
+ }
+
+ Assert(XLogRecHasBlockImage(record, block_id));
+
+ /*
+ * Read the contents from the current buffer and store it in a
+ * temporary page.
+ */
+ buf = XLogReadBufferExtended(rnode, forknum, blkno,
+ RBM_NORMAL_NO_LOG);
+ if (!BufferIsValid(buf))
+ continue;
+
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ page = BufferGetPage(buf);
+
+ /*
+ * Take a copy of the local page where WAL has been applied to have a
+ * comparison base before masking it...
+ */
+ memcpy(replay_image_masked, page, BLCKSZ);
+
+ /* No need for this page anymore now that a copy is in. */
+ UnlockReleaseBuffer(buf);
+
+ /*
+ * If the block LSN is already ahead of this WAL record, we can't
+ * expect contents to match. This can happen if recovery is restarted.
+ */
+ if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
+ continue;
+
+ /*
+ * Read the contents from the backup copy, stored in WAL record and
+ * store it in a temporary page. There is not need to allocate a new
+ * page here, a local buffer is fine to hold its contents and a mask
+ * can be directly applied on it.
+ */
+ if (!RestoreBlockImage(record, block_id, master_image_masked))
+ elog(ERROR, "failed to restore block image");
+
+ /*
+ * If masking function is defined, mask both the master and replay
+ * images
+ */
+ if (RmgrTable[rmid].rm_mask != NULL)
+ {
+ RmgrTable[rmid].rm_mask(replay_image_masked, blkno);
+ RmgrTable[rmid].rm_mask(master_image_masked, blkno);
+ }
+
+ /* Time to compare the master and replay images. */
+ if (memcmp(replay_image_masked, master_image_masked, BLCKSZ) != 0)
+ {
+ elog(FATAL,
+ "inconsistent page found, rel %u/%u/%u, forknum %u, blkno %u",
+ rnode.spcNode, rnode.dbNode, rnode.relNode,
+ forknum, blkno);
+ }
+ }
+}
+
+/*
* Subroutine of XLogInsertRecord. Copies a WAL record to an already-reserved
* area in the WAL.
*/
@@ -6200,6 +6304,13 @@ StartupXLOG(void)
errdetail("Failed while allocating an XLog reading processor.")));
xlogreader->system_identifier = ControlFile->system_identifier;
+ /*
+ * Allocate pages dedicated to WAL consistency checks, those had better
+ * be aligned.
+ */
+ replay_image_masked = (char *) palloc(BLCKSZ);
+ master_image_masked = (char *) palloc(BLCKSZ);
+
if (read_backup_label(&checkPointLoc, &backupEndRequired,
&backupFromStandby))
{
@@ -7000,6 +7111,15 @@ StartupXLOG(void)
/* Now apply the WAL record itself */
RmgrTable[record->xl_rmid].rm_redo(xlogreader);
+ /*
+ * After redo, check whether the backup pages associated with
+ * the WAL record are consistent with the existing pages. This
+ * check is done only if consistency check is enabled for this
+ * record.
+ */
+ if ((record->xl_info & XLR_CHECK_CONSISTENCY) != 0)
+ checkXLogConsistency(xlogreader);
+
/* Pop the error context stack */
error_context_stack = errcallback.previous;