aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2018-09-07 11:00:16 -0700
committerMichael Paquier <michael@paquier.xyz>2018-09-07 11:00:16 -0700
commit8582b4d044b05d3fe4bcdf6e039fde8e753934ae (patch)
tree9ddacee248b29e398d44a2e378610e95bb2769a1 /src
parent7b6b167fa3bd2f968ad885ca7b024be4122a85f1 (diff)
downloadpostgresql-8582b4d044b05d3fe4bcdf6e039fde8e753934ae.tar.gz
postgresql-8582b4d044b05d3fe4bcdf6e039fde8e753934ae.zip
Improve handling of corrupted two-phase state files at recovery
When a corrupted two-phase state file is found by WAL replay, be it for crash recovery or archive recovery, then the file is simply skipped and a WARNING is logged to the user, causing the transaction to be silently lost. Facing an on-disk WAL file which is corrupted is as likely to happen as what is stored in WAL records, but WAL records are already able to fail hard if there is a CRC mismatch. On-disk two-phase state files, on the contrary, are simply ignored if corrupted. Note that when restoring the initial two-phase data state at recovery, files newer than the horizon XID are discarded hence no files present in pg_twophase/ should be torned and have been made durable by a previous checkpoint, so recovery should never see any corrupted two-phase state file by design. The situation got better since 978b2f6 which has added two-phase state information directly in WAL instead of using on-disk files, so the risk is limited to two-phase transactions which live across at least one checkpoint for long periods. Backups having legit two-phase state files on-disk could also lose silently transactions when restored if things get corrupted. This behavior exists since two-phase commit has been introduced, no back-patch is done for now per the lack of complaints about this problem. Author: Michael Paquier Discussion: https://postgr.es/m/20180709050309.GM1467@paquier.xyz
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/transam/twophase.c139
1 files changed, 59 insertions, 80 deletions
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 4aff6cf7f27..c06bbe72646 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1207,10 +1207,12 @@ RegisterTwoPhaseRecord(TwoPhaseRmgrId rmid, uint16 info,
* Read and validate the state file for xid.
*
* If it looks OK (has a valid magic number and CRC), return the palloc'd
- * contents of the file. Otherwise return NULL.
+ * contents of the file, issuing an error when finding corrupted data. If
+ * missing_ok is true, which indicates that missing files can be safely
+ * ignored, then return NULL. This state can be reached when doing recovery.
*/
static char *
-ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
+ReadTwoPhaseFile(TransactionId xid, bool missing_ok)
{
char path[MAXPGPATH];
char *buf;
@@ -1227,11 +1229,12 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
if (fd < 0)
{
- if (give_warnings)
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not open file \"%s\": %m", path)));
- return NULL;
+ if (missing_ok && errno == ENOENT)
+ return NULL;
+
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\": %m", path)));
}
/*
@@ -1241,35 +1244,27 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
* even on a valid file.
*/
if (fstat(fd, &stat))
- {
- int save_errno = errno;
-
- CloseTransientFile(fd);
- if (give_warnings)
- {
- errno = save_errno;
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not stat file \"%s\": %m", path)));
- }
- return NULL;
- }
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", path)));
if (stat.st_size < (MAXALIGN(sizeof(TwoPhaseFileHeader)) +
MAXALIGN(sizeof(TwoPhaseRecordOnDisk)) +
sizeof(pg_crc32c)) ||
stat.st_size > MaxAllocSize)
- {
- CloseTransientFile(fd);
- return NULL;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg_plural("incorrect size of file \"%s\": %zu byte",
+ "incorrect size of file \"%s\": %zu bytes",
+ (Size) stat.st_size, path,
+ (Size) stat.st_size)));
crc_offset = stat.st_size - sizeof(pg_crc32c);
if (crc_offset != MAXALIGN(crc_offset))
- {
- CloseTransientFile(fd);
- return NULL;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("incorrect alignment of CRC offset for file \"%s\"",
+ path)));
/*
* OK, slurp in the file.
@@ -1280,37 +1275,31 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
r = read(fd, buf, stat.st_size);
if (r != stat.st_size)
{
- int save_errno = errno;
-
- pgstat_report_wait_end();
- CloseTransientFile(fd);
- if (give_warnings)
- {
- if (r < 0)
- {
- errno = save_errno;
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not read file \"%s\": %m", path)));
- }
- else
- ereport(WARNING,
- (errmsg("could not read file \"%s\": read %d of %zu",
- path, r, (Size) stat.st_size)));
- }
- pfree(buf);
- return NULL;
+ if (r < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", path)));
+ else
+ ereport(ERROR,
+ (errmsg("could not read file \"%s\": read %d of %zu",
+ path, r, (Size) stat.st_size)));
}
pgstat_report_wait_end();
CloseTransientFile(fd);
hdr = (TwoPhaseFileHeader *) buf;
- if (hdr->magic != TWOPHASE_MAGIC || hdr->total_len != stat.st_size)
- {
- pfree(buf);
- return NULL;
- }
+ if (hdr->magic != TWOPHASE_MAGIC)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid magic number stored in file \"%s\"",
+ path)));
+
+ if (hdr->total_len != stat.st_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid size stored in file \"%s\"",
+ path)));
INIT_CRC32C(calc_crc);
COMP_CRC32C(calc_crc, buf, crc_offset);
@@ -1319,10 +1308,10 @@ ReadTwoPhaseFile(TransactionId xid, bool give_warnings)
file_crc = *((pg_crc32c *) (buf + crc_offset));
if (!EQ_CRC32C(calc_crc, file_crc))
- {
- pfree(buf);
- return NULL;
- }
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("calculated CRC checksum does not match value stored in file \"%s\"",
+ path)));
return buf;
}
@@ -1431,7 +1420,7 @@ StandbyTransactionIdIsPrepared(TransactionId xid)
return false; /* nothing to do */
/* Read and validate file */
- buf = ReadTwoPhaseFile(xid, false);
+ buf = ReadTwoPhaseFile(xid, true);
if (buf == NULL)
return false;
@@ -1479,7 +1468,7 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
* to disk if for some reason they have lived for a long time.
*/
if (gxact->ondisk)
- buf = ReadTwoPhaseFile(xid, true);
+ buf = ReadTwoPhaseFile(xid, false);
else
XlogReadTwoPhaseData(gxact->prepare_start_lsn, &buf, NULL);
@@ -1874,6 +1863,10 @@ restoreTwoPhaseData(void)
* write a WAL entry, and so there might be no evidence in WAL of those
* subxact XIDs.
*
+ * On corrupted two-phase files, fail immediately. Keeping around broken
+ * entries and let replay continue causes harm on the system, and a new
+ * backup should be rolled in.
+ *
* Our other responsibility is to determine and return the oldest valid XID
* among the prepared xacts (if none, return ShmemVariableCache->nextXid).
* This is needed to synchronize pg_subtrans startup properly.
@@ -2164,15 +2157,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (fromdisk)
{
/* Read and validate file */
- buf = ReadTwoPhaseFile(xid, true);
- if (buf == NULL)
- {
- ereport(WARNING,
- (errmsg("removing corrupt two-phase state file for transaction %u",
- xid)));
- RemoveTwoPhaseFile(xid, true);
- return NULL;
- }
+ buf = ReadTwoPhaseFile(xid, false);
}
else
{
@@ -2185,21 +2170,15 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (!TransactionIdEquals(hdr->xid, xid))
{
if (fromdisk)
- {
- ereport(WARNING,
- (errmsg("removing corrupt two-phase state file for transaction %u",
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("corrupted two-phase state file for transaction \"%u\"",
xid)));
- RemoveTwoPhaseFile(xid, true);
- }
else
- {
- ereport(WARNING,
- (errmsg("removing corrupt two-phase state from memory for transaction %u",
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("corrupted two-phase state in memory for transaction \"%u\"",
xid)));
- PrepareRedoRemove(xid, true);
- }
- pfree(buf);
- return NULL;
}
/*