diff options
author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2017-01-19 18:23:09 -0300 |
---|---|---|
committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2017-01-19 18:24:17 -0300 |
commit | 8eace46d34ab6ac0d887aa4d3504bc4222c2e448 (patch) | |
tree | 1e6d8a76c81c56b452eff3885381f7d998aee7f7 /src/backend/access/transam/commit_ts.c | |
parent | 8b0fec93ecc788c8d8b329d41ab795712d8dcc5a (diff) | |
download | postgresql-8eace46d34ab6ac0d887aa4d3504bc4222c2e448.tar.gz postgresql-8eace46d34ab6ac0d887aa4d3504bc4222c2e448.zip |
Fix race condition in reading commit timestamps
If a user requests the commit timestamp for a transaction old enough
that its data is concurrently being truncated away by vacuum at just the
right time, they would receive an ugly internal file-not-found error
message from slru.c rather than the expected NULL return value.
In a primary server, the window for the race is very small: the lookup
has to occur exactly between the two calls by vacuum, and there's not a
lot that happens between them (mostly just a multixact truncate). In a
standby server, however, the window is larger because the truncation is
executed as soon as the WAL record for it is replayed, but the advance
of the oldest-Xid is not executed until the next checkpoint record.
To fix in the primary, simply reverse the order of operations in
vac_truncate_clog. To fix in the standby, augment the WAL truncation
record so that the standby is aware of the new oldest-XID value and can
apply the update immediately. WAL version bumped because of this.
No backpatch, because of the low importance of the bug and its rarity.
Author: Craig Ringer
Reviewed-By: Petr JelĂnek, Peter Eisentraut
Discussion: https://postgr.es/m/CAMsr+YFhVtRQT1VAwC+WGbbxZZRzNou=N9Ed-FrCqkwQ8H8oJQ@mail.gmail.com
Diffstat (limited to 'src/backend/access/transam/commit_ts.c')
-rw-r--r-- | src/backend/access/transam/commit_ts.c | 21 |
1 files changed, 13 insertions, 8 deletions
diff --git a/src/backend/access/transam/commit_ts.c b/src/backend/access/transam/commit_ts.c index 2403de3ae36..18a5f5602c7 100644 --- a/src/backend/access/transam/commit_ts.c +++ b/src/backend/access/transam/commit_ts.c @@ -113,7 +113,7 @@ static bool CommitTsPagePrecedes(int page1, int page2); static void ActivateCommitTs(void); static void DeactivateCommitTs(void); static void WriteZeroPageXlogRec(int pageno); -static void WriteTruncateXlogRec(int pageno); +static void WriteTruncateXlogRec(int pageno, TransactionId oldestXid); static void WriteSetTimestampXlogRec(TransactionId mainxid, int nsubxids, TransactionId *subxids, TimestampTz timestamp, RepOriginId nodeid); @@ -824,7 +824,7 @@ TruncateCommitTs(TransactionId oldestXact) return; /* nothing to remove */ /* Write XLOG record */ - WriteTruncateXlogRec(cutoffPage); + WriteTruncateXlogRec(cutoffPage, oldestXact); /* Now we can remove the old CommitTs segment(s) */ SimpleLruTruncate(CommitTsCtl, cutoffPage); @@ -910,10 +910,15 @@ WriteZeroPageXlogRec(int pageno) * Write a TRUNCATE xlog record */ static void -WriteTruncateXlogRec(int pageno) +WriteTruncateXlogRec(int pageno, TransactionId oldestXid) { + xl_commit_ts_truncate xlrec; + + xlrec.pageno = pageno; + xlrec.oldestXid = oldestXid; + XLogBeginInsert(); - XLogRegisterData((char *) (&pageno), sizeof(int)); + XLogRegisterData((char *) (&xlrec), SizeOfCommitTsTruncate); (void) XLogInsert(RM_COMMIT_TS_ID, COMMIT_TS_TRUNCATE); } @@ -967,17 +972,17 @@ commit_ts_redo(XLogReaderState *record) } else if (info == COMMIT_TS_TRUNCATE) { - int pageno; + xl_commit_ts_truncate *trunc = (xl_commit_ts_truncate *) XLogRecGetData(record); - memcpy(&pageno, XLogRecGetData(record), sizeof(int)); + AdvanceOldestCommitTsXid(trunc->oldestXid); /* * During XLOG replay, latest_page_number isn't set up yet; insert a * suitable value to bypass the sanity test in SimpleLruTruncate. */ - CommitTsCtl->shared->latest_page_number = pageno; + CommitTsCtl->shared->latest_page_number = trunc->pageno; - SimpleLruTruncate(CommitTsCtl, pageno); + SimpleLruTruncate(CommitTsCtl, trunc->pageno); } else if (info == COMMIT_TS_SETTS) { |