From f741300c90141ee274f19a13629ae03a9806b598 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera Date: Fri, 27 Jun 2014 14:43:53 -0400 Subject: Have multixact be truncated by checkpoint, not vacuum Instead of truncating pg_multixact at vacuum time, do it only at checkpoint time. The reason for doing it this way is twofold: first, we want it to delete only segments that we're certain will not be required if there's a crash immediately after the removal; and second, we want to do it relatively often so that older files are not left behind if there's an untimely crash. Per my proposal in http://www.postgresql.org/message-id/20140626044519.GJ7340@eldon.alvh.no-ip.org we now execute the truncation in the checkpointer process rather than as part of vacuum. Vacuum is only in charge of maintaining in shared memory the value to which it's possible to truncate the files; that value is stored as part of checkpoints also, and so upon recovery we can reuse the same value to re-execute truncate and reset the oldest-value-still-safe-to-use to one known to remain after truncation. Per bug reported by Jeff Janes in the course of his tests involving bug #8673. While at it, update some comments that hadn't been updated since multixacts were changed. Backpatch to 9.3, where persistency of pg_multixact files was introduced by commit 0ac5ad5134f2. 
--- src/backend/access/transam/xlog.c | 44 ++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'src/backend/access/transam/xlog.c') diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index abc5682e7f9..e5640793eb8 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -6264,6 +6264,7 @@ StartupXLOG(void) MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB); + MultiXactSetSafeTruncate(checkPoint.oldestMulti); XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch; XLogCtl->ckptXid = checkPoint.nextXid; @@ -8272,6 +8273,12 @@ CreateCheckPoint(int flags) */ END_CRIT_SECTION(); + /* + * Now that the checkpoint is safely on disk, we can update the point to + * which multixact can be truncated. + */ + MultiXactSetSafeTruncate(checkPoint.oldestMulti); + /* * Let smgr do post-checkpoint cleanup (eg, deleting old files). */ @@ -8305,6 +8312,11 @@ CreateCheckPoint(int flags) if (!RecoveryInProgress()) TruncateSUBTRANS(GetOldestXmin(NULL, false)); + /* + * Truncate pg_multixact too. + */ + TruncateMultiXact(); + /* Real work is done, but log and update stats before releasing lock. */ LogCheckpointEnd(false); @@ -8578,21 +8590,6 @@ CreateRestartPoint(int flags) } LWLockRelease(ControlFileLock); - /* - * Due to an historical accident multixact truncations are not WAL-logged, - * but just performed everytime the mxact horizon is increased. So, unless - * we explicitly execute truncations on a standby it will never clean out - * /pg_multixact which obviously is bad, both because it uses space and - * because we can wrap around into pre-existing data... 
- * - * We can only do the truncation here, after the UpdateControlFile() - * above, because we've now safely established a restart point, that - * guarantees we will not need need to access those multis. - * - * It's probably worth improving this. - */ - TruncateMultiXact(lastCheckPoint.oldestMulti); - /* * Delete old log files (those no longer needed even for previous * checkpoint/restartpoint) to prevent the disk holding the xlog from @@ -8651,6 +8648,21 @@ CreateRestartPoint(int flags) ThisTimeLineID = 0; } + /* + * Due to an historical accident multixact truncations are not WAL-logged, + * but just performed everytime the mxact horizon is increased. So, unless + * we explicitly execute truncations on a standby it will never clean out + * /pg_multixact which obviously is bad, both because it uses space and + * because we can wrap around into pre-existing data... + * + * We can only do the truncation here, after the UpdateControlFile() + * above, because we've now safely established a restart point. That + * guarantees we will not need to access those multis. + * + * It's probably worth improving this. + */ + TruncateMultiXact(); + /* * Truncate pg_subtrans if possible. We can throw away all data before * the oldest XMIN of any running transaction. 
No future transaction will @@ -9117,6 +9129,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) checkPoint.nextMultiOffset); SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB); SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB); + MultiXactSetSafeTruncate(checkPoint.oldestMulti); /* * If we see a shutdown checkpoint while waiting for an end-of-backup @@ -9217,6 +9230,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) checkPoint.oldestXidDB); MultiXactAdvanceOldest(checkPoint.oldestMulti, checkPoint.oldestMultiDB); + MultiXactSetSafeTruncate(checkPoint.oldestMulti); /* ControlFile->checkPointCopy always tracks the latest ckpt XID */ ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch; -- cgit v1.2.3