aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/xlog.c
diff options
context:
space:
mode:
author: Alvaro Herrera <alvherre@alvh.no-ip.org> 2014-06-27 14:43:53 -0400
committer: Alvaro Herrera <alvherre@alvh.no-ip.org> 2014-06-27 14:43:53 -0400
commit: f741300c90141ee274f19a13629ae03a9806b598 (patch)
tree: 3fbe8ea3f5bfc8426a7b7c09baefd9460ce0a07d /src/backend/access/transam/xlog.c
parent: b7e51d9c06e6a0da50abbbd0603ecb80f0b6f02b (diff)
download: postgresql-f741300c90141ee274f19a13629ae03a9806b598.tar.gz
postgresql-f741300c90141ee274f19a13629ae03a9806b598.zip
Have multixact be truncated by checkpoint, not vacuum
Instead of truncating pg_multixact at vacuum time, do it only at checkpoint time. The reason for doing it this way is twofold: first, we want it to delete only segments that we're certain will not be required if there's a crash immediately after the removal; and second, we want to do it relatively often so that older files are not left behind if there's an untimely crash. Per my proposal in http://www.postgresql.org/message-id/20140626044519.GJ7340@eldon.alvh.no-ip.org we now execute the truncation in the checkpointer process rather than as part of vacuum. Vacuum is only in charge of maintaining in shared memory the value to which it's possible to truncate the files; that value is stored as part of checkpoints also, and so upon recovery we can reuse the same value to re-execute truncate and reset the oldest-value-still-safe-to-use to one known to remain after truncation. Per bug reported by Jeff Janes in the course of his tests involving bug #8673. While at it, update some comments that hadn't been updated since multixacts were changed. Backpatch to 9.3, where persistency of pg_multixact files was introduced by commit 0ac5ad5134f2.
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- src/backend/access/transam/xlog.c 44
1 file changed, 29 insertions, 15 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index abc5682e7f9..e5640793eb8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -6264,6 +6264,7 @@ StartupXLOG(void)
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
+ MultiXactSetSafeTruncate(checkPoint.oldestMulti);
XLogCtl->ckptXidEpoch = checkPoint.nextXidEpoch;
XLogCtl->ckptXid = checkPoint.nextXid;
@@ -8273,6 +8274,12 @@ CreateCheckPoint(int flags)
END_CRIT_SECTION();
/*
+ * Now that the checkpoint is safely on disk, we can update the point to
+ * which multixact can be truncated.
+ */
+ MultiXactSetSafeTruncate(checkPoint.oldestMulti);
+
+ /*
* Let smgr do post-checkpoint cleanup (eg, deleting old files).
*/
smgrpostckpt();
@@ -8305,6 +8312,11 @@ CreateCheckPoint(int flags)
if (!RecoveryInProgress())
TruncateSUBTRANS(GetOldestXmin(NULL, false));
+ /*
+ * Truncate pg_multixact too.
+ */
+ TruncateMultiXact();
+
/* Real work is done, but log and update stats before releasing lock. */
LogCheckpointEnd(false);
@@ -8579,21 +8591,6 @@ CreateRestartPoint(int flags)
LWLockRelease(ControlFileLock);
/*
- * Due to an historical accident multixact truncations are not WAL-logged,
- * but just performed everytime the mxact horizon is increased. So, unless
- * we explicitly execute truncations on a standby it will never clean out
- * /pg_multixact which obviously is bad, both because it uses space and
- * because we can wrap around into pre-existing data...
- *
- * We can only do the truncation here, after the UpdateControlFile()
- * above, because we've now safely established a restart point, that
- * guarantees we will not need need to access those multis.
- *
- * It's probably worth improving this.
- */
- TruncateMultiXact(lastCheckPoint.oldestMulti);
-
- /*
* Delete old log files (those no longer needed even for previous
* checkpoint/restartpoint) to prevent the disk holding the xlog from
* growing full.
@@ -8652,6 +8649,21 @@ CreateRestartPoint(int flags)
}
/*
+ * Due to an historical accident multixact truncations are not WAL-logged,
+ * but just performed everytime the mxact horizon is increased. So, unless
+ * we explicitly execute truncations on a standby it will never clean out
+ * /pg_multixact which obviously is bad, both because it uses space and
+ * because we can wrap around into pre-existing data...
+ *
+ * We can only do the truncation here, after the UpdateControlFile()
+ * above, because we've now safely established a restart point. That
+ * guarantees we will not need to access those multis.
+ *
+ * It's probably worth improving this.
+ */
+ TruncateMultiXact();
+
+ /*
* Truncate pg_subtrans if possible. We can throw away all data before
* the oldest XMIN of any running transaction. No future transaction will
* attempt to reference any pg_subtrans entry older than that (see Asserts
@@ -9117,6 +9129,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
checkPoint.nextMultiOffset);
SetTransactionIdLimit(checkPoint.oldestXid, checkPoint.oldestXidDB);
SetMultiXactIdLimit(checkPoint.oldestMulti, checkPoint.oldestMultiDB);
+ MultiXactSetSafeTruncate(checkPoint.oldestMulti);
/*
* If we see a shutdown checkpoint while waiting for an end-of-backup
@@ -9217,6 +9230,7 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
checkPoint.oldestXidDB);
MultiXactAdvanceOldest(checkPoint.oldestMulti,
checkPoint.oldestMultiDB);
+ MultiXactSetSafeTruncate(checkPoint.oldestMulti);
/* ControlFile->checkPointCopy always tracks the latest ckpt XID */
ControlFile->checkPointCopy.nextXidEpoch = checkPoint.nextXidEpoch;