aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/multixact.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/multixact.c')
-rw-r--r--src/backend/access/transam/multixact.c117
1 files changed, 76 insertions, 41 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 2cdfed4945e..9f259bb54eb 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -45,14 +45,17 @@
* anything we saw during replay.
*
* We are able to remove segments no longer necessary by carefully tracking
- * each table's used values: during vacuum, any multixact older than a
- * certain value is removed; the cutoff value is stored in pg_class.
- * The minimum value in each database is stored in pg_database, and the
- * global minimum is part of pg_control. Any vacuum that is able to
- * advance its database's minimum value also computes a new global minimum,
- * and uses this value to truncate older segments. When new multixactid
- * values are to be created, care is taken that the counter does not
- * fall within the wraparound horizon considering the global minimum value.
+ * each table's used values: during vacuum, any multixact older than a certain
+ * value is removed; the cutoff value is stored in pg_class. The minimum value
+ * across all tables in each database is stored in pg_database, and the global
+ * minimum across all databases is part of pg_control and is kept in shared
+ * memory. At checkpoint time, after the value is known flushed in WAL, any
+ * files that correspond to multixacts older than that value are removed.
+ * (These files are also removed when a restartpoint is executed.)
+ *
+ * When new multixactid values are to be created, care is taken that the
+ * counter does not fall within the wraparound horizon considering the global
+ * minimum value.
*
* Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -91,7 +94,7 @@
* Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
* MultiXact page numbering also wraps around at
* 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need
* take no explicit notice of that fact in this module, except when comparing
* segment and page numbers in TruncateMultiXact (see
* MultiXactOffsetPagePrecedes).
@@ -188,16 +191,20 @@ typedef struct MultiXactStateData
/* next-to-be-assigned offset */
MultiXactOffset nextOffset;
- /* the Offset SLRU area was last truncated at this MultiXactId */
- MultiXactId lastTruncationPoint;
-
/*
- * oldest multixact that is still on disk. Anything older than this
- * should not be consulted.
+ * Oldest multixact that is still on disk. Anything older than this
+ * should not be consulted. These values are updated by vacuum.
*/
MultiXactId oldestMultiXactId;
Oid oldestMultiXactDB;
+ /*
+ * This is what the previous checkpoint stored as the truncate position.
+ * This value is the oldestMultiXactId that was valid when a checkpoint
+ * was last executed.
+ */
+ MultiXactId lastCheckpointedOldest;
+
/* support for anti-wraparound measures */
MultiXactId multiVacLimit;
MultiXactId multiWarnLimit;
@@ -234,12 +241,20 @@ typedef struct MultiXactStateData
* than its own OldestVisibleMXactId[] setting; this is necessary because
* the checkpointer could truncate away such data at any instant.
*
- * The checkpointer can compute the safe truncation point as the oldest
- * valid value among all the OldestMemberMXactId[] and
- * OldestVisibleMXactId[] entries, or nextMXact if none are valid.
- * Clearly, it is not possible for any later-computed OldestVisibleMXactId
- * value to be older than this, and so there is no risk of truncating data
- * that is still needed.
+ * The oldest valid value among all of the OldestMemberMXactId[] and
+ * OldestVisibleMXactId[] entries is considered by vacuum as the earliest
+ * possible value still having any live member transaction. Subtracting
+ * vacuum_multixact_freeze_min_age from that value we obtain the freezing
+ * point for multixacts for that table. Any value older than that is
+ * removed from tuple headers (or "frozen"; see FreezeMultiXactId. Note
+ * that multis that have member xids that are older than the cutoff point
+ * for xids must also be frozen, even if the multis themselves are newer
+ * than the multixid cutoff point). Whenever a full table vacuum happens,
+ * the freezing point so computed is used as the new pg_class.relminmxid
+ * value. The minimum of all those values in a database is stored as
+ * pg_database.datminmxid. In turn, the minimum of all of those values is
+ * stored in pg_control and used as truncation point for pg_multixact. At
+ * checkpoint or restartpoint, unneeded segments are removed.
*/
MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */
} MultiXactStateData;
@@ -1121,8 +1136,8 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
* We check known limits on MultiXact before resorting to the SLRU area.
*
* An ID older than MultiXactState->oldestMultiXactId cannot possibly be
- * useful; it should have already been removed by vacuum. We've truncated
- * the on-disk structures anyway. Returning the wrong values could lead
+ * useful; it has already been removed, or will be removed shortly, by
+ * truncation. Returning the wrong values could lead
* to an incorrect visibility result. However, to support pg_upgrade we
* need to allow an empty set to be returned regardless, if the caller is
* willing to accept it; the caller is expected to check that it's an
@@ -1932,14 +1947,14 @@ TrimMultiXact(void)
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
/*
- * (Re-)Initialize our idea of the latest page number.
+ * (Re-)Initialize our idea of the latest page number for offsets.
*/
pageno = MultiXactIdToOffsetPage(multi);
MultiXactOffsetCtl->shared->latest_page_number = pageno;
/*
* Zero out the remainder of the current offsets page. See notes in
- * StartupCLOG() for motivation.
+ * TrimCLOG() for motivation.
*/
entryno = MultiXactIdToOffsetEntry(multi);
if (entryno != 0)
@@ -1962,7 +1977,7 @@ TrimMultiXact(void)
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
/*
- * (Re-)Initialize our idea of the latest page number.
+ * (Re-)Initialize our idea of the latest page number for members.
*/
pageno = MXOffsetToMemberPage(offset);
MultiXactMemberCtl->shared->latest_page_number = pageno;
@@ -2241,6 +2256,18 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
}
/*
+ * Update the "safe truncation point". This is the newest value of oldestMulti
+ * that is known to be flushed as part of a checkpoint record.
+ */
+void
+MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti)
+{
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ MultiXactState->lastCheckpointedOldest = safeTruncateMulti;
+ LWLockRelease(MultiXactGenLock);
+}
+
+/*
* Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
*
* NB: this is called while holding MultiXactGenLock. We want it to be very
@@ -2478,25 +2505,31 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
* Remove all MultiXactOffset and MultiXactMember segments before the oldest
* ones still of interest.
*
- * On a primary, this is called by vacuum after it has successfully advanced a
- * database's datminmxid value; the cutoff value we're passed is the minimum of
- * all databases' datminmxid values.
- *
- * During crash recovery, it's called from CreateRestartPoint() instead. We
- * rely on the fact that xlog_redo() will already have called
- * MultiXactAdvanceOldest(). Our latest_page_number will already have been
- * initialized by StartupMultiXact() and kept up to date as new pages are
- * zeroed.
+ * On a primary, this is called by the checkpointer process after a checkpoint
+ * has been flushed; during crash recovery, it's called from
+ * CreateRestartPoint(). In the latter case, we rely on the fact that
+ * xlog_redo() will already have called MultiXactAdvanceOldest(). Our
+ * latest_page_number will already have been initialized by StartupMultiXact()
+ * and kept up to date as new pages are zeroed.
*/
void
-TruncateMultiXact(MultiXactId oldestMXact)
+TruncateMultiXact(void)
{
+ MultiXactId oldestMXact;
MultiXactOffset oldestOffset;
MultiXactOffset nextOffset;
mxtruncinfo trunc;
MultiXactId earliest;
MembersLiveRange range;
+ Assert(AmCheckpointerProcess() || AmStartupProcess() ||
+ !IsPostmasterEnvironment);
+
+ LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ oldestMXact = MultiXactState->lastCheckpointedOldest;
+ LWLockRelease(MultiXactGenLock);
+ Assert(MultiXactIdIsValid(oldestMXact));
+
/*
* Note we can't just plow ahead with the truncation; it's possible that
* there are no segments to truncate, which is a problem because we are
@@ -2507,6 +2540,8 @@ TruncateMultiXact(MultiXactId oldestMXact)
trunc.earliestExistingPage = -1;
SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
+ if (earliest < FirstMultiXactId)
+ earliest = FirstMultiXactId;
/* nothing to do */
if (MultiXactIdPrecedes(oldestMXact, earliest))
@@ -2514,8 +2549,7 @@ TruncateMultiXact(MultiXactId oldestMXact)
/*
* First, compute the safe truncation point for MultiXactMember. This is
- * the starting offset of the multixact we were passed as MultiXactOffset
- * cutoff.
+ * the starting offset of the oldest multixact.
*/
{
int pageno;
@@ -2538,10 +2572,6 @@ TruncateMultiXact(MultiXactId oldestMXact)
LWLockRelease(MultiXactOffsetControlLock);
}
- /* truncate MultiXactOffset */
- SimpleLruTruncate(MultiXactOffsetCtl,
- MultiXactIdToOffsetPage(oldestMXact));
-
/*
* To truncate MultiXactMembers, we need to figure out the active page
* range and delete all files outside that range. The start point is the
@@ -2559,6 +2589,11 @@ TruncateMultiXact(MultiXactId oldestMXact)
range.rangeEnd = MXOffsetToMemberPage(nextOffset);
SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range);
+
+ /* Now we can truncate MultiXactOffset */
+ SimpleLruTruncate(MultiXactOffsetCtl,
+ MultiXactIdToOffsetPage(oldestMXact));
+
}
/*