diff options
Diffstat (limited to 'src/backend/access/transam/multixact.c')
-rw-r--r-- | src/backend/access/transam/multixact.c | 117 |
1 files changed, 76 insertions, 41 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c index 2cdfed4945e..9f259bb54eb 100644 --- a/src/backend/access/transam/multixact.c +++ b/src/backend/access/transam/multixact.c @@ -45,14 +45,17 @@ * anything we saw during replay. * * We are able to remove segments no longer necessary by carefully tracking - * each table's used values: during vacuum, any multixact older than a - * certain value is removed; the cutoff value is stored in pg_class. - * The minimum value in each database is stored in pg_database, and the - * global minimum is part of pg_control. Any vacuum that is able to - * advance its database's minimum value also computes a new global minimum, - * and uses this value to truncate older segments. When new multixactid - * values are to be created, care is taken that the counter does not - * fall within the wraparound horizon considering the global minimum value. + * each table's used values: during vacuum, any multixact older than a certain + * value is removed; the cutoff value is stored in pg_class. The minimum value + * across all tables in each database is stored in pg_database, and the global + * minimum across all databases is part of pg_control and is kept in shared + * memory. At checkpoint time, after the value is known flushed in WAL, any + * files that correspond to multixacts older than that value are removed. + * (These files are also removed when a restartpoint is executed.) + * + * When new multixactid values are to be created, care is taken that the + * counter does not fall within the wraparound horizon considering the global + * minimum value. * * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -91,7 +94,7 @@ * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF, * MultiXact page numbering also wraps around at * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at - * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need + * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need * take no explicit notice of that fact in this module, except when comparing * segment and page numbers in TruncateMultiXact (see * MultiXactOffsetPagePrecedes). @@ -188,16 +191,20 @@ typedef struct MultiXactStateData /* next-to-be-assigned offset */ MultiXactOffset nextOffset; - /* the Offset SLRU area was last truncated at this MultiXactId */ - MultiXactId lastTruncationPoint; - /* - * oldest multixact that is still on disk. Anything older than this - * should not be consulted. + * Oldest multixact that is still on disk. Anything older than this + * should not be consulted. These values are updated by vacuum. */ MultiXactId oldestMultiXactId; Oid oldestMultiXactDB; + /* + * This is what the previous checkpoint stored as the truncate position. + * This value is the oldestMultiXactId that was valid when a checkpoint + * was last executed. + */ + MultiXactId lastCheckpointedOldest; + /* support for anti-wraparound measures */ MultiXactId multiVacLimit; MultiXactId multiWarnLimit; @@ -234,12 +241,20 @@ typedef struct MultiXactStateData * than its own OldestVisibleMXactId[] setting; this is necessary because * the checkpointer could truncate away such data at any instant. * - * The checkpointer can compute the safe truncation point as the oldest - * valid value among all the OldestMemberMXactId[] and - * OldestVisibleMXactId[] entries, or nextMXact if none are valid. - * Clearly, it is not possible for any later-computed OldestVisibleMXactId - * value to be older than this, and so there is no risk of truncating data - * that is still needed. + * The oldest valid value among all of the OldestMemberMXactId[] and + * OldestVisibleMXactId[] entries is considered by vacuum as the earliest + * possible value still having any live member transaction. Subtracting + * vacuum_multixact_freeze_min_age from that value we obtain the freezing + * point for multixacts for that table. Any value older than that is + * removed from tuple headers (or "frozen"; see FreezeMultiXactId. Note + * that multis that have member xids that are older than the cutoff point + * for xids must also be frozen, even if the multis themselves are newer + * than the multixid cutoff point). Whenever a full table vacuum happens, + * the freezing point so computed is used as the new pg_class.relminmxid + * value. The minimum of all those values in a database is stored as + * pg_database.datminmxid. In turn, the minimum of all of those values is + * stored in pg_control and used as truncation point for pg_multixact. At + * checkpoint or restartpoint, unneeded segments are removed. */ MultiXactId perBackendXactIds[1]; /* VARIABLE LENGTH ARRAY */ } MultiXactStateData; @@ -1121,8 +1136,8 @@ GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members, * We check known limits on MultiXact before resorting to the SLRU area. * * An ID older than MultiXactState->oldestMultiXactId cannot possibly be - * useful; it should have already been removed by vacuum. We've truncated - * the on-disk structures anyway. Returning the wrong values could lead + * useful; it has already been removed, or will be removed shortly, by + * truncation. Returning the wrong values could lead * to an incorrect visibility result. However, to support pg_upgrade we * need to allow an empty set to be returned regardless, if the caller is * willing to accept it; the caller is expected to check that it's an @@ -1932,14 +1947,14 @@ TrimMultiXact(void) LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE); /* - * (Re-)Initialize our idea of the latest page number. + * (Re-)Initialize our idea of the latest page number for offsets. */ pageno = MultiXactIdToOffsetPage(multi); MultiXactOffsetCtl->shared->latest_page_number = pageno; /* * Zero out the remainder of the current offsets page. See notes in - * StartupCLOG() for motivation. + * TrimCLOG() for motivation. */ entryno = MultiXactIdToOffsetEntry(multi); if (entryno != 0) @@ -1962,7 +1977,7 @@ TrimMultiXact(void) LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE); /* - * (Re-)Initialize our idea of the latest page number. + * (Re-)Initialize our idea of the latest page number for members. */ pageno = MXOffsetToMemberPage(offset); MultiXactMemberCtl->shared->latest_page_number = pageno; @@ -2241,6 +2256,18 @@ MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB) } /* + * Update the "safe truncation point". This is the newest value of oldestMulti + * that is known to be flushed as part of a checkpoint record. + */ +void +MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti) +{ + LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE); + MultiXactState->lastCheckpointedOldest = safeTruncateMulti; + LWLockRelease(MultiXactGenLock); +} + +/* * Make sure that MultiXactOffset has room for a newly-allocated MultiXactId. * * NB: this is called while holding MultiXactGenLock. We want it to be very @@ -2478,25 +2505,31 @@ SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data) * Remove all MultiXactOffset and MultiXactMember segments before the oldest * ones still of interest. * - * On a primary, this is called by vacuum after it has successfully advanced a - * database's datminmxid value; the cutoff value we're passed is the minimum of - * all databases' datminmxid values. - * - * During crash recovery, it's called from CreateRestartPoint() instead. We - * rely on the fact that xlog_redo() will already have called - * MultiXactAdvanceOldest(). Our latest_page_number will already have been - * initialized by StartupMultiXact() and kept up to date as new pages are - * zeroed. + * On a primary, this is called by the checkpointer process after a checkpoint + * has been flushed; during crash recovery, it's called from + * CreateRestartPoint(). In the latter case, we rely on the fact that + * xlog_redo() will already have called MultiXactAdvanceOldest(). Our + * latest_page_number will already have been initialized by StartupMultiXact() + * and kept up to date as new pages are zeroed. */ void -TruncateMultiXact(MultiXactId oldestMXact) +TruncateMultiXact(void) { + MultiXactId oldestMXact; MultiXactOffset oldestOffset; MultiXactOffset nextOffset; mxtruncinfo trunc; MultiXactId earliest; MembersLiveRange range; + Assert(AmCheckpointerProcess() || AmStartupProcess() || + !IsPostmasterEnvironment); + + LWLockAcquire(MultiXactGenLock, LW_SHARED); + oldestMXact = MultiXactState->lastCheckpointedOldest; + LWLockRelease(MultiXactGenLock); + Assert(MultiXactIdIsValid(oldestMXact)); + /* * Note we can't just plow ahead with the truncation; it's possible that * there are no segments to truncate, which is a problem because we are @@ -2507,6 +2540,8 @@ TruncateMultiXact(MultiXactId oldestMXact) trunc.earliestExistingPage = -1; SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc); earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE; + if (earliest < FirstMultiXactId) + earliest = FirstMultiXactId; /* nothing to do */ if (MultiXactIdPrecedes(oldestMXact, earliest)) @@ -2514,8 +2549,7 @@ TruncateMultiXact(MultiXactId oldestMXact) /* * First, compute the safe truncation point for MultiXactMember. This is - * the starting offset of the multixact we were passed as MultiXactOffset - * cutoff. + * the starting offset of the oldest multixact. */ { int pageno; @@ -2538,10 +2572,6 @@ TruncateMultiXact(MultiXactId oldestMXact) LWLockRelease(MultiXactOffsetControlLock); } - /* truncate MultiXactOffset */ - SimpleLruTruncate(MultiXactOffsetCtl, - MultiXactIdToOffsetPage(oldestMXact)); - /* * To truncate MultiXactMembers, we need to figure out the active page * range and delete all files outside that range. The start point is the @@ -2559,6 +2589,11 @@ TruncateMultiXact(MultiXactId oldestMXact) range.rangeEnd = MXOffsetToMemberPage(nextOffset); SlruScanDirectory(MultiXactMemberCtl, SlruScanDirCbRemoveMembers, &range); + + /* Now we can truncate MultiXactOffset */ + SimpleLruTruncate(MultiXactOffsetCtl, + MultiXactIdToOffsetPage(oldestMXact)); + } /* |