aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/multixact.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/multixact.c')
-rw-r--r--src/backend/access/transam/multixact.c1191
1 files changed, 814 insertions, 377 deletions
diff --git a/src/backend/access/transam/multixact.c b/src/backend/access/transam/multixact.c
index 1ae671743c5..9f804f75990 100644
--- a/src/backend/access/transam/multixact.c
+++ b/src/backend/access/transam/multixact.c
@@ -3,12 +3,18 @@
* multixact.c
* PostgreSQL multi-transaction-log manager
*
- * The pg_multixact manager is a pg_clog-like manager that stores an array
- * of TransactionIds for each MultiXactId. It is a fundamental part of the
- * shared-row-lock implementation. A share-locked tuple stores a
- * MultiXactId in its Xmax, and a transaction that needs to wait for the
- * tuple to be unlocked can sleep on the potentially-several TransactionIds
- * that compose the MultiXactId.
+ * The pg_multixact manager is a pg_clog-like manager that stores an array of
+ * MultiXactMember for each MultiXactId. It is a fundamental part of the
+ * shared-row-lock implementation. Each MultiXactMember is comprised of a
+ * TransactionId and a set of flag bits. The name is a bit historical:
+ * originally, a MultiXactId consisted of more than one TransactionId (except
+ * in rare corner cases), hence "multi". Nowadays, however, it's perfectly
+ * legitimate to have MultiXactIds that only include a single Xid.
+ *
+ * The meaning of the flag bits is opaque to this module, but they are mostly
+ * used in heapam.c to identify lock modes that each of the member transactions
+ * is holding on any given tuple. This module just contains support to store
+ * and retrieve the arrays.
*
* We use two SLRU areas, one for storing the offsets at which the data
* starts for each MultiXactId in the other one. This trick allows us to
@@ -38,6 +44,15 @@
* replay, the next-MXID and next-offset counters are at least as large as
* anything we saw during replay.
*
+ * We are able to remove segments no longer necessary by carefully tracking
+ * each table's used values: during vacuum, any multixact older than a
+ * certain value is removed; the cutoff value is stored in pg_class.
+ * The minimum value in each database is stored in pg_database, and the
+ * global minimum is part of pg_control. Any vacuum that is able to
+ * advance its database's minimum value also computes a new global minimum,
+ * and uses this value to truncate older segments. When new MultiXactId
+ * values are to be created, care is taken that the counter does not
+ * fall within the wraparound horizon considering the global minimum value.
*
* Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
@@ -54,40 +69,84 @@
#include "access/twophase.h"
#include "access/twophase_rmgr.h"
#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "funcapi.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "storage/lmgr.h"
+#include "storage/pmsignal.h"
#include "storage/procarray.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
+#include "utils/snapmgr.h"
/*
* Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
* used everywhere else in Postgres.
*
- * Note: because both MultiXactOffsets and TransactionIds are 32 bits and
- * wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at
- * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at
- * 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need take no
- * explicit notice of that fact in this module, except when comparing segment
- * and page numbers in TruncateMultiXact
- * (see MultiXact{Offset,Member}PagePrecedes).
+ * Note: because MultiXactOffsets are 32 bits and wrap around at 0xFFFFFFFF,
+ * MultiXact page numbering also wraps around at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE, and segment numbering at
+ * 0xFFFFFFFF/MULTIXACT_OFFSETS_PER_PAGE/SLRU_SEGMENTS_PER_PAGE. We need
+ * take no explicit notice of that fact in this module, except when comparing
+ * segment and page numbers in TruncateMultiXact (see
+ * MultiXactOffsetPagePrecedes).
*/
-/* We need four bytes per offset and also four bytes per member */
+/* We need four bytes per offset */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
-#define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
#define MultiXactIdToOffsetPage(xid) \
((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(xid) \
((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
-#define MXOffsetToMemberPage(xid) \
- ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
-#define MXOffsetToMemberEntry(xid) \
- ((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
+/*
+ * The situation for members is a bit more complex: we store one byte of
+ * additional flag bits for each TransactionId. To do this without getting
+ * into alignment issues, we store four bytes of flags, and then the
+ * corresponding 4 Xids. Each such 5-word (20-byte) set we call a "group", and
+ * groups are stored as a whole in pages. Thus, with 8kB BLCKSZ, we keep 409
+ * groups per page. This wastes 12 bytes per page, but that's OK -- simplicity
+ * (and performance) trumps space efficiency here.
+ *
+ * Note that the "offset" macros work with byte offsets, not array indexes, so
+ * arithmetic must be done using "char *" pointers.
+ */
+/* We need eight bits per xact, so one xact fits in a byte */
+#define MXACT_MEMBER_BITS_PER_XACT 8
+#define MXACT_MEMBER_FLAGS_PER_BYTE 1
+#define MXACT_MEMBER_XACT_BITMASK ((1 << MXACT_MEMBER_BITS_PER_XACT) - 1)
+
+/* how many full bytes of flags are there in a group? */
+#define MULTIXACT_FLAGBYTES_PER_GROUP 4
+#define MULTIXACT_MEMBERS_PER_MEMBERGROUP \
+ (MULTIXACT_FLAGBYTES_PER_GROUP * MXACT_MEMBER_FLAGS_PER_BYTE)
+/* size in bytes of a complete group */
+#define MULTIXACT_MEMBERGROUP_SIZE \
+ (sizeof(TransactionId) * MULTIXACT_MEMBERS_PER_MEMBERGROUP + MULTIXACT_FLAGBYTES_PER_GROUP)
+#define MULTIXACT_MEMBERGROUPS_PER_PAGE (BLCKSZ / MULTIXACT_MEMBERGROUP_SIZE)
+#define MULTIXACT_MEMBERS_PER_PAGE \
+ (MULTIXACT_MEMBERGROUPS_PER_PAGE * MULTIXACT_MEMBERS_PER_MEMBERGROUP)
+
+/* page in which a member is to be found */
+#define MXOffsetToMemberPage(xid) ((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
+
+/* Location (byte offset within page) of flag word for a given member */
+#define MXOffsetToFlagsOffset(xid) \
+ ((((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) % \
+ (TransactionId) MULTIXACT_MEMBERGROUPS_PER_PAGE) * \
+ (TransactionId) MULTIXACT_MEMBERGROUP_SIZE)
+#define MXOffsetToFlagsBitShift(xid) \
+ (((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_MEMBERGROUP) * \
+ MXACT_MEMBER_BITS_PER_XACT)
+
+/* Location (byte offset within page) of TransactionId of given member */
+#define MXOffsetToMemberOffset(xid) \
+ (MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \
+ ((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId))
/*
@@ -118,6 +177,19 @@ typedef struct MultiXactStateData
MultiXactId lastTruncationPoint;
/*
+ * oldest multixact that is still on disk. Anything older than this should
+ * not be consulted.
+ */
+ MultiXactId oldestMultiXactId;
+ Oid oldestMultiXactDB;
+
+ /* support for anti-wraparound measures */
+ MultiXactId multiVacLimit;
+ MultiXactId multiWarnLimit;
+ MultiXactId multiStopLimit;
+ MultiXactId multiWrapLimit;
+
+ /*
* Per-backend data starts here. We have two arrays stored in the area
* immediately following the MultiXactStateData struct. Each is indexed by
* BackendId.
@@ -180,7 +252,8 @@ static MultiXactId *OldestVisibleMXactId;
* so they will be uninteresting by the time our next transaction starts.
* (XXX not clear that this is correct --- other members of the MultiXact
* could hang around longer than we did. However, it's not clear what a
- * better policy for flushing old cache entries would be.)
+ * better policy for flushing old cache entries would be.) FIXME actually
+ * this is plain wrong now that multixacts may contain update Xids.
*
* We allocate the cache entries in a memory context that is deleted at
* transaction end, so we don't need to do retail freeing of entries.
@@ -189,53 +262,52 @@ typedef struct mXactCacheEnt
{
struct mXactCacheEnt *next;
MultiXactId multi;
- int nxids;
- TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
+ int nmembers;
+ MultiXactMember members[FLEXIBLE_ARRAY_MEMBER];
} mXactCacheEnt;
static mXactCacheEnt *MXactCache = NULL;
static MemoryContext MXactContext = NULL;
-
#ifdef MULTIXACT_DEBUG
#define debug_elog2(a,b) elog(a,b)
#define debug_elog3(a,b,c) elog(a,b,c)
#define debug_elog4(a,b,c,d) elog(a,b,c,d)
#define debug_elog5(a,b,c,d,e) elog(a,b,c,d,e)
+#define debug_elog6(a,b,c,d,e,f) elog(a,b,c,d,e,f)
#else
#define debug_elog2(a,b)
#define debug_elog3(a,b,c)
#define debug_elog4(a,b,c,d)
#define debug_elog5(a,b,c,d,e)
+#define debug_elog6(a,b,c,d,e,f)
#endif
/* internal MultiXactId management */
static void MultiXactIdSetOldestVisible(void);
-static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
+static MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
- int nxids, TransactionId *xids);
-static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
+ int nmembers, MultiXactMember *members);
+static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
/* MultiXact cache management */
-static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
-static int mXactCacheGetById(MultiXactId multi, TransactionId **xids);
-static void mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids);
+static int mxactMemberComparator(const void *arg1, const void *arg2);
+static MultiXactId mXactCacheGetBySet(int nmembers, MultiXactMember *members);
+static int mXactCacheGetById(MultiXactId multi, MultiXactMember **members);
+static void mXactCachePut(MultiXactId multi, int nmembers,
+ MultiXactMember *members);
-#ifdef MULTIXACT_DEBUG
-static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
-#endif
+static char *mxstatus_to_string(MultiXactStatus status);
/* management of SLRU infrastructure */
static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
static int ZeroMultiXactMemberPage(int pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int page1, int page2);
static bool MultiXactMemberPagePrecedes(int page1, int page2);
-static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
-static void TruncateMultiXact(void);
static void WriteMZeroPageXlogRec(int pageno, uint8 info);
@@ -243,21 +315,22 @@ static void WriteMZeroPageXlogRec(int pageno, uint8 info);
* MultiXactIdCreate
* Construct a MultiXactId representing two TransactionIds.
*
- * The two XIDs must be different.
+ * The two XIDs must be different, or be requesting different statuses.
*
* NB - we don't worry about our local MultiXactId cache here, because that
* is handled by the lower-level routines.
*/
MultiXactId
-MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
+MultiXactIdCreate(TransactionId xid1, MultiXactStatus status1,
+ TransactionId xid2, MultiXactStatus status2)
{
MultiXactId newMulti;
- TransactionId xids[2];
+ MultiXactMember members[2];
AssertArg(TransactionIdIsValid(xid1));
AssertArg(TransactionIdIsValid(xid2));
- Assert(!TransactionIdEquals(xid1, xid2));
+ Assert(!TransactionIdEquals(xid1, xid2) || (status1 != status2));
/*
* Note: unlike MultiXactIdExpand, we don't bother to check that both XIDs
@@ -265,13 +338,15 @@ MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
* caller just did a check on xid1, so it'd be wasted effort.
*/
- xids[0] = xid1;
- xids[1] = xid2;
+ members[0].xid = xid1;
+ members[0].status = status1;
+ members[1].xid = xid2;
+ members[1].status = status2;
- newMulti = CreateMultiXactId(2, xids);
+ newMulti = CreateMultiXactId(2, members);
- debug_elog5(DEBUG2, "Create: returning %u for %u, %u",
- newMulti, xid1, xid2);
+ debug_elog3(DEBUG2, "Create: %s",
+ mxid_to_string(newMulti, 2, members));
return newMulti;
}
@@ -280,22 +355,27 @@ MultiXactIdCreate(TransactionId xid1, TransactionId xid2)
* MultiXactIdExpand
* Add a TransactionId to a pre-existing MultiXactId.
*
- * If the TransactionId is already a member of the passed MultiXactId,
- * just return it as-is.
+ * If the TransactionId is already a member of the passed MultiXactId with the
+ * same status, just return it as-is.
*
* Note that we do NOT actually modify the membership of a pre-existing
* MultiXactId; instead we create a new one. This is necessary to avoid
- * a race condition against MultiXactIdWait (see notes there).
+ * a race condition against code trying to wait for one MultiXactId to finish;
+ * see notes in heapam.c.
*
* NB - we don't worry about our local MultiXactId cache here, because that
* is handled by the lower-level routines.
+ *
+ * Note: It is critical that MultiXactIds that come from an old cluster (i.e.
+ * one upgraded by pg_upgrade from a cluster older than this feature) are not
+ * passed in.
*/
MultiXactId
-MultiXactIdExpand(MultiXactId multi, TransactionId xid)
+MultiXactIdExpand(MultiXactId multi, TransactionId xid, MultiXactStatus status)
{
MultiXactId newMulti;
- TransactionId *members;
- TransactionId *newMembers;
+ MultiXactMember *members;
+ MultiXactMember *newMembers;
int nmembers;
int i;
int j;
@@ -303,13 +383,20 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid)
AssertArg(MultiXactIdIsValid(multi));
AssertArg(TransactionIdIsValid(xid));
- debug_elog4(DEBUG2, "Expand: received multi %u, xid %u",
- multi, xid);
+ debug_elog5(DEBUG2, "Expand: received multi %u, xid %u status %s",
+ multi, xid, mxstatus_to_string(status));
- nmembers = GetMultiXactIdMembers(multi, &members);
+ /*
+ * Note: we don't allow for old multis here. The reason is that the
+ * only caller of this function does a check that the multixact is
+ * no longer running.
+ */
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
if (nmembers < 0)
{
+ MultiXactMember member;
+
/*
* The MultiXactId is obsolete. This can only happen if all the
* MultiXactId members stop running between the caller checking and
@@ -317,7 +404,9 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid)
* caller, but it would complicate the API and it's unlikely to happen
* too often, so just deal with it by creating a singleton MultiXact.
*/
- newMulti = CreateMultiXactId(1, &xid);
+ member.xid = xid;
+ member.status = status;
+ newMulti = CreateMultiXactId(1, &member);
debug_elog4(DEBUG2, "Expand: %u has no members, create singleton %u",
multi, newMulti);
@@ -325,12 +414,13 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid)
}
/*
- * If the TransactionId is already a member of the MultiXactId, just
- * return the existing MultiXactId.
+ * If the TransactionId is already a member of the MultiXactId with the
+ * same status, just return the existing MultiXactId.
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdEquals(members[i], xid))
+ if (TransactionIdEquals(members[i].xid, xid) &&
+ (members[i].status == status))
{
debug_elog4(DEBUG2, "Expand: %u is already a member of %u",
xid, multi);
@@ -340,21 +430,31 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid)
}
/*
- * Determine which of the members of the MultiXactId are still running,
- * and use them to create a new one. (Removing dead members is just an
- * optimization, but a useful one. Note we have the same race condition
- * here as above: j could be 0 at the end of the loop.)
+ * Determine which of the members of the MultiXactId are still of interest.
+ * This is any running transaction, and also any transaction that grabbed
+ * something stronger than just a lock and was committed. (An update that
+ * aborted is of no interest here.)
+ *
+ * (Removing dead members is just an optimization, but a useful one.
+ * Note we have the same race condition here as above: j could be 0 at the
+ * end of the loop.)
*/
- newMembers = (TransactionId *)
- palloc(sizeof(TransactionId) * (nmembers + 1));
+ newMembers = (MultiXactMember *)
+ palloc(sizeof(MultiXactMember) * (nmembers + 1));
for (i = 0, j = 0; i < nmembers; i++)
{
- if (TransactionIdIsInProgress(members[i]))
- newMembers[j++] = members[i];
+ if (TransactionIdIsInProgress(members[i].xid) ||
+ ((members[i].status > MultiXactStatusForUpdate) &&
+ TransactionIdDidCommit(members[i].xid)))
+ {
+ newMembers[j].xid = members[i].xid;
+ newMembers[j++].status = members[i].status;
+ }
}
- newMembers[j++] = xid;
+ newMembers[j].xid = xid;
+ newMembers[j++].status = status;
newMulti = CreateMultiXactId(j, newMembers);
pfree(members);
@@ -372,17 +472,24 @@ MultiXactIdExpand(MultiXactId multi, TransactionId xid)
* We return true if at least one member of the given MultiXactId is still
* running. Note that a "false" result is certain not to change,
* because it is not legal to add members to an existing MultiXactId.
+ *
+ * Caller is expected to have verified that the multixact does not come from
+ * a pg_upgraded share-locked tuple.
*/
bool
MultiXactIdIsRunning(MultiXactId multi)
{
- TransactionId *members;
+ MultiXactMember *members;
int nmembers;
int i;
debug_elog3(DEBUG2, "IsRunning %u?", multi);
- nmembers = GetMultiXactIdMembers(multi, &members);
+ /*
+ * "false" here means we assume our callers have checked that the given
+ * multi cannot possibly come from a pg_upgraded database.
+ */
+ nmembers = GetMultiXactIdMembers(multi, &members, false);
if (nmembers < 0)
{
@@ -391,13 +498,15 @@ MultiXactIdIsRunning(MultiXactId multi)
}
/*
- * Checking for myself is cheap compared to looking in shared memory, so
- * first do the equivalent of MultiXactIdIsCurrent(). This is not needed
- * for correctness, it's just a fast path.
+ * Checking for myself is cheap compared to looking in shared memory;
+ * return true if any live subtransaction of the current top-level
+ * transaction is a member.
+ *
+ * This is not needed for correctness, it's just a fast path.
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdIsCurrentTransactionId(members[i]))
+ if (TransactionIdIsCurrentTransactionId(members[i].xid))
{
debug_elog3(DEBUG2, "IsRunning: I (%d) am running!", i);
pfree(members);
@@ -412,10 +521,10 @@ MultiXactIdIsRunning(MultiXactId multi)
*/
for (i = 0; i < nmembers; i++)
{
- if (TransactionIdIsInProgress(members[i]))
+ if (TransactionIdIsInProgress(members[i].xid))
{
debug_elog4(DEBUG2, "IsRunning: member %d (%u) is running",
- i, members[i]);
+ i, members[i].xid);
pfree(members);
return true;
}
@@ -429,54 +538,17 @@ MultiXactIdIsRunning(MultiXactId multi)
}
/*
- * MultiXactIdIsCurrent
- * Returns true if the current transaction is a member of the MultiXactId.
- *
- * We return true if any live subtransaction of the current top-level
- * transaction is a member. This is appropriate for the same reason that a
- * lock held by any such subtransaction is globally equivalent to a lock
- * held by the current subtransaction: no such lock could be released without
- * aborting this subtransaction, and hence releasing its locks. So it's not
- * necessary to add the current subxact to the MultiXact separately.
- */
-bool
-MultiXactIdIsCurrent(MultiXactId multi)
-{
- bool result = false;
- TransactionId *members;
- int nmembers;
- int i;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers < 0)
- return false;
-
- for (i = 0; i < nmembers; i++)
- {
- if (TransactionIdIsCurrentTransactionId(members[i]))
- {
- result = true;
- break;
- }
- }
-
- pfree(members);
-
- return result;
-}
-
-/*
* MultiXactIdSetOldestMember
* Save the oldest MultiXactId this transaction could be a member of.
*
- * We set the OldestMemberMXactId for a given transaction the first time
- * it's going to acquire a shared lock. We need to do this even if we end
- * up using a TransactionId instead of a MultiXactId, because there is a
- * chance that another transaction would add our XID to a MultiXactId.
+ * We set the OldestMemberMXactId for a given transaction the first time it's
+ * going to do some operation that might require a MultiXactId (tuple lock,
+ * update or delete). We need to do this even if we end up using a
+ * TransactionId instead of a MultiXactId, because there is a chance that
+ * another transaction would add our XID to a MultiXactId.
*
- * The value to set is the next-to-be-assigned MultiXactId, so this is meant
- * to be called just before acquiring a shared lock.
+ * The value to set is the next-to-be-assigned MultiXactId, so this is meant to
+ * be called just before doing any such possibly-MultiXactId-able operation.
*/
void
MultiXactIdSetOldestMember(void)
@@ -568,81 +640,23 @@ MultiXactIdSetOldestVisible(void)
}
/*
- * MultiXactIdWait
- * Sleep on a MultiXactId.
- *
- * We do this by sleeping on each member using XactLockTableWait. Any
- * members that belong to the current backend are *not* waited for, however;
- * this would not merely be useless but would lead to Assert failure inside
- * XactLockTableWait. By the time this returns, it is certain that all
- * transactions *of other backends* that were members of the MultiXactId
- * are dead (and no new ones can have been added, since it is not legal
- * to add members to an existing MultiXactId).
- *
- * But by the time we finish sleeping, someone else may have changed the Xmax
- * of the containing tuple, so the caller needs to iterate on us somehow.
+ * ReadNextMultiXactId
+ * Return the next MultiXactId to be assigned, but don't allocate it
*/
-void
-MultiXactIdWait(MultiXactId multi)
-{
- TransactionId *members;
- int nmembers;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers >= 0)
- {
- int i;
-
- for (i = 0; i < nmembers; i++)
- {
- TransactionId member = members[i];
-
- debug_elog4(DEBUG2, "MultiXactIdWait: waiting for %d (%u)",
- i, member);
- if (!TransactionIdIsCurrentTransactionId(member))
- XactLockTableWait(member);
- }
-
- pfree(members);
- }
-}
-
-/*
- * ConditionalMultiXactIdWait
- * As above, but only lock if we can get the lock without blocking.
- */
-bool
-ConditionalMultiXactIdWait(MultiXactId multi)
+MultiXactId
+ReadNextMultiXactId(void)
{
- bool result = true;
- TransactionId *members;
- int nmembers;
-
- nmembers = GetMultiXactIdMembers(multi, &members);
-
- if (nmembers >= 0)
- {
- int i;
+ MultiXactId mxid;
- for (i = 0; i < nmembers; i++)
- {
- TransactionId member = members[i];
-
- debug_elog4(DEBUG2, "ConditionalMultiXactIdWait: trying %d (%u)",
- i, member);
- if (!TransactionIdIsCurrentTransactionId(member))
- {
- result = ConditionalXactLockTableWait(member);
- if (!result)
- break;
- }
- }
+ /* XXX we could presumably do this without a lock. */
+ LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ mxid = MultiXactState->nextMXact;
+ LWLockRelease(MultiXactGenLock);
- pfree(members);
- }
+ if (mxid < FirstMultiXactId)
+ mxid = FirstMultiXactId;
- return result;
+ return mxid;
}
/*
@@ -652,10 +666,10 @@ ConditionalMultiXactIdWait(MultiXactId multi)
* Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
* given TransactionIds as members. Returns the newly created MultiXactId.
*
- * NB: the passed xids[] array will be sorted in-place.
+ * NB: the passed members[] array will be sorted in-place.
*/
static MultiXactId
-CreateMultiXactId(int nxids, TransactionId *xids)
+CreateMultiXactId(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
MultiXactOffset offset;
@@ -663,10 +677,10 @@ CreateMultiXactId(int nxids, TransactionId *xids)
xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s",
- mxid_to_string(InvalidMultiXactId, nxids, xids));
+ mxid_to_string(InvalidMultiXactId, nmembers, members));
/*
- * See if the same set of XIDs already exists in our cache; if so, just
+ * See if the same set of members already exists in our cache; if so, just
* re-use that MultiXactId. (Note: it might seem that looking in our
* cache is insufficient, and we ought to search disk to see if a
* duplicate definition already exists. But since we only ever create
@@ -675,7 +689,7 @@ CreateMultiXactId(int nxids, TransactionId *xids)
* corner cases where someone else added us to a MultiXact without our
* knowledge, but it's not worth checking for.)
*/
- multi = mXactCacheGetBySet(nxids, xids);
+ multi = mXactCacheGetBySet(nmembers, members);
if (MultiXactIdIsValid(multi))
{
debug_elog2(DEBUG2, "Create: in cache!");
@@ -687,7 +701,7 @@ CreateMultiXactId(int nxids, TransactionId *xids)
* in the OFFSETs and MEMBERs files. NB: this routine does
* START_CRIT_SECTION().
*/
- multi = GetNewMultiXactId(nxids, &offset);
+ multi = GetNewMultiXactId(nmembers, &offset);
/*
* Make an XLOG entry describing the new MXID.
@@ -704,27 +718,34 @@ CreateMultiXactId(int nxids, TransactionId *xids)
*/
xlrec.mid = multi;
xlrec.moff = offset;
- xlrec.nxids = nxids;
+ xlrec.nmembers = nmembers;
+ /*
+ * XXX Note: there's a lot of padding space in MultiXactMember. We could
+ * find a more compact representation of this Xlog record -- perhaps all the
+ * status flags in one XLogRecData, then all the xids in another one? Not
+ * clear that it's worth the trouble though.
+ */
rdata[0].data = (char *) (&xlrec);
- rdata[0].len = MinSizeOfMultiXactCreate;
+ rdata[0].len = SizeOfMultiXactCreate;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
- rdata[1].data = (char *) xids;
- rdata[1].len = nxids * sizeof(TransactionId);
+
+ rdata[1].data = (char *) members;
+ rdata[1].len = nmembers * sizeof(MultiXactMember);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
/* Now enter the information into the OFFSETs and MEMBERs logs */
- RecordNewMultiXact(multi, offset, nxids, xids);
+ RecordNewMultiXact(multi, offset, nmembers, members);
/* Done with critical section */
END_CRIT_SECTION();
/* Store the new MultiXactId in the local cache, too */
- mXactCachePut(multi, nxids, xids);
+ mXactCachePut(multi, nmembers, members);
debug_elog2(DEBUG2, "Create: all done");
@@ -739,7 +760,7 @@ CreateMultiXactId(int nxids, TransactionId *xids)
*/
static void
RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
- int nxids, TransactionId *xids)
+ int nmembers, MultiXactMember *members)
{
int pageno;
int prev_pageno;
@@ -775,12 +796,21 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
prev_pageno = -1;
- for (i = 0; i < nxids; i++, offset++)
+ for (i = 0; i < nmembers; i++, offset++)
{
TransactionId *memberptr;
+ uint32 *flagsptr;
+ uint32 flagsval;
+ int bshift;
+ int flagsoff;
+ int memberoff;
+
+ Assert(members[i].status <= MultiXactStatusUpdate);
pageno = MXOffsetToMemberPage(offset);
- entryno = MXOffsetToMemberEntry(offset);
+ memberoff = MXOffsetToMemberOffset(offset);
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ bshift = MXOffsetToFlagsBitShift(offset);
if (pageno != prev_pageno)
{
@@ -789,10 +819,17 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
}
memberptr = (TransactionId *)
- MultiXactMemberCtl->shared->page_buffer[slotno];
- memberptr += entryno;
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
+
+ *memberptr = members[i].xid;
+
+ flagsptr = (uint32 *)
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
- *memberptr = xids[i];
+ flagsval = *flagsptr;
+ flagsval &= ~(((1 << MXACT_MEMBER_BITS_PER_XACT) - 1) << bshift);
+ flagsval |= (members[i].status << bshift);
+ *flagsptr = flagsval;
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
}
@@ -816,27 +853,115 @@ RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
* caller must end the critical section after writing SLRU data.
*/
static MultiXactId
-GetNewMultiXactId(int nxids, MultiXactOffset *offset)
+GetNewMultiXactId(int nmembers, MultiXactOffset *offset)
{
MultiXactId result;
MultiXactOffset nextOffset;
- debug_elog3(DEBUG2, "GetNew: for %d xids", nxids);
+ debug_elog3(DEBUG2, "GetNew: for %d xids", nmembers);
/* MultiXactIdSetOldestMember() must have been called already */
Assert(MultiXactIdIsValid(OldestMemberMXactId[MyBackendId]));
+ /* safety check, we should never get this far in a HS slave */
+ if (RecoveryInProgress())
+ elog(ERROR, "cannot assign MultiXactIds during recovery");
+
LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
/* Handle wraparound of the nextMXact counter */
if (MultiXactState->nextMXact < FirstMultiXactId)
MultiXactState->nextMXact = FirstMultiXactId;
- /*
- * Assign the MXID, and make sure there is room for it in the file.
- */
+ /* Assign the MXID */
result = MultiXactState->nextMXact;
+ /*----------
+ * Check to see if it's safe to assign another MultiXactId. This protects
+ * against catastrophic data loss due to multixact wraparound. The basic
+ * rules are:
+ *
+ * If we're past multiVacLimit, start trying to force autovacuum cycles.
+ * If we're past multiWarnLimit, start issuing warnings.
+ * If we're past multiStopLimit, refuse to create new MultiXactIds.
+ *
+ * Note these are pretty much the same protections as in GetNewTransactionId.
+ *----------
+ */
+ if (!MultiXactIdPrecedes(result, MultiXactState->multiVacLimit))
+ {
+ /*
+ * For safety's sake, we release MultiXactGenLock while sending
+ * signals, warnings, etc. This is not so much because we care about
+ * preserving concurrency in this situation, as to avoid any
+ * possibility of deadlock while doing get_database_name(). First,
+ * copy all the shared values we'll need in this path.
+ */
+ MultiXactId multiWarnLimit = MultiXactState->multiWarnLimit;
+ MultiXactId multiStopLimit = MultiXactState->multiStopLimit;
+ MultiXactId multiWrapLimit = MultiXactState->multiWrapLimit;
+ Oid oldest_datoid = MultiXactState->oldestMultiXactDB;
+
+ LWLockRelease(MultiXactGenLock);
+
+ /*
+ * To avoid swamping the postmaster with signals, we issue the autovac
+ * request only once per 64K MultiXactIds assigned. This still gives
+ * plenty of chances before we get into real trouble.
+ */
+ if (IsUnderPostmaster && (result % 65536) == 0)
+ SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
+ if (IsUnderPostmaster &&
+ !MultiXactIdPrecedes(result, multiStopLimit))
+ {
+ char *oldest_datname = get_database_name(oldest_datoid);
+
+ /* complain even if that DB has disappeared */
+ if (oldest_datname)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database \"%s\"",
+ oldest_datname),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("database is not accepting commands that generate new MultiXactIds to avoid wraparound data loss in database with OID %u",
+ oldest_datoid),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ }
+ else if (!MultiXactIdPrecedes(result, multiWarnLimit))
+ {
+ char *oldest_datname = get_database_name(oldest_datoid);
+
+ /* complain even if that DB has disappeared */
+ if (oldest_datname)
+ ereport(WARNING,
+ (errmsg("database \"%s\" must be vacuumed before %u more MultiXactIds are used",
+ oldest_datname,
+ multiWrapLimit - result),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ else
+ ereport(WARNING,
+ (errmsg("database with OID %u must be vacuumed before %u more MultiXactIds are used",
+ oldest_datoid,
+ multiWrapLimit - result),
+ errhint("Execute a database-wide VACUUM in that database.\n"
+ "You might also need to commit or roll back old prepared transactions.")));
+ }
+
+ /* Re-acquire lock and start over */
+ LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+ result = MultiXactState->nextMXact;
+ if (result < FirstMultiXactId)
+ result = FirstMultiXactId;
+ }
+
+ /* Make sure there is room for the MXID in the file. */
ExtendMultiXactOffset(result);
/*
@@ -848,12 +973,12 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
if (nextOffset == 0)
{
*offset = 1;
- nxids++; /* allocate member slot 0 too */
+ nmembers++; /* allocate member slot 0 too */
}
else
*offset = nextOffset;
- ExtendMultiXactMember(nextOffset, nxids);
+ ExtendMultiXactMember(nextOffset, nmembers);
/*
* Critical section from here until caller has written the data into the
@@ -870,13 +995,14 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
*
* We don't care about MultiXactId wraparound here; it will be handled by
* the next iteration. But note that nextMXact may be InvalidMultiXactId
- * after this routine exits, so anyone else looking at the variable must
- * be prepared to deal with that. Similarly, nextOffset may be zero, but
- * we won't use that as the actual start offset of the next multixact.
+ * or the first value on a segment-beginning page after this routine exits,
+ * so anyone else looking at the variable must be prepared to deal with
+ * either case. Similarly, nextOffset may be zero, but we won't use that
+ * as the actual start offset of the next multixact.
*/
(MultiXactState->nextMXact)++;
- MultiXactState->nextOffset += nxids;
+ MultiXactState->nextOffset += nmembers;
LWLockRelease(MultiXactGenLock);
@@ -886,14 +1012,23 @@ GetNewMultiXactId(int nxids, MultiXactOffset *offset)
/*
* GetMultiXactIdMembers
- * Returns the set of TransactionIds that make up a MultiXactId
+ * Returns the set of MultiXactMembers that make up a MultiXactId
+ *
+ * If the given MultiXactId is older than the value we know to be oldest, we
+ * return -1. The caller is expected to allow that only in permissible cases,
+ * i.e. when the infomask lets it presuppose that the tuple had been
+ * share-locked before a pg_upgrade; this means that the HEAP_XMAX_LOCK_ONLY
+ * needs to be set, but HEAP_XMAX_KEYSHR_LOCK and HEAP_XMAX_EXCL_LOCK are not
+ * set.
*
- * We return -1 if the MultiXactId is too old to possibly have any members
- * still running; in that case we have not actually looked them up, and
- * *xids is not set.
+ * Other border conditions, such as trying to read a value that's larger than
+ * the value currently known as the next to assign, raise an error. Previously
+ * these also returned -1, but since this can lead to the wrong visibility
+ * results, it is dangerous to do that.
*/
int
-GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
+GetMultiXactIdMembers(MultiXactId multi, MultiXactMember **members,
+ bool allow_old)
{
int pageno;
int prev_pageno;
@@ -904,21 +1039,22 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
int length;
int truelength;
int i;
+ MultiXactId oldestMXact;
MultiXactId nextMXact;
MultiXactId tmpMXact;
MultiXactOffset nextOffset;
- TransactionId *ptr;
+ MultiXactMember *ptr;
debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
Assert(MultiXactIdIsValid(multi));
/* See if the MultiXactId is in the local cache */
- length = mXactCacheGetById(multi, xids);
+ length = mXactCacheGetById(multi, members);
if (length >= 0)
{
debug_elog3(DEBUG2, "GetMembers: found %s in the cache",
- mxid_to_string(multi, length, *xids));
+ mxid_to_string(multi, length, *members));
return length;
}
@@ -928,43 +1064,48 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
/*
* We check known limits on MultiXact before resorting to the SLRU area.
*
- * An ID older than our OldestVisibleMXactId[] entry can't possibly still
- * be running, and we'd run the risk of trying to read already-truncated
- * SLRU data if we did try to examine it.
+ * An ID older than MultiXactState->oldestMultiXactId cannot possibly be
+ * useful; it should have already been frozen by vacuum. We've truncated
+ * the on-disk structures anyway. Returning the wrong values could lead to
+ * an incorrect visibility result. However, to support pg_upgrade we need
+ * to allow an empty set to be returned regardless, if the caller is
+ * willing to accept it; the caller is expected to check that it's an
+ * allowed condition (such as ensuring that the infomask bits set on the
+ * tuple are consistent with the pg_upgrade scenario). If the caller is
+ * expecting this to be called only on recently created multis, then we
+ * raise an error.
*
* Conversely, an ID >= nextMXact shouldn't ever be seen here; if it is
- * seen, it implies undetected ID wraparound has occurred. We just
- * silently assume that such an ID is no longer running.
+ * seen, it implies undetected ID wraparound has occurred. This raises
+ * a hard error.
*
* Shared lock is enough here since we aren't modifying any global state.
- * Also, we can examine our own OldestVisibleMXactId without the lock,
- * since no one else is allowed to change it.
- */
- if (MultiXactIdPrecedes(multi, OldestVisibleMXactId[MyBackendId]))
- {
- debug_elog2(DEBUG2, "GetMembers: it's too old");
- *xids = NULL;
- return -1;
- }
-
- /*
- * Acquire the shared lock just long enough to grab the current counter
- * values. We may need both nextMXact and nextOffset; see below.
+ * Acquire it just long enough to grab the current counter values. We may
+ * need both nextMXact and nextOffset; see below.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
+ oldestMXact = MultiXactState->oldestMultiXactId;
nextMXact = MultiXactState->nextMXact;
nextOffset = MultiXactState->nextOffset;
LWLockRelease(MultiXactGenLock);
- if (!MultiXactIdPrecedes(multi, nextMXact))
+ if (MultiXactIdPrecedes(multi, oldestMXact))
{
- debug_elog2(DEBUG2, "GetMembers: it's too new!");
- *xids = NULL;
+ ereport(allow_old ? DEBUG1 : ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("MultiXactId %u does no longer exist -- apparent wraparound",
+ multi)));
return -1;
}
+ if (!MultiXactIdPrecedes(multi, nextMXact))
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("MultiXactId %u has not been created yet -- apparent wraparound",
+ multi)));
+
/*
* Find out the offset at which we need to start reading MultiXactMembers
* and the number of members in the multixact. We determine the latter as
@@ -1055,8 +1196,8 @@ retry:
LWLockRelease(MultiXactOffsetControlLock);
- ptr = (TransactionId *) palloc(length * sizeof(TransactionId));
- *xids = ptr;
+ ptr = (MultiXactMember *) palloc(length * sizeof(MultiXactMember));
+ *members = ptr;
/* Now get the members themselves. */
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
@@ -1066,9 +1207,13 @@ retry:
for (i = 0; i < length; i++, offset++)
{
TransactionId *xactptr;
+ uint32 *flagsptr;
+ int flagsoff;
+ int bshift;
+ int memberoff;
pageno = MXOffsetToMemberPage(offset);
- entryno = MXOffsetToMemberEntry(offset);
+ memberoff = MXOffsetToMemberOffset(offset);
if (pageno != prev_pageno)
{
@@ -1077,8 +1222,7 @@ retry:
}
xactptr = (TransactionId *)
- MultiXactMemberCtl->shared->page_buffer[slotno];
- xactptr += entryno;
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
if (!TransactionIdIsValid(*xactptr))
{
@@ -1087,7 +1231,13 @@ retry:
continue;
}
- ptr[truelength++] = *xactptr;
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ bshift = MXOffsetToFlagsBitShift(offset);
+ flagsptr = (uint32 *) (MultiXactMemberCtl->shared->page_buffer[slotno] + flagsoff);
+
+ ptr[truelength].xid = *xactptr;
+ ptr[truelength].status = (*flagsptr >> bshift) & MXACT_MEMBER_XACT_BITMASK;
+ truelength++;
}
LWLockRelease(MultiXactMemberControlLock);
@@ -1103,6 +1253,30 @@ retry:
}
/*
+ * mxactMemberComparator
+ * qsort comparison function for MultiXactMember
+ *
+ * We can't use wraparound comparison for XIDs because that does not respect
+ * the triangle inequality! Any old sort order will do.
+ */
+static int
+mxactMemberComparator(const void *arg1, const void *arg2)
+{
+	const MultiXactMember *lhs = (const MultiXactMember *) arg1;
+	const MultiXactMember *rhs = (const MultiXactMember *) arg2;
+
+	/* Primary key: plain numeric (not wraparound-aware) order of the xid. */
+	if (lhs->xid != rhs->xid)
+		return (lhs->xid > rhs->xid) ? 1 : -1;
+
+	/* Same xid: fall back to the status value to get a total order. */
+	if (lhs->status != rhs->status)
+		return (lhs->status > rhs->status) ? 1 : -1;
+
+	return 0;
+}
+
+/*
* mXactCacheGetBySet
* returns a MultiXactId from the cache based on the set of
* MultiXactMembers that compose it, or InvalidMultiXactId if
@@ -1113,26 +1287,29 @@ retry:
* for the majority of tuples, thus keeping MultiXactId usage low (saving
* both I/O and wraparound issues).
*
- * NB: the passed xids[] array will be sorted in-place.
+ * NB: the passed members array will be sorted in-place.
*/
static MultiXactId
-mXactCacheGetBySet(int nxids, TransactionId *xids)
+mXactCacheGetBySet(int nmembers, MultiXactMember *members)
{
mXactCacheEnt *entry;
debug_elog3(DEBUG2, "CacheGet: looking for %s",
- mxid_to_string(InvalidMultiXactId, nxids, xids));
+ mxid_to_string(InvalidMultiXactId, nmembers, members));
/* sort the array so comparison is easy */
- qsort(xids, nxids, sizeof(TransactionId), xidComparator);
+ qsort(members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
for (entry = MXactCache; entry != NULL; entry = entry->next)
{
- if (entry->nxids != nxids)
+ if (entry->nmembers != nmembers)
continue;
- /* We assume the cache entries are sorted */
- if (memcmp(xids, entry->xids, nxids * sizeof(TransactionId)) == 0)
+ /*
+ * We assume the cache entries are sorted, and that the unused bits in
+ * "status" are zeroed.
+ */
+ if (memcmp(members, entry->members, nmembers * sizeof(MultiXactMember)) == 0)
{
debug_elog3(DEBUG2, "CacheGet: found %u", entry->multi);
return entry->multi;
@@ -1145,14 +1322,14 @@ mXactCacheGetBySet(int nxids, TransactionId *xids)
/*
* mXactCacheGetById
- * returns the composing TransactionId set from the cache for a
+ * returns the composing MultiXactMember set from the cache for a
* given MultiXactId, if present.
*
* If successful, *members is set to the address of a palloc'd copy of the
- * TransactionId set. Return value is number of members, or -1 on failure.
+ * MultiXactMember set. Return value is number of members, or -1 on failure.
*/
static int
-mXactCacheGetById(MultiXactId multi, TransactionId **xids)
+mXactCacheGetById(MultiXactId multi, MultiXactMember **members)
{
mXactCacheEnt *entry;
@@ -1162,18 +1339,18 @@ mXactCacheGetById(MultiXactId multi, TransactionId **xids)
{
if (entry->multi == multi)
{
- TransactionId *ptr;
+ MultiXactMember *ptr;
Size size;
- size = sizeof(TransactionId) * entry->nxids;
- ptr = (TransactionId *) palloc(size);
- *xids = ptr;
+ size = sizeof(MultiXactMember) * entry->nmembers;
+ ptr = (MultiXactMember *) palloc(size);
+ *members = ptr;
- memcpy(ptr, entry->xids, size);
+ memcpy(ptr, entry->members, size);
debug_elog3(DEBUG2, "CacheGet: found %s",
- mxid_to_string(multi, entry->nxids, entry->xids));
- return entry->nxids;
+ mxid_to_string(multi, entry->nmembers, entry->members));
+ return entry->nmembers;
}
}
@@ -1186,12 +1363,12 @@ mXactCacheGetById(MultiXactId multi, TransactionId **xids)
* Add a new MultiXactId and its composing set into the local cache.
*/
static void
-mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids)
+mXactCachePut(MultiXactId multi, int nmembers, MultiXactMember *members)
{
mXactCacheEnt *entry;
debug_elog3(DEBUG2, "CachePut: storing %s",
- mxid_to_string(multi, nxids, xids));
+ mxid_to_string(multi, nmembers, members));
if (MXactContext == NULL)
{
@@ -1206,36 +1383,67 @@ mXactCachePut(MultiXactId multi, int nxids, TransactionId *xids)
entry = (mXactCacheEnt *)
MemoryContextAlloc(MXactContext,
- offsetof(mXactCacheEnt, xids) +
- nxids * sizeof(TransactionId));
+ offsetof(mXactCacheEnt, members) +
+ nmembers * sizeof(MultiXactMember));
entry->multi = multi;
- entry->nxids = nxids;
- memcpy(entry->xids, xids, nxids * sizeof(TransactionId));
+ entry->nmembers = nmembers;
+ memcpy(entry->members, members, nmembers * sizeof(MultiXactMember));
/* mXactCacheGetBySet assumes the entries are sorted, so sort them */
- qsort(entry->xids, nxids, sizeof(TransactionId), xidComparator);
+ qsort(entry->members, nmembers, sizeof(MultiXactMember), mxactMemberComparator);
entry->next = MXactCache;
MXactCache = entry;
}
-#ifdef MULTIXACT_DEBUG
static char *
-mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids)
+mxstatus_to_string(MultiXactStatus status)
+{
+	char	   *label = "";
+
+	/* Translate each known member status into its short display name. */
+	switch (status)
+	{
+		case MultiXactStatusForKeyShare:
+			label = "keysh";
+			break;
+		case MultiXactStatusForShare:
+			label = "sh";
+			break;
+		case MultiXactStatusForNoKeyUpdate:
+			label = "fornokeyupd";
+			break;
+		case MultiXactStatusForUpdate:
+			label = "forupd";
+			break;
+		case MultiXactStatusNoKeyUpdate:
+			label = "nokeyupd";
+			break;
+		case MultiXactStatusUpdate:
+			label = "upd";
+			break;
+		default:
+			/* report the bad value; the empty label is the fallback result */
+			elog(ERROR, "unrecognized multixact status %d", status);
+			break;
+	}
+
+	return label;
+}
+
+char *
+mxid_to_string(MultiXactId multi, int nmembers, MultiXactMember *members)
{
- char *str = palloc(15 * (nxids + 1) + 4);
+ static char *str = NULL;
+ StringInfoData buf;
int i;
- snprintf(str, 47, "%u %d[%u", multi, nxids, xids[0]);
+ if (str != NULL)
+ pfree(str);
- for (i = 1; i < nxids; i++)
- snprintf(str + strlen(str), 17, ", %u", xids[i]);
+ initStringInfo(&buf);
- strcat(str, "]");
+ appendStringInfo(&buf, "%u %d[%u (%s)", multi, nmembers, members[0].xid,
+ mxstatus_to_string(members[0].status));
+
+ for (i = 1; i < nmembers; i++)
+ appendStringInfo(&buf, ", %u (%s)", members[i].xid,
+ mxstatus_to_string(members[i].status));
+
+ appendStringInfoChar(&buf, ']');
+ str = MemoryContextStrdup(TopMemoryContext, buf.data);
+ pfree(buf.data);
return str;
}
-#endif
/*
* AtEOXact_MultiXact
@@ -1512,8 +1720,9 @@ ZeroMultiXactMemberPage(int pageno, bool writeXlog)
* This must be called ONCE during postmaster or standalone-backend startup.
*
* StartupXLOG has already established nextMXact/nextOffset by calling
- * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we
- * may already have replayed WAL data into the SLRU files.
+ * MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact, and the oldestMulti
+ * info from pg_control and/or MultiXactAdvanceOldest. Note that we may
+ * already have replayed WAL data into the SLRU files.
*
* We don't need any locks here, really; the SLRU locks are taken
* only because slru.c expects to be called with locks held.
@@ -1525,6 +1734,7 @@ StartupMultiXact(void)
MultiXactOffset offset = MultiXactState->nextOffset;
int pageno;
int entryno;
+ int flagsoff;
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
@@ -1569,28 +1779,30 @@ StartupMultiXact(void)
* Zero out the remainder of the current members page. See notes in
* TrimCLOG() for motivation.
*/
- entryno = MXOffsetToMemberEntry(offset);
- if (entryno != 0)
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ if (flagsoff != 0)
{
int slotno;
TransactionId *xidptr;
+ int memberoff;
+ memberoff = MXOffsetToMemberOffset(offset);
slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, true, offset);
- xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
- xidptr += entryno;
+ xidptr = (TransactionId *)
+ (MultiXactMemberCtl->shared->page_buffer[slotno] + memberoff);
- MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
+ MemSet(xidptr, 0, BLCKSZ - memberoff);
+
+ /*
+ * Note: we don't need to zero out the flag bits in the remaining
+ * members of the current group, because they are always reset before
+ * writing.
+ */
MultiXactMemberCtl->shared->page_dirty[slotno] = true;
}
LWLockRelease(MultiXactMemberControlLock);
-
- /*
- * Initialize lastTruncationPoint to invalid, ensuring that the first
- * checkpoint will try to do truncation.
- */
- MultiXactState->lastTruncationPoint = InvalidMultiXactId;
}
/*
@@ -1607,22 +1819,25 @@ ShutdownMultiXact(void)
}
/*
- * Get the next MultiXactId and offset to save in a checkpoint record
+ * Get the MultiXact data to save in a checkpoint record
*/
void
MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
- MultiXactOffset *nextMultiOffset)
+ MultiXactOffset *nextMultiOffset,
+ MultiXactId *oldestMulti,
+ Oid *oldestMultiDB)
{
LWLockAcquire(MultiXactGenLock, LW_SHARED);
-
*nextMulti = MultiXactState->nextMXact;
*nextMultiOffset = MultiXactState->nextOffset;
-
+ *oldestMulti = MultiXactState->oldestMultiXactId;
+ *oldestMultiDB = MultiXactState->oldestMultiXactDB;
LWLockRelease(MultiXactGenLock);
- debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
- *nextMulti, *nextMultiOffset);
+ debug_elog6(DEBUG2,
+ "MultiXact: checkpoint is nextMulti %u, nextOffset %u, oldestMulti %u in DB %u",
+ *nextMulti, *nextMultiOffset, *oldestMulti, *oldestMultiDB);
}
/*
@@ -1637,17 +1852,6 @@ CheckPointMultiXact(void)
SimpleLruFlush(MultiXactOffsetCtl, true);
SimpleLruFlush(MultiXactMemberCtl, true);
- /*
- * Truncate the SLRU files. This could be done at any time, but
- * checkpoint seems a reasonable place for it. There is one exception: if
- * we are called during xlog recovery, then shared->latest_page_number
- * isn't valid (because StartupMultiXact hasn't been called yet) and so
- * SimpleLruTruncate would get confused. It seems best not to risk
- * removing any data during recovery anyway, so don't truncate.
- */
- if (!RecoveryInProgress())
- TruncateMultiXact();
-
TRACE_POSTGRESQL_MULTIXACT_CHECKPOINT_DONE(true);
}
@@ -1672,8 +1876,128 @@ MultiXactSetNextMXact(MultiXactId nextMulti,
}
/*
+ * SetMultiXactIdLimit
+ *		Recompute and publish the MultiXactId allocation limits.
+ *
+ * Determine the last safe MultiXactId to allocate given the currently oldest
+ * datminmxid (ie, the oldest MultiXactId that might exist in any database
+ * of our cluster), and the OID of the (or a) database with that value.
+ *
+ * The derived vacuum, warn, stop and wrap limits are stored in MultiXactState
+ * under MultiXactGenLock, together with the oldest value and its database.
+ * Outside recovery, if the next MultiXactId to be assigned is already past
+ * the vacuum limit we ask the postmaster to start the autovacuum launcher,
+ * and if it is past the warn limit we emit a WARNING right away.
+ */
+void
+SetMultiXactIdLimit(MultiXactId oldest_datminmxid, Oid oldest_datoid)
+{
+	MultiXactId multiVacLimit;
+	MultiXactId multiWarnLimit;
+	MultiXactId multiStopLimit;
+	MultiXactId multiWrapLimit;
+	MultiXactId curMulti;
+
+	Assert(MultiXactIdIsValid(oldest_datminmxid));
+
+	/*
+	 * The place where we actually get into deep trouble is halfway around
+	 * from the oldest potentially-existing XID/multi.  (This calculation is
+	 * probably off by one or two counts for Xids, because the special XIDs
+	 * reduce the size of the loop a little bit.  But we throw in plenty of
+	 * slop below, so it doesn't matter.)
+	 */
+	multiWrapLimit = oldest_datminmxid + (MaxMultiXactId >> 1);
+	if (multiWrapLimit < FirstMultiXactId)
+		multiWrapLimit += FirstMultiXactId;
+
+	/*
+	 * We'll refuse to continue assigning MultiXactIds once we get within 100
+	 * multis of data loss.
+	 */
+	multiStopLimit = multiWrapLimit - 100;
+	if (multiStopLimit < FirstMultiXactId)
+		multiStopLimit -= FirstMultiXactId;
+
+	/*
+	 * We'll start complaining loudly when we get within 10M multis of the stop
+	 * point.   This is kind of arbitrary, but if you let your gas gauge get
+	 * down to 1% of full, would you be looking for the next gas station?  We
+	 * need to be fairly liberal about this number because there are lots of
+	 * scenarios where most transactions are done by automatic clients that
+	 * won't pay attention to warnings. (No, we're not gonna make this
+	 * configurable.  If you know enough to configure it, you know enough to
+	 * not get in this kind of trouble in the first place.)
+	 */
+	multiWarnLimit = multiStopLimit - 10000000;
+	if (multiWarnLimit < FirstMultiXactId)
+		multiWarnLimit -= FirstMultiXactId;
+
+	/*
+	 * We'll start trying to force autovacuums when oldest_datminmxid gets
+	 * to be more than 200 million multixacts old.
+	 */
+	multiVacLimit = oldest_datminmxid + 200000000;
+	if (multiVacLimit < FirstMultiXactId)
+		multiVacLimit += FirstMultiXactId;
+
+	/* Grab lock for just long enough to set the new limit values */
+	LWLockAcquire(MultiXactGenLock, LW_EXCLUSIVE);
+	MultiXactState->oldestMultiXactId = oldest_datminmxid;
+	MultiXactState->oldestMultiXactDB = oldest_datoid;
+	MultiXactState->multiVacLimit = multiVacLimit;
+	MultiXactState->multiWarnLimit = multiWarnLimit;
+	MultiXactState->multiStopLimit = multiStopLimit;
+	MultiXactState->multiWrapLimit = multiWrapLimit;
+	/* sample the allocation counter under the same lock for the checks below */
+	curMulti = MultiXactState->nextMXact;
+	LWLockRelease(MultiXactGenLock);
+
+	/* Log the info */
+	ereport(DEBUG1,
+			(errmsg("MultiXactId wrap limit is %u, limited by database with OID %u",
+					multiWrapLimit, oldest_datoid)));
+
+	/*
+	 * If past the autovacuum force point, immediately signal an autovac
+	 * request.  The reason for this is that autovac only processes one
+	 * database per invocation.  Once it's finished cleaning up the oldest
+	 * database, it'll call here, and we'll signal the postmaster to start
+	 * another iteration immediately if there are still any old databases.
+	 */
+	if (MultiXactIdPrecedes(multiVacLimit, curMulti) &&
+		IsUnderPostmaster && !InRecovery)
+		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC_LAUNCHER);
+
+	/* Give an immediate warning if past the wrap warn point */
+	if (MultiXactIdPrecedes(multiWarnLimit, curMulti) && !InRecovery)
+	{
+		char	   *oldest_datname;
+
+		/*
+		 * We can be called when not inside a transaction, for example during
+		 * StartupXLOG().  In such a case we cannot do database access, so we
+		 * must just report the oldest DB's OID.
+		 *
+		 * Note: it's also possible that get_database_name fails and returns
+		 * NULL, for example because the database just got dropped.  We'll
+		 * still warn, even though the warning might now be unnecessary.
+		 */
+		if (IsTransactionState())
+			oldest_datname = get_database_name(oldest_datoid);
+		else
+			oldest_datname = NULL;
+
+		/*
+		 * The message wording ("MultiXactIds", plural) matches the warnings
+		 * issued when a new MultiXactId is assigned.
+		 */
+		if (oldest_datname)
+			ereport(WARNING,
+					(errmsg("database \"%s\" must be vacuumed before %u more MultiXactIds are used",
+							oldest_datname,
+							multiWrapLimit - curMulti),
+					 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
+							 "You might also need to commit or roll back old prepared transactions.")));
+		else
+			ereport(WARNING,
+					(errmsg("database with OID %u must be vacuumed before %u more MultiXactIds are used",
+							oldest_datoid,
+							multiWrapLimit - curMulti),
+					 errhint("To avoid a database shutdown, execute a database-wide VACUUM in that database.\n"
+							 "You might also need to commit or roll back old prepared transactions.")));
+	}
+}
+
+/*
* Ensure the next-to-be-assigned MultiXactId is at least minMulti,
- * and similarly nextOffset is at least minMultiOffset
+ * and similarly nextOffset is at least minMultiOffset.
*
* This is used when we can determine minimum safe values from an XLog
* record (either an on-line checkpoint or an mxact creation log entry).
@@ -1700,6 +2024,17 @@ MultiXactAdvanceNextMXact(MultiXactId minMulti,
}
/*
+ * Update our oldestMultiXactId value, but only if it's more recent than
+ * what we had.
+ */
+void
+MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB)
+{
+	/* Ignore values that would not move the recorded oldest point forward. */
+	if (!MultiXactIdPrecedes(MultiXactState->oldestMultiXactId, oldestMulti))
+		return;
+
+	/* Adopt the newer value and recompute the limits derived from it. */
+	SetMultiXactIdLimit(oldestMulti, oldestMultiDB);
+}
+
+/*
* Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
*
* NB: this is called while holding MultiXactGenLock. We want it to be very
@@ -1748,13 +2083,16 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
*/
while (nmembers > 0)
{
- int entryno;
+ int flagsoff;
+ int flagsbit;
+ int difference;
/*
* Only zero when at first entry of a page.
*/
- entryno = MXOffsetToMemberEntry(offset);
- if (entryno == 0)
+ flagsoff = MXOffsetToFlagsOffset(offset);
+ flagsbit = MXOffsetToFlagsBitShift(offset);
+ if (flagsoff == 0 && flagsbit == 0)
{
int pageno;
@@ -1769,33 +2107,32 @@ ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
}
/* Advance to next page (OK if nmembers goes negative) */
- offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
- nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
+ difference = MULTIXACT_MEMBERS_PER_PAGE - offset % MULTIXACT_MEMBERS_PER_PAGE;
+ offset += difference;
+ nmembers -= difference;
}
}
/*
- * Remove all MultiXactOffset and MultiXactMember segments before the oldest
- * ones still of interest.
+ * GetOldestMultiXactId
*
- * This is called only during checkpoints. We assume no more than one
- * backend does this at a time.
+ * Return the oldest MultiXactId that's possibly still seen as live by
+ * any running transaction. Older ones might still exist on disk, but they no
+ * longer have any running member transaction.
*
- * XXX do we have any issues with needing to checkpoint here?
+ * It's not safe to truncate MultiXact SLRU segments on the value returned by
+ * this function; however, it can be used by a full-table vacuum to set the
+ * point at which it will be possible to truncate SLRU for that table.
*/
-static void
-TruncateMultiXact(void)
+MultiXactId
+GetOldestMultiXactId(void)
{
- MultiXactId nextMXact;
- MultiXactOffset nextOffset;
- MultiXactId oldestMXact;
- MultiXactOffset oldestOffset;
- int cutoffPage;
- int i;
+ MultiXactId oldestMXact;
+ MultiXactId nextMXact;
+ int i;
/*
- * First, compute where we can safely truncate. Per notes above, this is
- * the oldest valid value among all the OldestMemberMXactId[] and
+ * This is the oldest valid value among all the OldestMemberMXactId[] and
* OldestVisibleMXactId[] entries, or nextMXact if none are valid.
*/
LWLockAcquire(MultiXactGenLock, LW_SHARED);
@@ -1824,28 +2161,69 @@ TruncateMultiXact(void)
oldestMXact = thisoldest;
}
- /* Save the current nextOffset too */
- nextOffset = MultiXactState->nextOffset;
-
LWLockRelease(MultiXactGenLock);
- debug_elog3(DEBUG2, "MultiXact: truncation point = %u", oldestMXact);
+ return oldestMXact;
+}
+
+typedef struct mxtruncinfo		/* state passed to SlruScanDirCbFindEarliest */
+{
+	int			earliestExistingPage;	/* -1 until some segment has been seen */
+} mxtruncinfo;
+
+/*
+ * SlruScanDirectory callback
+ * This callback determines the earliest existing page number.
+ */
+static bool
+SlruScanDirCbFindEarliest(SlruCtl ctl, char *filename, int segpage, void *data)
+{
+	mxtruncinfo *info = (mxtruncinfo *) data;
+	bool		unset = (info->earliestExistingPage == -1);
+
+	/*
+	 * Adopt this segment's page when nothing has been recorded yet, or when
+	 * it sorts before the current candidate per the SLRU's own comparator.
+	 */
+	if (unset || ctl->PagePrecedes(segpage, info->earliestExistingPage))
+		info->earliestExistingPage = segpage;
+
+	/* returning false tells the directory scan to keep going */
+	return false;
+}
+
+/*
+ * Remove all MultiXactOffset and MultiXactMember segments before the oldest
+ * ones still of interest.
+ *
+ * This is called by vacuum after it has successfully advanced a database's
+ * datminmxid value; the cutoff value we're passed is the minimum of all
+ * databases' datminmxid values.
+ */
+void
+TruncateMultiXact(MultiXactId oldestMXact)
+{
+ MultiXactOffset oldestOffset;
+ mxtruncinfo trunc;
+ MultiXactId earliest;
/*
- * If we already truncated at this point, do nothing. This saves time
- * when no MultiXacts are getting used, which is probably not uncommon.
+ * Note we can't just plow ahead with the truncation; it's possible that
+ * there are no segments to truncate, which is a problem because we are
+ * going to attempt to read the offsets page to determine where to truncate
+ * the members SLRU. So we first scan the directory to determine the
+ * earliest offsets page number that we can read without error.
*/
- if (MultiXactState->lastTruncationPoint == oldestMXact)
+ trunc.earliestExistingPage = -1;
+ SlruScanDirectory(MultiXactOffsetCtl, SlruScanDirCbFindEarliest, &trunc);
+ earliest = trunc.earliestExistingPage * MULTIXACT_OFFSETS_PER_PAGE;
+
+ /* nothing to do */
+ if (MultiXactIdPrecedes(oldestMXact, earliest))
return;
/*
- * We need to determine where to truncate MultiXactMember. If we found a
- * valid oldest MultiXactId, read its starting offset; otherwise we use
- * the nextOffset value we saved above.
+ * First, compute the safe truncation point for MultiXactMember.
+ * This is the starting offset of the multixact we were passed
+ * as MultiXactOffset cutoff.
*/
- if (oldestMXact == nextMXact)
- oldestOffset = nextOffset;
- else
{
int pageno;
int slotno;
@@ -1857,34 +2235,23 @@ TruncateMultiXact(void)
pageno = MultiXactIdToOffsetPage(oldestMXact);
entryno = MultiXactIdToOffsetEntry(oldestMXact);
- slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno, oldestMXact);
- offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
+ slotno = SimpleLruReadPage_ReadOnly(MultiXactOffsetCtl, pageno,
+ oldestMXact);
+ offptr = (MultiXactOffset *)
+ MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
oldestOffset = *offptr;
LWLockRelease(MultiXactOffsetControlLock);
}
- /*
- * The cutoff point is the start of the segment containing oldestMXact. We
- * pass the *page* containing oldestMXact to SimpleLruTruncate.
- */
- cutoffPage = MultiXactIdToOffsetPage(oldestMXact);
-
- SimpleLruTruncate(MultiXactOffsetCtl, cutoffPage);
-
- /*
- * Also truncate MultiXactMember at the previously determined offset.
- */
- cutoffPage = MXOffsetToMemberPage(oldestOffset);
+ /* truncate MultiXactOffset */
+ SimpleLruTruncate(MultiXactOffsetCtl,
+ MultiXactIdToOffsetPage(oldestMXact));
- SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
-
- /*
- * Set the last known truncation point. We don't need a lock for this
- * since only one backend does checkpoints at a time.
- */
- MultiXactState->lastTruncationPoint = oldestMXact;
+ /* truncate MultiXactMembers and we're done */
+ SimpleLruTruncate(MultiXactMemberCtl,
+ MXOffsetToMemberPage(oldestOffset));
}
/*
@@ -1934,7 +2301,7 @@ MultiXactMemberPagePrecedes(int page1, int page2)
* XXX do we need to do something special for InvalidMultiXactId?
* (Doesn't look like it.)
*/
-static bool
+bool
MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
{
int32 diff = (int32) (multi1 - multi2);
@@ -1953,7 +2320,6 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
return (diff < 0);
}
-
/*
* Write an xlog record reflecting the zeroing of either a MEMBERs or
* OFFSETs page (info shows which)
@@ -2013,16 +2379,18 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
}
else if (info == XLOG_MULTIXACT_CREATE_ID)
{
- xl_multixact_create *xlrec = (xl_multixact_create *) XLogRecGetData(record);
- TransactionId *xids = xlrec->xids;
+ xl_multixact_create *xlrec =
+ (xl_multixact_create *) XLogRecGetData(record);
TransactionId max_xid;
int i;
/* Store the data back into the SLRU files */
- RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nxids, xids);
+ RecordNewMultiXact(xlrec->mid, xlrec->moff, xlrec->nmembers,
+ xlrec->members);
/* Make sure nextMXact/nextOffset are beyond what this record has */
- MultiXactAdvanceNextMXact(xlrec->mid + 1, xlrec->moff + xlrec->nxids);
+ MultiXactAdvanceNextMXact(xlrec->mid + 1,
+ xlrec->moff + xlrec->nmembers);
/*
* Make sure nextXid is beyond any XID mentioned in the record. This
@@ -2030,10 +2398,10 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
* evidence in the XLOG, but let's be safe.
*/
max_xid = record->xl_xid;
- for (i = 0; i < xlrec->nxids; i++)
+ for (i = 0; i < xlrec->nmembers; i++)
{
- if (TransactionIdPrecedes(max_xid, xids[i]))
- max_xid = xids[i];
+ if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
+ max_xid = xlrec->members[i].xid;
}
/*
@@ -2053,3 +2421,72 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
else
elog(PANIC, "multixact_redo: unknown op code %u", info);
}
+
+Datum
+pg_get_multixact_members(PG_FUNCTION_ARGS)
+{
+	/* per-query iteration state, kept in the SRF's multi-call context */
+	typedef struct
+	{
+		MultiXactMember *members;
+		int			nmembers;
+		int			iter;
+	} mxact;
+	MultiXactId mxid = PG_GETARG_UINT32(0);
+	mxact	   *state;
+	FuncCallContext *fctx;
+
+	/* reject values below the first assignable MultiXactId */
+	if (mxid < FirstMultiXactId)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid MultiXactId: %u", mxid)));
+
+	/* First call: fetch the member array and describe the result rows. */
+	if (SRF_IS_FIRSTCALL())
+	{
+		MemoryContext saved;
+		TupleDesc	desc;
+
+		fctx = SRF_FIRSTCALL_INIT();
+		saved = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+		state = palloc(sizeof(mxact));
+		/* no need to allow for old values here */
+		state->nmembers = GetMultiXactIdMembers(mxid, &state->members, false);
+		state->iter = 0;
+
+		/* result rows are (xid, mode) */
+		desc = CreateTemplateTupleDesc(2, false);
+		TupleDescInitEntry(desc, (AttrNumber) 1, "xid",
+						   XIDOID, -1, 0);
+		TupleDescInitEntry(desc, (AttrNumber) 2, "mode",
+						   TEXTOID, -1, 0);
+
+		fctx->attinmeta = TupleDescGetAttInMetadata(desc);
+		fctx->user_fctx = state;
+
+		MemoryContextSwitchTo(saved);
+	}
+
+	fctx = SRF_PERCALL_SETUP();
+	state = (mxact *) fctx->user_fctx;
+
+	/* Emit one row per call for as long as members remain. */
+	if (state->iter < state->nmembers)
+	{
+		MultiXactMember *member = &state->members[state->iter];
+		char	   *values[2];
+		HeapTuple	tuple;
+
+		values[0] = palloc(32);
+		sprintf(values[0], "%u", member->xid);
+		values[1] = mxstatus_to_string(member->status);
+
+		tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
+
+		state->iter++;
+		pfree(values[0]);
+		SRF_RETURN_NEXT(fctx, HeapTupleGetDatum(tuple));
+	}
+
+	/* Exhausted: release the iteration state and finish the set. */
+	if (state->nmembers > 0)
+		pfree(state->members);
+	pfree(state);
+
+	SRF_RETURN_DONE(fctx);
+}