diff options
author | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2008-05-12 20:02:02 +0000 |
---|---|---|
committer | Alvaro Herrera <alvherre@alvh.no-ip.org> | 2008-05-12 20:02:02 +0000 |
commit | 5da9da71c44f27ba48fdad08ef263bf70e43e689 (patch) | |
tree | d8afb52acd9386a59c1862a265d4f8e6d2fdbaba /src/backend/utils/time/snapmgr.c | |
parent | aa82790fcab98b8d3d4eca2e2f6f7bfce57870bc (diff) | |
download | postgresql-5da9da71c44f27ba48fdad08ef263bf70e43e689.tar.gz postgresql-5da9da71c44f27ba48fdad08ef263bf70e43e689.zip |
Improve snapshot manager by keeping explicit track of snapshots.
There are two ways to track a snapshot: there's the "registered" list, which
is used for arbitrary long-lived snapshots; and there's the "active stack",
which is used for the snapshot that is considered "active" at any time.
This also allows users of snapshots to stop worrying about snapshot memory
allocation and freeing, and about using PG_TRY blocks around ActiveSnapshot
assignment. This is all done automatically now.
As a consequence, this allows us to reset MyProc->xmin when there are no
more snapshots registered in the current backend, reducing the impact that
long-running transactions have on VACUUM.
Diffstat (limited to 'src/backend/utils/time/snapmgr.c')
-rw-r--r-- | src/backend/utils/time/snapmgr.c | 562 |
1 files changed, 516 insertions, 46 deletions
diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index ac461213b3c..71a35bf6fa0 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -2,11 +2,28 @@ * snapmgr.c * PostgreSQL snapshot manager * + * We keep track of snapshots in two ways: the "registered snapshots" list, + * and the "active snapshot" stack. All snapshots in any of them is supposed + * to be in persistent memory. When a snapshot is no longer in any of these + * lists (tracked by separate refcounts of each snapshot), its memory can be + * freed. + * + * These arrangements let us reset MyProc->xmin when there are no snapshots + * referenced by this transaction. (One possible improvement would be to be + * able to advance Xmin when the snapshot with the earliest Xmin is no longer + * referenced. That's a bit harder though, it requires more locking, and + * anyway it should be rather uncommon to keep snapshots referenced for too + * long.) + * + * Note: parts of this code could probably be replaced by appropriate use + * of resowner.c. + * + * * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.1 2008/03/26 18:48:59 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/utils/time/snapmgr.c,v 1.2 2008/05/12 20:02:02 alvherre Exp $ * *------------------------------------------------------------------------- */ @@ -14,28 +31,30 @@ #include "access/xact.h" #include "access/transam.h" +#include "storage/proc.h" #include "storage/procarray.h" +#include "utils/memutils.h" #include "utils/snapmgr.h" #include "utils/tqual.h" +#include "utils/memutils.h" /* + * CurrentSnapshot points to the only snapshot taken in a serializable + * transaction, and to the latest one taken in a read-committed transaction. 
+ * SecondarySnapshot is a snapshot that's always up-to-date as of the current + * instant, even on a serializable transaction. It should only be used for + * special-purpose code (say, RI checking.) + * * These SnapshotData structs are static to simplify memory allocation * (see the hack in GetSnapshotData to avoid repeated malloc/free). */ -static SnapshotData SerializableSnapshotData = {HeapTupleSatisfiesMVCC}; -static SnapshotData LatestSnapshotData = {HeapTupleSatisfiesMVCC}; - -/* Externally visible pointers to valid snapshots: */ -Snapshot SerializableSnapshot = NULL; -Snapshot LatestSnapshot = NULL; +static SnapshotData CurrentSnapshotData = {HeapTupleSatisfiesMVCC}; +static SnapshotData SecondarySnapshotData = {HeapTupleSatisfiesMVCC}; -/* - * This pointer is not maintained by this module, but it's convenient - * to declare it here anyway. Callers typically assign a copy of - * GetTransactionSnapshot's result to ActiveSnapshot. - */ -Snapshot ActiveSnapshot = NULL; +/* Pointers to valid snapshots */ +static Snapshot CurrentSnapshot = NULL; +static Snapshot SecondarySnapshot = NULL; /* * These are updated by GetSnapshotData. We initialize them this way @@ -46,35 +65,106 @@ TransactionId TransactionXmin = FirstNormalTransactionId; TransactionId RecentXmin = FirstNormalTransactionId; TransactionId RecentGlobalXmin = FirstNormalTransactionId; +/* + * Elements of the list of registered snapshots. + * + * Note that we keep refcounts both here and in SnapshotData. This is because + * the same snapshot may be registered more than once in a subtransaction, and + * if a subxact aborts we want to be able to subtract the correct amount of + * counts from SnapshotData. (Another approach would be keeping one + * RegdSnapshotElt each time a snapshot is registered, but that seems + * unnecessary wastage.) + * + * NB: the code assumes that elements in this list are in non-increasing + * order of s_level; also, the list must be NULL-terminated. 
+ */ +typedef struct RegdSnapshotElt +{ + Snapshot s_snap; + uint32 s_count; + int s_level; + struct RegdSnapshotElt *s_next; +} RegdSnapshotElt; + +/* + * Elements of the active snapshot stack. + * + * It's not necessary to keep a refcount like we do for the registered list; + * each element here accounts for exactly one active_count on SnapshotData. + * We cannot condense them like we do for RegdSnapshotElt because it would mess + * up the order of entries in the stack. + * + * NB: the code assumes that elements in this list are in non-increasing + * order of as_level; also, the list must be NULL-terminated. + */ +typedef struct ActiveSnapshotElt +{ + Snapshot as_snap; + int as_level; + struct ActiveSnapshotElt *as_next; +} ActiveSnapshotElt; + +/* Head of the list of registered snapshots */ +static RegdSnapshotElt *RegisteredSnapshotList = NULL; + +/* Top of the stack of active snapshots */ +static ActiveSnapshotElt *ActiveSnapshot = NULL; + +/* first GetTransactionSnapshot call in a transaction? */ +bool FirstSnapshotSet = false; + +/* + * Remembers whether this transaction registered a serializable snapshot at + * start. We cannot trust FirstSnapshotSet in combination with + * IsXactIsoLevelSerializable, because GUC may be reset before us. + */ +static bool registered_serializable = false; + + +static Snapshot CopySnapshot(Snapshot snapshot); +static void FreeSnapshot(Snapshot snapshot); +static void SnapshotResetXmin(void); + /* * GetTransactionSnapshot * Get the appropriate snapshot for a new query in a transaction. * - * The SerializableSnapshot is the first one taken in a transaction. - * In serializable mode we just use that one throughout the transaction. - * In read-committed mode, we take a new snapshot each time we are called. * - * Note that the return value points at static storage that will be modified - * by future calls and by CommandCounterIncrement(). Callers should copy - * the result with CopySnapshot() if it is to be used very long. 
+ * Note that the return value may point at static storage that will be modified + * by future calls and by CommandCounterIncrement(). Callers should call + * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be + * used very long. */ Snapshot GetTransactionSnapshot(void) { /* First call in transaction? */ - if (SerializableSnapshot == NULL) + if (!FirstSnapshotSet) { - SerializableSnapshot = GetSnapshotData(&SerializableSnapshotData, true); - return SerializableSnapshot; + CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); + FirstSnapshotSet = true; + + /* + * In serializable mode, the first snapshot must live until end of xact + * regardless of what the caller does with it, so we must register it + * internally here and unregister it at end of xact. + */ + if (IsXactIsoLevelSerializable) + { + CurrentSnapshot = RegisterSnapshot(CurrentSnapshot); + registered_serializable = true; + } + + return CurrentSnapshot; } if (IsXactIsoLevelSerializable) - return SerializableSnapshot; + return CurrentSnapshot; - LatestSnapshot = GetSnapshotData(&LatestSnapshotData, false); + CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData); - return LatestSnapshot; + return CurrentSnapshot; } /* @@ -86,36 +176,59 @@ Snapshot GetLatestSnapshot(void) { /* Should not be first call in transaction */ - if (SerializableSnapshot == NULL) + if (!FirstSnapshotSet) elog(ERROR, "no snapshot has been set"); - LatestSnapshot = GetSnapshotData(&LatestSnapshotData, false); + SecondarySnapshot = GetSnapshotData(&SecondarySnapshotData); - return LatestSnapshot; + return SecondarySnapshot; +} + +/* + * SnapshotSetCommandId + * Propagate CommandCounterIncrement into the static snapshots, if set + */ +void +SnapshotSetCommandId(CommandId curcid) +{ + if (!FirstSnapshotSet) + return; + + if (CurrentSnapshot) + CurrentSnapshot->curcid = curcid; + if (SecondarySnapshot) + SecondarySnapshot->curcid = curcid; } /* * CopySnapshot * Copy the given snapshot. 
* - * The copy is palloc'd in the current memory context. + * The copy is palloc'd in TopTransactionContext and has initial refcounts set + * to 0. The returned snapshot has the copied flag set. */ -Snapshot +static Snapshot CopySnapshot(Snapshot snapshot) { Snapshot newsnap; Size subxipoff; Size size; + Assert(snapshot != InvalidSnapshot); + /* We allocate any XID arrays needed in the same palloc block. */ size = subxipoff = sizeof(SnapshotData) + snapshot->xcnt * sizeof(TransactionId); if (snapshot->subxcnt > 0) size += snapshot->subxcnt * sizeof(TransactionId); - newsnap = (Snapshot) palloc(size); + newsnap = (Snapshot) MemoryContextAlloc(TopTransactionContext, size); memcpy(newsnap, snapshot, sizeof(SnapshotData)); + newsnap->regd_count = 0; + newsnap->active_count = 0; + newsnap->copied = true; + /* setup XID array */ if (snapshot->xcnt > 0) { @@ -141,32 +254,389 @@ CopySnapshot(Snapshot snapshot) /* * FreeSnapshot - * Free a snapshot previously copied with CopySnapshot. + * Free the memory associated with a snapshot. + */ +static void +FreeSnapshot(Snapshot snapshot) +{ + Assert(snapshot->regd_count == 0); + Assert(snapshot->active_count == 0); + + pfree(snapshot); +} + +/* + * PushActiveSnapshot + * Set the given snapshot as the current active snapshot * - * This is currently identical to pfree, but is provided for cleanliness. + * If this is the first use of this snapshot, create a new long-lived copy with + * active refcount=1. Otherwise, only increment the refcount. + */ +void +PushActiveSnapshot(Snapshot snap) +{ + ActiveSnapshotElt *newactive; + + Assert(snap != InvalidSnapshot); + + newactive = MemoryContextAlloc(TopTransactionContext, sizeof(ActiveSnapshotElt)); + /* Static snapshot? Create a persistent copy */ + newactive->as_snap = snap->copied ? 
snap : CopySnapshot(snap); + newactive->as_next = ActiveSnapshot; + newactive->as_level = GetCurrentTransactionNestLevel(); + + newactive->as_snap->active_count++; + + ActiveSnapshot = newactive; +} + +/* + * PushUpdatedSnapshot + * As above, except we set the snapshot's CID to the current CID. + */ +void +PushUpdatedSnapshot(Snapshot snapshot) +{ + Snapshot newsnap; + + /* + * We cannot risk modifying a snapshot that's possibly already used + * elsewhere, so make a new copy to scribble on. + */ + newsnap = CopySnapshot(snapshot); + newsnap->curcid = GetCurrentCommandId(false); + + PushActiveSnapshot(newsnap); +} + +/* + * PopActiveSnapshot * - * Do *not* apply this to the results of GetTransactionSnapshot or - * GetLatestSnapshot, since those are just static structs. + * Remove the topmost snapshot from the active snapshot stack, decrementing the + * reference count, and free it if this was the last reference. */ void -FreeSnapshot(Snapshot snapshot) +PopActiveSnapshot(void) { - pfree(snapshot); + ActiveSnapshotElt *newstack; + + newstack = ActiveSnapshot->as_next; + + Assert(ActiveSnapshot->as_snap->active_count > 0); + + ActiveSnapshot->as_snap->active_count--; + + if (ActiveSnapshot->as_snap->active_count == 0 && + ActiveSnapshot->as_snap->regd_count == 0) + FreeSnapshot(ActiveSnapshot->as_snap); + + pfree(ActiveSnapshot); + ActiveSnapshot = newstack; + + SnapshotResetXmin(); } /* - * FreeXactSnapshot - * Free snapshot(s) at end of transaction. + * GetActiveSnapshot + * Return the topmost snapshot in the Active stack. + */ +Snapshot +GetActiveSnapshot(void) +{ + Assert(ActiveSnapshot != NULL); + + return ActiveSnapshot->as_snap; +} + +/* + * ActiveSnapshotSet + * Return whether there is at least one snapshot in the Active stack + */ +bool +ActiveSnapshotSet(void) +{ + return ActiveSnapshot != NULL; +} + +/* + * RegisterSnapshot + * Register a snapshot as being in use + * + * If InvalidSnapshot is passed, it is not registered. 
+ */ +Snapshot +RegisterSnapshot(Snapshot snapshot) +{ + RegdSnapshotElt *elt; + RegdSnapshotElt *newhead; + int level; + + if (snapshot == InvalidSnapshot) + return InvalidSnapshot; + + level = GetCurrentTransactionNestLevel(); + + /* + * If there's already an item in the list for the same snapshot and the + * same subxact nest level, increment its refcounts. Otherwise create a + * new one. + */ + for (elt = RegisteredSnapshotList; elt != NULL; elt = elt->s_next) + { + if (elt->s_level < level) + break; + + if (elt->s_snap == snapshot && elt->s_level == level) + { + elt->s_snap->regd_count++; + elt->s_count++; + + return elt->s_snap; + } + } + + /* + * Create the new list element. If it's not been copied into persistent + * memory already, we must do so; otherwise we can just increment the + * reference count. + */ + newhead = MemoryContextAlloc(TopTransactionContext, sizeof(RegdSnapshotElt)); + newhead->s_next = RegisteredSnapshotList; + /* Static snapshot? Create a persistent copy */ + newhead->s_snap = snapshot->copied ? snapshot : CopySnapshot(snapshot); + newhead->s_level = level; + newhead->s_count = 1; + + newhead->s_snap->regd_count++; + + RegisteredSnapshotList = newhead; + + return RegisteredSnapshotList->s_snap; +} + +/* + * UnregisterSnapshot + * Signals that a snapshot is no longer necessary + * + * If both reference counts fall to zero, the snapshot memory is released. + * If only the registered list refcount falls to zero, just the list element is + * freed. 
+ */ +void +UnregisterSnapshot(Snapshot snapshot) +{ + RegdSnapshotElt *prev = NULL; + RegdSnapshotElt *elt; + bool found = false; + + if (snapshot == InvalidSnapshot) + return; + + for (elt = RegisteredSnapshotList; elt != NULL; elt = elt->s_next) + { + if (elt->s_snap == snapshot) + { + Assert(elt->s_snap->regd_count > 0); + Assert(elt->s_count > 0); + + elt->s_snap->regd_count--; + elt->s_count--; + found = true; + + if (elt->s_count == 0) + { + /* delink it from the registered snapshot list */ + if (prev) + prev->s_next = elt->s_next; + else + RegisteredSnapshotList = elt->s_next; + + /* free the snapshot itself if it's no longer relevant */ + if (elt->s_snap->regd_count == 0 && elt->s_snap->active_count == 0) + FreeSnapshot(elt->s_snap); + + /* and free the list element */ + pfree(elt); + } + + break; + } + + prev = elt; + } + + if (!found) + elog(WARNING, "unregistering failed for snapshot %p", snapshot); + + SnapshotResetXmin(); +} + +/* + * SnapshotResetXmin + * + * If there are no more snapshots, we can reset our PGPROC->xmin to InvalidXid. + * Note we can do this without locking because we assume that storing an Xid + * is atomic. + */ +static void +SnapshotResetXmin(void) +{ + if (RegisteredSnapshotList == NULL && ActiveSnapshot == NULL) + MyProc->xmin = InvalidTransactionId; +} + +/* + * AtSubCommit_Snapshot */ void -FreeXactSnapshot(void) +AtSubCommit_Snapshot(int level) { + ActiveSnapshotElt *active; + RegdSnapshotElt *regd; + /* - * We do not free the xip arrays for the static snapshot structs; they - * will be reused soon. So this is now just a state change to prevent - * outside callers from accessing the snapshots. + * Relabel the active snapshots set in this subtransaction as though they + * are owned by the parent subxact. 
*/ - SerializableSnapshot = NULL; - LatestSnapshot = NULL; - ActiveSnapshot = NULL; /* just for cleanliness */ + for (active = ActiveSnapshot; active != NULL; active = active->as_next) + { + if (active->as_level < level) + break; + active->as_level = level - 1; + } + + /* + * Reassign all registered snapshots to the parent subxact. + * + * Note: this code is somewhat bogus in that we could end up with multiple + * entries for the same snapshot and the same subxact level (my parent's + * level). Cleaning that up is more trouble than it's currently worth, + * however. + */ + for (regd = RegisteredSnapshotList; regd != NULL; regd = regd->s_next) + { + if (regd->s_level == level) + regd->s_level--; + } +} + +/* + * AtSubAbort_Snapshot + * Clean up snapshots after a subtransaction abort + */ +void +AtSubAbort_Snapshot(int level) +{ + RegdSnapshotElt *prev; + RegdSnapshotElt *regd; + + /* Forget the active snapshots set by this subtransaction */ + while (ActiveSnapshot && ActiveSnapshot->as_level >= level) + { + ActiveSnapshotElt *next; + + next = ActiveSnapshot->as_next; + + /* + * Decrement the snapshot's active count. If it's still registered or + * marked as active by an outer subtransaction, we can't free it yet. 
+ */ + Assert(ActiveSnapshot->as_snap->active_count >= 1); + ActiveSnapshot->as_snap->active_count -= 1; + + if (ActiveSnapshot->as_snap->active_count == 0 && + ActiveSnapshot->as_snap->regd_count == 0) + FreeSnapshot(ActiveSnapshot->as_snap); + + /* and free the stack element */ + pfree(ActiveSnapshot); + + ActiveSnapshot = next; + } + + /* Unregister all snapshots registered during this subtransaction */ + prev = NULL; + for (regd = RegisteredSnapshotList; regd != NULL; ) + { + if (regd->s_level >= level) + { + RegdSnapshotElt *tofree; + + if (prev) + prev->s_next = regd->s_next; + else + RegisteredSnapshotList = regd->s_next; + + tofree = regd; + regd = regd->s_next; + + tofree->s_snap->regd_count -= tofree->s_count; + + /* free the snapshot if possible */ + if (tofree->s_snap->regd_count == 0 && + tofree->s_snap->active_count == 0) + FreeSnapshot(tofree->s_snap); + + /* and free the list element */ + pfree(tofree); + } + else + { + prev = regd; + regd = regd->s_next; + } + } + + SnapshotResetXmin(); +} + +/* + * AtEOXact_Snapshot + * Snapshot manager's cleanup function for end of transaction + */ +void +AtEOXact_Snapshot(bool isCommit) +{ + /* On commit, complain about leftover snapshots */ + if (isCommit) + { + ActiveSnapshotElt *active; + RegdSnapshotElt *regd; + + /* + * On a serializable snapshot we must first unregister our private + * refcount to the serializable snapshot. 
+ */ + if (registered_serializable) + UnregisterSnapshot(CurrentSnapshot); + + /* complain about unpopped active snapshots */ + for (active = ActiveSnapshot; active != NULL; active = active->as_next) + { + ereport(WARNING, + (errmsg("snapshot %p still active", active))); + } + + /* complain about any snapshot that is still registered */ + for (regd = RegisteredSnapshotList; regd != NULL; regd = regd->s_next) + { + ereport(WARNING, + (errmsg("snapshot %p not destroyed at commit (%d regd refs, %d active refs)", + regd->s_snap, regd->s_snap->regd_count, + regd->s_snap->active_count))); + } + } + + /* + * And reset our state. We don't need to free the memory explicitly -- + * it'll go away with TopTransactionContext. + */ + ActiveSnapshot = NULL; + RegisteredSnapshotList = NULL; + + CurrentSnapshot = NULL; + SecondarySnapshot = NULL; + + FirstSnapshotSet = false; + registered_serializable = false; } |