diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2023-11-08 13:30:50 +0200 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2023-11-08 13:30:50 +0200 |
commit | b8bff07daa85c837a2747b4d35cd5a27e73fb7b2 (patch) | |
tree | b9c98f5071e676c3bb7f94a6a6909406e3d9531b /src/backend/storage/buffer/bufmgr.c | |
parent | b70c2143bbbe291fe2b444150772972fa53972f1 (diff) | |
download | postgresql-b8bff07daa85c837a2747b4d35cd5a27e73fb7b2.tar.gz postgresql-b8bff07daa85c837a2747b4d35cd5a27e73fb7b2.zip |
Make ResourceOwners more easily extensible.
Instead of having a separate array/hash for each resource kind, use a
single array and hash to hold all kinds of resources. This makes it
possible to introduce new resource "kinds" without having to modify
the ResourceOwnerData struct. In particular, this makes it possible
for extensions to register custom resource kinds.
The old approach was to have a small array of resources of each kind,
and if it fills up, switch to a hash table. The new approach also uses
an array and a hash, but now the array and the hash are used at the
same time. The array is used to hold the recently added resources, and
when it fills up, they are moved to the hash. This keeps the access to
recent entries fast, even when there are a lot of long-held resources.
All the resource-specific ResourceOwnerEnlarge*(),
ResourceOwnerRemember*(), and ResourceOwnerForget*() functions have
been replaced with three generic functions that take resource kind as
argument. For convenience, we still define resource-specific wrapper
macros around the generic functions with the old names, but they are
now defined in the source files that use those resource kinds.
The release callback no longer needs to call ResourceOwnerForget on
the resource being released. ResourceOwnerRelease unregisters the
resource from the owner before calling the callback. That needed some
changes in bufmgr.c and some other files, where releasing the
resources previously always called ResourceOwnerForget.
Each resource kind specifies a release priority, and
ResourceOwnerReleaseAll releases the resources in priority order. To
make that possible, we have to restrict what you can do between
phases. After calling ResourceOwnerRelease(), you are no longer
allowed to remember any more resources in it or to forget any
previously remembered resources by calling ResourceOwnerForget. There
was one case where that was done previously. At subtransaction commit,
AtEOSubXact_Inval() would handle the invalidation messages and call
RelationFlushRelation(), which temporarily increased the reference
count on the relation being flushed. We now switch to the parent
subtransaction's resource owner before calling AtEOSubXact_Inval(), so
that there is a valid ResourceOwner to temporarily hold that relcache
reference.
Other end-of-xact routines make similar calls to AtEOXact_Inval()
between release phases, but I didn't see any regression test failures
from those, so I'm not sure if they could reach a codepath that needs
remembering extra resources.
There were two exceptions to how the resource leak WARNINGs on commit
were printed previously: llvmjit silently released the context without
printing the warning, and a leaked buffer io triggered a PANIC. Now
everything prints a WARNING, including those cases.
Add tests in src/test/modules/test_resowner.
Reviewed-by: Aleksander Alekseev, Michael Paquier, Julien Rouhaud
Reviewed-by: Kyotaro Horiguchi, Hayato Kuroda, Álvaro Herrera, Zhihong Yu
Reviewed-by: Peter Eisentraut, Andres Freund
Discussion: https://www.postgresql.org/message-id/cbfabeb0-cd3c-e951-a572-19b365ed314d%40iki.fi
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 164 |
1 files changed, 126 insertions, 38 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 506f71e6533..f7c67d504cd 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -47,6 +47,7 @@ #include "postmaster/bgwriter.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" +#include "storage/fd.h" #include "storage/ipc.h" #include "storage/lmgr.h" #include "storage/proc.h" @@ -55,7 +56,7 @@ #include "utils/memdebug.h" #include "utils/ps_status.h" #include "utils/rel.h" -#include "utils/resowner_private.h" +#include "utils/resowner.h" #include "utils/timestamp.h" @@ -205,6 +206,30 @@ static PrivateRefCountEntry *GetPrivateRefCountEntry(Buffer buffer, bool do_move static inline int32 GetPrivateRefCount(Buffer buffer); static void ForgetPrivateRefCountEntry(PrivateRefCountEntry *ref); +/* ResourceOwner callbacks to hold in-progress I/Os and buffer pins */ +static void ResOwnerReleaseBufferIO(Datum res); +static char *ResOwnerPrintBufferIO(Datum res); +static void ResOwnerReleaseBufferPin(Datum res); +static char *ResOwnerPrintBufferPin(Datum res); + +const ResourceOwnerDesc buffer_io_resowner_desc = +{ + .name = "buffer io", + .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS, + .release_priority = RELEASE_PRIO_BUFFER_IOS, + .ReleaseResource = ResOwnerReleaseBufferIO, + .DebugPrint = ResOwnerPrintBufferIO +}; + +const ResourceOwnerDesc buffer_pin_resowner_desc = +{ + .name = "buffer pin", + .release_phase = RESOURCE_RELEASE_BEFORE_LOCKS, + .release_priority = RELEASE_PRIO_BUFFER_PINS, + .ReleaseResource = ResOwnerReleaseBufferPin, + .DebugPrint = ResOwnerPrintBufferPin +}; + /* * Ensure that the PrivateRefCountArray has sufficient space to store one more * entry. This has to be called before using NewPrivateRefCountEntry() to fill @@ -470,6 +495,7 @@ static BlockNumber ExtendBufferedRelShared(BufferManagerRelation bmr, static bool PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy); static void PinBuffer_Locked(BufferDesc *buf); static void UnpinBuffer(BufferDesc *buf); +static void UnpinBufferNoOwner(BufferDesc *buf); static void BufferSync(int flags); static uint32 WaitBufHdrUnlocked(BufferDesc *buf); static int SyncOneBuffer(int buf_id, bool skip_recently_used, @@ -477,7 +503,8 @@ static int SyncOneBuffer(int buf_id, bool skip_recently_used, static void WaitIO(BufferDesc *buf); static bool StartBufferIO(BufferDesc *buf, bool forInput); static void TerminateBufferIO(BufferDesc *buf, bool clear_dirty, - uint32 set_flag_bits); + uint32 set_flag_bits, bool forget_owner); +static void AbortBufferIO(Buffer buffer); static void shared_buffer_write_error_callback(void *arg); static void local_buffer_write_error_callback(void *arg); static BufferDesc *BufferAlloc(SMgrRelation smgr, @@ -639,7 +666,7 @@ ReadRecentBuffer(RelFileLocator rlocator, ForkNumber forkNum, BlockNumber blockN Assert(BufferIsValid(recent_buffer)); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); ReservePrivateRefCountEntry(); InitBufferTag(&tag, &rlocator, forkNum, blockNum); @@ -1173,7 +1200,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, else { /* Set BM_VALID, terminate IO, and wake up any waiters */ - TerminateBufferIO(bufHdr, false, BM_VALID); + TerminateBufferIO(bufHdr, false, BM_VALID, true); } VacuumPageMiss++; @@ -1228,7 +1255,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, uint32 victim_buf_state; /* Make sure we will have room to remember the buffer pin */ - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); ReservePrivateRefCountEntry(); /* create a tag so we can lookup the buffer */ @@ -1315,9 +1342,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, * use. * * We could do this after releasing the partition lock, but then we'd - * have to call ResourceOwnerEnlargeBuffers() & - * ReservePrivateRefCountEntry() before acquiring the lock, for the - * rare case of such a collision. + * have to call ResourceOwnerEnlarge() & ReservePrivateRefCountEntry() + * before acquiring the lock, for the rare case of such a collision. */ UnpinBuffer(victim_buf_hdr); @@ -1595,7 +1621,7 @@ GetVictimBuffer(BufferAccessStrategy strategy, IOContext io_context) * entry, and a resource owner slot for the pin. */ ReservePrivateRefCountEntry(); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); /* we return here if a prospective victim buffer gets used concurrently */ again: @@ -1946,7 +1972,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, int existing_id; /* in case we need to pin an existing buffer below */ - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); ReservePrivateRefCountEntry(); InitBufferTag(&tag, &bmr.smgr->smgr_rlocator.locator, fork, first_block + i); @@ -2090,7 +2116,7 @@ ExtendBufferedRelShared(BufferManagerRelation bmr, if (lock) LWLockAcquire(BufferDescriptorGetContentLock(buf_hdr), LW_EXCLUSIVE); - TerminateBufferIO(buf_hdr, false, BM_VALID); + TerminateBufferIO(buf_hdr, false, BM_VALID, true); } pgBufferUsage.shared_blks_written += extend_by; @@ -2283,7 +2309,7 @@ ReleaseAndReadBuffer(Buffer buffer, * taking the buffer header lock; instead update the state variable in loop of * CAS operations. Hopefully it's just a single CAS. * - * Note that ResourceOwnerEnlargeBuffers and ReservePrivateRefCountEntry() + * Note that ResourceOwnerEnlarge() and ReservePrivateRefCountEntry() * must have been done already. * * Returns true if buffer is BM_VALID, else false. This provision allows @@ -2379,7 +2405,7 @@ PinBuffer(BufferDesc *buf, BufferAccessStrategy strategy) * * As this function is called with the spinlock held, the caller has to * previously call ReservePrivateRefCountEntry() and - * ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + * ResourceOwnerEnlarge(CurrentResourceOwner); * * Currently, no callers of this function want to modify the buffer's * usage_count at all, so there's no need for a strategy parameter. @@ -2441,6 +2467,15 @@ PinBuffer_Locked(BufferDesc *buf) static void UnpinBuffer(BufferDesc *buf) { + Buffer b = BufferDescriptorGetBuffer(buf); + + ResourceOwnerForgetBuffer(CurrentResourceOwner, b); + UnpinBufferNoOwner(buf); +} + +static void +UnpinBufferNoOwner(BufferDesc *buf) +{ PrivateRefCountEntry *ref; Buffer b = BufferDescriptorGetBuffer(buf); @@ -2449,9 +2484,6 @@ UnpinBuffer(BufferDesc *buf) /* not moving as we're likely deleting it soon anyway */ ref = GetPrivateRefCountEntry(b, false); Assert(ref != NULL); - - ResourceOwnerForgetBuffer(CurrentResourceOwner, b); - Assert(ref->refcount > 0); ref->refcount--; if (ref->refcount == 0) @@ -3122,7 +3154,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context) /* Make sure we can handle the pin */ ReservePrivateRefCountEntry(); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); /* * Check whether buffer needs writing. @@ -3252,6 +3284,7 @@ CheckForBufferLeaks(void) int RefCountErrors = 0; PrivateRefCountEntry *res; int i; + char *s; /* check the array */ for (i = 0; i < REFCOUNT_ARRAY_ENTRIES; i++) @@ -3260,7 +3293,10 @@ CheckForBufferLeaks(void) if (res->buffer != InvalidBuffer) { - PrintBufferLeakWarning(res->buffer); + s = DebugPrintBufferRefcount(res->buffer); + elog(WARNING, "buffer refcount leak: %s", s); + pfree(s); + RefCountErrors++; } } @@ -3273,7 +3309,9 @@ CheckForBufferLeaks(void) hash_seq_init(&hstat, PrivateRefCountHash); while ((res = (PrivateRefCountEntry *) hash_seq_search(&hstat)) != NULL) { - PrintBufferLeakWarning(res->buffer); + s = DebugPrintBufferRefcount(res->buffer); + elog(WARNING, "buffer refcount leak: %s", s); + pfree(s); RefCountErrors++; } } @@ -3285,12 +3323,13 @@ CheckForBufferLeaks(void) /* * Helper routine to issue warnings when a buffer is unexpectedly pinned */ -void -PrintBufferLeakWarning(Buffer buffer) +char * +DebugPrintBufferRefcount(Buffer buffer) { BufferDesc *buf; int32 loccount; char *path; + char *result; BackendId backend; uint32 buf_state; @@ -3312,13 +3351,13 @@ PrintBufferLeakWarning(Buffer buffer) path = relpathbackend(BufTagGetRelFileLocator(&buf->tag), backend, BufTagGetForkNum(&buf->tag)); buf_state = pg_atomic_read_u32(&buf->state); - elog(WARNING, - "buffer refcount leak: [%03d] " - "(rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)", - buffer, path, - buf->tag.blockNum, buf_state & BUF_FLAG_MASK, - BUF_STATE_GET_REFCOUNT(buf_state), loccount); + + result = psprintf("[%03d] (rel=%s, blockNum=%u, flags=0x%x, refcount=%u %d)", + buffer, path, + buf->tag.blockNum, buf_state & BUF_FLAG_MASK, + BUF_STATE_GET_REFCOUNT(buf_state), loccount); pfree(path); + return result; } /* @@ -3522,7 +3561,7 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object, * Mark the buffer as clean (unless BM_JUST_DIRTIED has become set) and * end the BM_IO_IN_PROGRESS state. */ - TerminateBufferIO(buf, true, 0); + TerminateBufferIO(buf, true, 0, true); TRACE_POSTGRESQL_BUFFER_FLUSH_DONE(BufTagGetForkNum(&buf->tag), buf->tag.blockNum, @@ -4182,7 +4221,7 @@ FlushRelationBuffers(Relation rel) /* Make sure we can handle the pin */ ReservePrivateRefCountEntry(); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); if (BufTagMatchesRelFileLocator(&bufHdr->tag, &rel->rd_locator) && @@ -4279,7 +4318,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels) /* Make sure we can handle the pin */ ReservePrivateRefCountEntry(); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); if (BufTagMatchesRelFileLocator(&bufHdr->tag, &srelent->rlocator) && @@ -4489,7 +4528,7 @@ FlushDatabaseBuffers(Oid dbid) /* Make sure we can handle the pin */ ReservePrivateRefCountEntry(); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); buf_state = LockBufHdr(bufHdr); if (bufHdr->tag.dbOid == dbid && @@ -4566,7 +4605,7 @@ void IncrBufferRefCount(Buffer buffer) { Assert(BufferIsPinned(buffer)); - ResourceOwnerEnlargeBuffers(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); if (BufferIsLocal(buffer)) LocalRefCount[-buffer - 1]++; else @@ -5164,7 +5203,7 @@ StartBufferIO(BufferDesc *buf, bool forInput) { uint32 buf_state; - ResourceOwnerEnlargeBufferIOs(CurrentResourceOwner); + ResourceOwnerEnlarge(CurrentResourceOwner); for (;;) { @@ -5209,9 +5248,14 @@ StartBufferIO(BufferDesc *buf, bool forInput) * set_flag_bits gets ORed into the buffer's flags. It must include * BM_IO_ERROR in a failure case. For successful completion it could * be 0, or BM_VALID if we just finished reading in the page. + * + * If forget_owner is true, we release the buffer I/O from the current + * resource owner. (forget_owner=false is used when the resource owner itself + * is being released) */ static void -TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits) +TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits, + bool forget_owner) { uint32 buf_state; @@ -5226,8 +5270,9 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits) buf_state |= set_flag_bits; UnlockBufHdr(buf, buf_state); - ResourceOwnerForgetBufferIO(CurrentResourceOwner, - BufferDescriptorGetBuffer(buf)); + if (forget_owner) + ResourceOwnerForgetBufferIO(CurrentResourceOwner, + BufferDescriptorGetBuffer(buf)); ConditionVariableBroadcast(BufferDescriptorGetIOCV(buf)); } @@ -5240,8 +5285,12 @@ TerminateBufferIO(BufferDesc *buf, bool clear_dirty, uint32 set_flag_bits) * * If I/O was in progress, we always set BM_IO_ERROR, even though it's * possible the error condition wasn't related to the I/O. + * + * Note: this does not remove the buffer I/O from the resource owner. + * That's correct when we're releasing the whole resource owner, but + * beware if you use this in other contexts. */ -void +static void AbortBufferIO(Buffer buffer) { BufferDesc *buf_hdr = GetBufferDescriptor(buffer - 1); @@ -5277,7 +5326,7 @@ AbortBufferIO(Buffer buffer) } } - TerminateBufferIO(buf_hdr, false, BM_IO_ERROR); + TerminateBufferIO(buf_hdr, false, BM_IO_ERROR, false); } /* @@ -5629,3 +5678,42 @@ IssuePendingWritebacks(WritebackContext *wb_context, IOContext io_context) wb_context->nr_pending = 0; } + +/* ResourceOwner callbacks */ + +static void +ResOwnerReleaseBufferIO(Datum res) +{ + Buffer buffer = DatumGetInt32(res); + + AbortBufferIO(buffer); +} + +static char * +ResOwnerPrintBufferIO(Datum res) +{ + Buffer buffer = DatumGetInt32(res); + + return psprintf("lost track of buffer IO on buffer %d", buffer); +} + +static void +ResOwnerReleaseBufferPin(Datum res) +{ + Buffer buffer = DatumGetInt32(res); + + /* Like ReleaseBuffer, but don't call ResourceOwnerForgetBuffer */ + if (!BufferIsValid(buffer)) + elog(ERROR, "bad buffer ID: %d", buffer); + + if (BufferIsLocal(buffer)) + UnpinLocalBufferNoOwner(buffer); + else + UnpinBufferNoOwner(GetBufferDescriptor(buffer - 1)); +} + +static char * +ResOwnerPrintBufferPin(Datum res) +{ + return DebugPrintBufferRefcount(DatumGetInt32(res)); +} |