aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
authorAndres Freund <andres@anarazel.de>2023-02-09 22:22:26 -0800
committerAndres Freund <andres@anarazel.de>2023-02-09 22:22:26 -0800
commitf30d62c2fc60acfa62d3b83a73dc9bf7f83cfe2f (patch)
treec66e2b3bb3f0fb8c76c6e185fcba9e8e985fde83 /src/backend/storage/buffer/bufmgr.c
parent40d0b2d4153d903d41adee6a303428540389191b (diff)
downloadpostgresql-f30d62c2fc60acfa62d3b83a73dc9bf7f83cfe2f.tar.gz
postgresql-f30d62c2fc60acfa62d3b83a73dc9bf7f83cfe2f.zip
pgstat: Track more detailed relation IO statistics
Commit 28e626bde00 introduced the infrastructure for tracking more detailed IO statistics. This commit adds the actual collection of the new IO statistics for relations and temporary relations. See aforementioned commit for goals and high-level design. The changes in this commit are fairly straight-forward. The bulk of the change is to passing sufficient information to the callsites of pgstat_count_io_op(). A somewhat unsightly detail is that it currently is hard to find a better place to count fsyncs than in md.c, whereas the other pgstat_count_io_op() calls are in bufmgr.c/localbuf.c. As the number of fsyncs is tied to md.c implementation details, it's not obvious there is a better answer. Author: Melanie Plageman <melanieplageman@gmail.com> Reviewed-by: Andres Freund <andres@anarazel.de> Discussion: https://postgr.es/m/20200124195226.lth52iydq2n2uilq@alap3.anarazel.de
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r--src/backend/storage/buffer/bufmgr.c110
1 files changed, 94 insertions, 16 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1bc8d6555b9..2d6dbc65612 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -472,8 +472,9 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
- bool *foundPtr);
-static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+ bool *foundPtr, IOContext *io_context);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln,
+ IOObject io_object, IOContext io_context);
static void FindAndDropRelationBuffers(RelFileLocator rlocator,
ForkNumber forkNum,
BlockNumber nForkBlock,
@@ -814,6 +815,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BufferDesc *bufHdr;
Block bufBlock;
bool found;
+ IOContext io_context;
+ IOObject io_object;
bool isExtend;
bool isLocalBuf = SmgrIsTemp(smgr);
@@ -846,7 +849,14 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
if (isLocalBuf)
{
- bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
+ /*
+ * LocalBufferAlloc() will set the io_context to IOCONTEXT_NORMAL. We
+ * do not use a BufferAccessStrategy for I/O of temporary tables.
+ * However, in some cases, the "strategy" may not be NULL, so we can't
+ * rely on IOContextForStrategy() to set the right IOContext for us.
+ * This may happen in cases like CREATE TEMPORARY TABLE AS...
+ */
+ bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found, &io_context);
if (found)
pgBufferUsage.local_blks_hit++;
else if (isExtend)
@@ -862,7 +872,7 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* not currently in memory.
*/
bufHdr = BufferAlloc(smgr, relpersistence, forkNum, blockNum,
- strategy, &found);
+ strategy, &found, &io_context);
if (found)
pgBufferUsage.shared_blks_hit++;
else if (isExtend)
@@ -977,7 +987,16 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
Assert(!(pg_atomic_read_u32(&bufHdr->state) & BM_VALID)); /* spinlock not needed */
- bufBlock = isLocalBuf ? LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr);
+ if (isLocalBuf)
+ {
+ bufBlock = LocalBufHdrGetBlock(bufHdr);
+ io_object = IOOBJECT_TEMP_RELATION;
+ }
+ else
+ {
+ bufBlock = BufHdrGetBlock(bufHdr);
+ io_object = IOOBJECT_RELATION;
+ }
if (isExtend)
{
@@ -986,6 +1005,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
/* don't set checksum for all-zero page */
smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, false);
+ pgstat_count_io_op(io_object, io_context, IOOP_EXTEND);
+
/*
* NB: we're *not* doing a ScheduleBufferTagForWriteback here;
* although we're essentially performing a write. At least on linux
@@ -1013,6 +1034,8 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
+ pgstat_count_io_op(io_object, io_context, IOOP_READ);
+
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
@@ -1106,14 +1129,19 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* *foundPtr is actually redundant with the buffer's BM_VALID flag, but
* we keep it for simplicity in ReadBuffer.
*
+ * io_context is passed as an output parameter to avoid calling
+ * IOContextForStrategy() when there is a shared buffers hit and no IO
+ * statistics need be captured.
+ *
* No locks are held either at entry or exit.
*/
static BufferDesc *
BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy,
- bool *foundPtr)
+ bool *foundPtr, IOContext *io_context)
{
+ bool from_ring;
BufferTag newTag; /* identity of requested block */
uint32 newHash; /* hash value for newTag */
LWLock *newPartitionLock; /* buffer partition lock for it */
@@ -1165,8 +1193,11 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
{
/*
* If we get here, previous attempts to read the buffer must
- * have failed ... but we shall bravely try again.
+ * have failed ... but we shall bravely try again. Set
+ * io_context since we will in fact need to count an IO
+ * Operation.
*/
+ *io_context = IOContextForStrategy(strategy);
*foundPtr = false;
}
}
@@ -1180,6 +1211,8 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
*/
LWLockRelease(newPartitionLock);
+ *io_context = IOContextForStrategy(strategy);
+
/* Loop here in case we have to try another victim buffer */
for (;;)
{
@@ -1193,7 +1226,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
* Select a victim buffer. The buffer is returned with its header
* spinlock still held!
*/
- buf = StrategyGetBuffer(strategy, &buf_state);
+ buf = StrategyGetBuffer(strategy, &buf_state, &from_ring);
Assert(BUF_STATE_GET_REFCOUNT(buf_state) == 0);
@@ -1247,7 +1280,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
UnlockBufHdr(buf, buf_state);
if (XLogNeedsFlush(lsn) &&
- StrategyRejectBuffer(strategy, buf))
+ StrategyRejectBuffer(strategy, buf, from_ring))
{
/* Drop lock/pin and loop around for another buffer */
LWLockRelease(BufferDescriptorGetContentLock(buf));
@@ -1262,7 +1295,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rlocator.locator.dbOid,
smgr->smgr_rlocator.locator.relNumber);
- FlushBuffer(buf, NULL);
+ FlushBuffer(buf, NULL, IOOBJECT_RELATION, *io_context);
LWLockRelease(BufferDescriptorGetContentLock(buf));
ScheduleBufferTagForWriteback(&BackendWritebackContext,
@@ -1443,6 +1476,28 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
LWLockRelease(newPartitionLock);
+ if (oldFlags & BM_VALID)
+ {
+ /*
+ * When a BufferAccessStrategy is in use, blocks evicted from shared
+ * buffers are counted as IOOP_EVICT in the corresponding context
+ * (e.g. IOCONTEXT_BULKWRITE). Shared buffers are evicted by a
+ * strategy in two cases: 1) while initially claiming buffers for the
+ * strategy ring 2) to replace an existing strategy ring buffer
+ * because it is pinned or in use and cannot be reused.
+ *
+ * Blocks evicted from buffers already in the strategy ring are
+ * counted as IOOP_REUSE in the corresponding strategy context.
+ *
+ * At this point, we can accurately count evictions and reuses,
+ * because we have successfully claimed the valid buffer. Previously,
+ * we may have been forced to release the buffer due to concurrent
+ * pinners or erroring out.
+ */
+ pgstat_count_io_op(IOOBJECT_RELATION, *io_context,
+ from_ring ? IOOP_REUSE : IOOP_EVICT);
+ }
+
/*
* Buffer contents are currently invalid. Try to obtain the right to
* start I/O. If StartBufferIO returns false, then someone else managed
@@ -2563,7 +2618,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used, WritebackContext *wb_context)
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
@@ -2813,7 +2868,8 @@ BufferGetTag(Buffer buffer, RelFileLocator *rlocator, ForkNumber *forknum,
* as the second parameter. If not, pass NULL.
*/
static void
-FlushBuffer(BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln, IOObject io_object,
+ IOContext io_context)
{
XLogRecPtr recptr;
ErrorContextCallback errcallback;
@@ -2907,6 +2963,26 @@ FlushBuffer(BufferDesc *buf, SMgrRelation reln)
bufToWrite,
false);
+ /*
+ * When a strategy is in use, only flushes of dirty buffers already in the
+ * strategy ring are counted as strategy writes (IOCONTEXT
+ * [BULKREAD|BULKWRITE|VACUUM] IOOP_WRITE) for the purpose of IO
+ * statistics tracking.
+ *
+ * If a shared buffer initially added to the ring must be flushed before
+ * being used, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE.
+ *
+ * If a shared buffer which was added to the ring later because the
+ * current strategy buffer is pinned or in use or because all strategy
+ * buffers were dirty and rejected (for BAS_BULKREAD operations only)
+ * requires flushing, this is counted as an IOCONTEXT_NORMAL IOOP_WRITE
+ * (from_ring will be false).
+ *
+ * When a strategy is not in use, the write can only be a "regular" write
+ * of a dirty shared buffer (IOCONTEXT_NORMAL IOOP_WRITE).
+ */
+ pgstat_count_io_op(IOOBJECT_RELATION, io_context, IOOP_WRITE);
+
if (track_io_timing)
{
INSTR_TIME_SET_CURRENT(io_time);
@@ -3549,6 +3625,8 @@ FlushRelationBuffers(Relation rel)
buf_state &= ~(BM_DIRTY | BM_JUST_DIRTIED);
pg_atomic_unlocked_write_u32(&bufHdr->state, buf_state);
+ pgstat_count_io_op(IOOBJECT_TEMP_RELATION, IOCONTEXT_NORMAL, IOOP_WRITE);
+
/* Pop the error context stack */
error_context_stack = errcallback.previous;
}
@@ -3581,7 +3659,7 @@ FlushRelationBuffers(Relation rel)
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, RelationGetSmgr(rel));
+ FlushBuffer(bufHdr, RelationGetSmgr(rel), IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
@@ -3679,7 +3757,7 @@ FlushRelationsAllBuffers(SMgrRelation *smgrs, int nrels)
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, srelent->srel);
+ FlushBuffer(bufHdr, srelent->srel, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
@@ -3889,7 +3967,7 @@ FlushDatabaseBuffers(Oid dbid)
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(BufferDescriptorGetContentLock(bufHdr), LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
LWLockRelease(BufferDescriptorGetContentLock(bufHdr));
UnpinBuffer(bufHdr);
}
@@ -3916,7 +3994,7 @@ FlushOneBuffer(Buffer buffer)
Assert(LWLockHeldByMe(BufferDescriptorGetContentLock(bufHdr)));
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, IOOBJECT_RELATION, IOCONTEXT_NORMAL);
}
/*