diff options
author | Andres Freund <andres@anarazel.de> | 2023-02-08 20:53:42 -0800 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2023-02-08 20:53:42 -0800 |
commit | 28e626bde00efe8051fbd677227c97b1aa3c6a1a (patch) | |
tree | 4f54973fcd7b65d71d5003f2385aafbda771c27b /src/backend/utils/activity/pgstat_io.c | |
parent | 49c2c5fcb1e1e4311f08687f78dd9121330ae637 (diff) | |
download | postgresql-28e626bde00efe8051fbd677227c97b1aa3c6a1a.tar.gz postgresql-28e626bde00efe8051fbd677227c97b1aa3c6a1a.zip |
pgstat: Infrastructure for more detailed IO statistics
This commit adds the infrastructure for more detailed IO statistics. The calls
to actually count IOs, a system view to access the new statistics,
documentation and tests will be added in subsequent commits, to make review
easier.
While we already had some IO statistics, e.g. in pg_stat_bgwriter and
pg_stat_database, they did not provide sufficient detail to understand what
the main sources of IO are, or whether configuration changes could avoid
IO. E.g., pg_stat_bgwriter.buffers_backend does contain the number of buffers
written out by a backend, but as that includes extending relations (always
done by backends) and writes triggered by the use of buffer access strategies,
it cannot easily be used to tune background writer or checkpointer. Similarly,
pg_stat_database.blks_read cannot easily be used to tune shared_buffers /
compute a cache hit ratio, as the use of buffer access strategies will often
prevent a large fraction of the read blocks from ending up in shared_buffers.
The new IO statistics count IO operations (evict, extend, fsync, read, reuse,
and write), and are aggregated for each combination of backend type (backend,
autovacuum worker, bgwriter, etc), target object of the IO (relations, temp
relations) and context of the IO (normal, vacuum, bulkread, bulkwrite).
What is tracked in this series of patches is sufficient to perform the
aforementioned analyses. Further details, e.g. tracking the number of buffer
hits, would make that even easier, but was left out for now, to keep the scope
of the already large patchset manageable.
Bumps PGSTAT_FILE_FORMAT_ID.
Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Andres Freund <andres@anarazel.de>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Reviewed-by: Kyotaro Horiguchi <horikyota.ntt@gmail.com>
Discussion: https://postgr.es/m/20200124195226.lth52iydq2n2uilq@alap3.anarazel.de
Diffstat (limited to 'src/backend/utils/activity/pgstat_io.c')
-rw-r--r-- | src/backend/utils/activity/pgstat_io.c | 391 |
1 files changed, 391 insertions, 0 deletions
diff --git a/src/backend/utils/activity/pgstat_io.c b/src/backend/utils/activity/pgstat_io.c new file mode 100644 index 00000000000..0e07e0848d3 --- /dev/null +++ b/src/backend/utils/activity/pgstat_io.c @@ -0,0 +1,391 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_io.c + * Implementation of IO statistics. + * + * This file contains the implementation of IO statistics. It is kept separate + * from pgstat.c to enforce the line between the statistics access / storage + * implementation and the details about individual types of statistics. + * + * Copyright (c) 2021-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_io.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +static PgStat_BktypeIO PendingIOStats; +bool have_iostats = false; + + +/* + * Check that stats have not been counted for any combination of IOObject, + * IOContext, and IOOp which are not tracked for the passed-in BackendType. The + * passed-in PgStat_BktypeIO must contain stats from the BackendType specified + * by the second parameter. Caller is responsible for locking the passed-in + * PgStat_BktypeIO, if needed. + */ +bool +pgstat_bktype_io_stats_valid(PgStat_BktypeIO *backend_io, + BackendType bktype) +{ + bool bktype_tracked = pgstat_tracks_io_bktype(bktype); + + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + /* + * Don't bother trying to skip to the next loop iteration if + * pgstat_tracks_io_object() would return false here. We still + * need to validate that each counter is zero anyway. 
+ */ + for (IOOp io_op = IOOP_FIRST; io_op < IOOP_NUM_TYPES; io_op++) + { + /* No stats, so nothing to validate */ + if (backend_io->data[io_object][io_context][io_op] == 0) + continue; + + /* There are stats and there shouldn't be */ + if (!bktype_tracked || + !pgstat_tracks_io_op(bktype, io_object, io_context, io_op)) + return false; + } + } + } + + return true; +} + +void +pgstat_count_io_op(IOObject io_object, IOContext io_context, IOOp io_op) +{ + Assert(io_object < IOOBJECT_NUM_TYPES); + Assert(io_context < IOCONTEXT_NUM_TYPES); + Assert(io_op < IOOP_NUM_TYPES); + Assert(pgstat_tracks_io_op(MyBackendType, io_object, io_context, io_op)); + + PendingIOStats.data[io_object][io_context][io_op]++; + + have_iostats = true; +} + +PgStat_IO * +pgstat_fetch_stat_io(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_IO); + + return &pgStatLocal.snapshot.io; +} + +/* + * Flush out locally pending IO statistics + * + * If no stats have been recorded, this function returns false. + * + * If nowait is true, this function returns true if the lock could not be + * acquired. Otherwise, return false. 
+ */ +bool +pgstat_flush_io(bool nowait) +{ + LWLock *bktype_lock; + PgStat_BktypeIO *bktype_shstats; + + if (!have_iostats) + return false; + + bktype_lock = &pgStatLocal.shmem->io.locks[MyBackendType]; + bktype_shstats = + &pgStatLocal.shmem->io.stats.stats[MyBackendType]; + + if (!nowait) + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(bktype_lock, LW_EXCLUSIVE)) + return true; + + for (IOObject io_object = IOOBJECT_FIRST; + io_object < IOOBJECT_NUM_TYPES; io_object++) + { + for (IOContext io_context = IOCONTEXT_FIRST; + io_context < IOCONTEXT_NUM_TYPES; io_context++) + { + for (IOOp io_op = IOOP_FIRST; + io_op < IOOP_NUM_TYPES; io_op++) + bktype_shstats->data[io_object][io_context][io_op] += + PendingIOStats.data[io_object][io_context][io_op]; + } + } + + Assert(pgstat_bktype_io_stats_valid(bktype_shstats, MyBackendType)); + + LWLockRelease(bktype_lock); + + memset(&PendingIOStats, 0, sizeof(PendingIOStats)); + + have_iostats = false; + + return false; +} + +const char * +pgstat_get_io_context_name(IOContext io_context) +{ + switch (io_context) + { + case IOCONTEXT_BULKREAD: + return "bulkread"; + case IOCONTEXT_BULKWRITE: + return "bulkwrite"; + case IOCONTEXT_NORMAL: + return "normal"; + case IOCONTEXT_VACUUM: + return "vacuum"; + } + + elog(ERROR, "unrecognized IOContext value: %d", io_context); + pg_unreachable(); +} + +const char * +pgstat_get_io_object_name(IOObject io_object) +{ + switch (io_object) + { + case IOOBJECT_RELATION: + return "relation"; + case IOOBJECT_TEMP_RELATION: + return "temp relation"; + } + + elog(ERROR, "unrecognized IOObject value: %d", io_object); + pg_unreachable(); +} + +void +pgstat_io_reset_all_cb(TimestampTz ts) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + + LWLockAcquire(bktype_lock, LW_EXCLUSIVE); + + /* + * Use the lock in the first BackendType's 
PgStat_BktypeIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.shmem->io.stats.stat_reset_timestamp = ts; + + memset(bktype_shstats, 0, sizeof(*bktype_shstats)); + LWLockRelease(bktype_lock); + } +} + +void +pgstat_io_snapshot_cb(void) +{ + for (int i = 0; i < BACKEND_NUM_TYPES; i++) + { + LWLock *bktype_lock = &pgStatLocal.shmem->io.locks[i]; + PgStat_BktypeIO *bktype_shstats = &pgStatLocal.shmem->io.stats.stats[i]; + PgStat_BktypeIO *bktype_snap = &pgStatLocal.snapshot.io.stats[i]; + + LWLockAcquire(bktype_lock, LW_SHARED); + + /* + * Use the lock in the first BackendType's PgStat_BktypeIO to protect + * the reset timestamp as well. + */ + if (i == 0) + pgStatLocal.snapshot.io.stat_reset_timestamp = + pgStatLocal.shmem->io.stats.stat_reset_timestamp; + + /* using struct assignment due to better type safety */ + *bktype_snap = *bktype_shstats; + LWLockRelease(bktype_lock); + } +} + +/* +* IO statistics are not collected for all BackendTypes. +* +* The following BackendTypes do not participate in the cumulative stats +* subsystem or do not perform IO on which we currently track: +* - Syslogger because it is not connected to shared memory +* - Archiver because most relevant archiving IO is delegated to a +* specialized command or module +* - WAL Receiver and WAL Writer IO is not tracked in pg_stat_io for now +* +* Function returns true if BackendType participates in the cumulative stats +* subsystem for IO and false if it does not. +* +* When adding a new BackendType, also consider adding relevant restrictions to +* pgstat_tracks_io_object() and pgstat_tracks_io_op(). +*/ +bool +pgstat_tracks_io_bktype(BackendType bktype) +{ + /* + * List every type so that new backend types trigger a warning about + * needing to adjust this switch. 
+ */ + switch (bktype) + { + case B_INVALID: + case B_ARCHIVER: + case B_LOGGER: + case B_WAL_RECEIVER: + case B_WAL_WRITER: + return false; + + case B_AUTOVAC_LAUNCHER: + case B_AUTOVAC_WORKER: + case B_BACKEND: + case B_BG_WORKER: + case B_BG_WRITER: + case B_CHECKPOINTER: + case B_STANDALONE_BACKEND: + case B_STARTUP: + case B_WAL_SENDER: + return true; + } + + return false; +} + +/* + * Some BackendTypes do not perform IO on certain IOObjects or in certain + * IOContexts. Some IOObjects are never operated on in some IOContexts. Check + * that the given BackendType is expected to do IO in the given IOContext and + * on the given IOObject and that the given IOObject is expected to be operated + * on in the given IOContext. + */ +bool +pgstat_tracks_io_object(BackendType bktype, IOObject io_object, + IOContext io_context) +{ + bool no_temp_rel; + + /* + * Some BackendTypes should never track IO statistics. + */ + if (!pgstat_tracks_io_bktype(bktype)) + return false; + + /* + * Currently, IO on temporary relations can only occur in the + * IOCONTEXT_NORMAL IOContext. + */ + if (io_context != IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * In core Postgres, only regular backends and WAL Sender processes + * executing queries will use local buffers and operate on temporary + * relations. Parallel workers will not use local buffers (see + * InitLocalBuffers()); however, extensions leveraging background workers + * have no such limitation, so track IO on IOOBJECT_TEMP_RELATION for + * BackendType B_BG_WORKER. 
+ */ + no_temp_rel = bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER || bktype == B_AUTOVAC_WORKER || + bktype == B_STANDALONE_BACKEND || bktype == B_STARTUP; + + if (no_temp_rel && io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION) + return false; + + /* + * Some BackendTypes do not currently perform any IO in certain + * IOContexts, and, while it may not be inherently incorrect for them to + * do so, excluding those rows from the view makes the view easier to use. + */ + if ((bktype == B_CHECKPOINTER || bktype == B_BG_WRITER) && + (io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || + io_context == IOCONTEXT_VACUUM)) + return false; + + if (bktype == B_AUTOVAC_LAUNCHER && io_context == IOCONTEXT_VACUUM) + return false; + + if ((bktype == B_AUTOVAC_WORKER || bktype == B_AUTOVAC_LAUNCHER) && + io_context == IOCONTEXT_BULKWRITE) + return false; + + return true; +} + +/* + * Some BackendTypes will never do certain IOOps and some IOOps should not + * occur in certain IOContexts or on certain IOObjects. Check that the given + * IOOp is valid for the given BackendType in the given IOContext and on the + * given IOObject. Note that there are currently no cases of an IOOp being + * invalid for a particular BackendType only within a certain IOContext and/or + * only on a certain IOObject. + */ +bool +pgstat_tracks_io_op(BackendType bktype, IOObject io_object, + IOContext io_context, IOOp io_op) +{ + bool strategy_io_context; + + /* if (io_context, io_object) will never collect stats, we're done */ + if (!pgstat_tracks_io_object(bktype, io_object, io_context)) + return false; + + /* + * Some BackendTypes will not do certain IOOps. 
+ */ + if ((bktype == B_BG_WRITER || bktype == B_CHECKPOINTER) && + (io_op == IOOP_READ || io_op == IOOP_EVICT)) + return false; + + if ((bktype == B_AUTOVAC_LAUNCHER || bktype == B_BG_WRITER || + bktype == B_CHECKPOINTER) && io_op == IOOP_EXTEND) + return false; + + /* + * Some IOOps are not valid in certain IOContexts and some IOOps are only + * valid in certain contexts. + */ + if (io_context == IOCONTEXT_BULKREAD && io_op == IOOP_EXTEND) + return false; + + strategy_io_context = io_context == IOCONTEXT_BULKREAD || + io_context == IOCONTEXT_BULKWRITE || io_context == IOCONTEXT_VACUUM; + + /* + * IOOP_REUSE is only relevant when a BufferAccessStrategy is in use. + */ + if (!strategy_io_context && io_op == IOOP_REUSE) + return false; + + /* + * IOOP_FSYNC IOOps done by a backend using a BufferAccessStrategy are + * counted in the IOCONTEXT_NORMAL IOContext. See comment in + * register_dirty_segment() for more details. + */ + if (strategy_io_context && io_op == IOOP_FSYNC) + return false; + + /* + * Temporary tables are not logged and thus do not require fsync'ing. + */ + if (io_context == IOCONTEXT_NORMAL && + io_object == IOOBJECT_TEMP_RELATION && io_op == IOOP_FSYNC) + return false; + + return true; +} |