diff options
-rw-r--r-- | doc/src/sgml/system-views.sgml | 294 | ||||
-rw-r--r-- | src/backend/catalog/system_views.sql | 7 | ||||
-rw-r--r-- | src/backend/storage/aio/Makefile | 1 | ||||
-rw-r--r-- | src/backend/storage/aio/aio_funcs.c | 230 | ||||
-rw-r--r-- | src/backend/storage/aio/meson.build | 1 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/catalog/pg_proc.dat | 9 | ||||
-rw-r--r-- | src/test/regress/expected/privileges.out | 18 | ||||
-rw-r--r-- | src/test/regress/expected/rules.out | 16 | ||||
-rw-r--r-- | src/test/regress/sql/privileges.sql | 3 |
10 files changed, 580 insertions, 1 deletions
diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml index 3f5a306247e..e9a59af8c34 100644 --- a/doc/src/sgml/system-views.sgml +++ b/doc/src/sgml/system-views.sgml @@ -52,6 +52,11 @@ <tbody> <row> + <entry><link linkend="view-pg-aios"><structname>pg_aios</structname></link></entry> + <entry>In-use asynchronous IO handles</entry> + </row> + + <row> <entry><link linkend="view-pg-available-extensions"><structname>pg_available_extensions</structname></link></entry> <entry>available extensions</entry> </row> @@ -231,6 +236,295 @@ </table> </sect1> + <sect1 id="view-pg-aios"> + <title><structname>pg_aios</structname></title> + + <indexterm zone="view-pg-aios"> + <primary>pg_aios</primary> + </indexterm> + + <para> + The <structname>pg_aios</structname> view lists all <xref + linkend="glossary-aio"/> handles that are currently in-use. An I/O handle + is used to reference an I/O operation that is being prepared, executed or + is in the process of completing. <structname>pg_aios</structname> contains + one row for each I/O handle. + </para> + + <para> + This view is mainly useful for developers of + <productname>PostgreSQL</productname>, but may also be useful when tuning + <productname>PostgreSQL</productname>. + </para> + + <table> + <title><structname>pg_aios</structname> Columns</title> + <tgroup cols="1"> + <thead> + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + Column Type + </para> + <para> + Description + </para></entry> + </row> + </thead> + + <tbody> + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>pid</structfield> <type>int4</type> + </para> + <para> + Process ID of the server process that is issuing this I/O. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>io_id</structfield> <type>int4</type> + </para> + <para> + Identifier of the I/O handle. 
Handles are reused once the I/O + completed (or if the handle is released before I/O is started). On reuse + <link linkend="view-pg-aios-io-generation"> + <structname>pg_aios</structname>.<structfield>io_generation</structfield> + </link> + is incremented. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry" id="view-pg-aios-io-generation"><para role="column_definition"> + <structfield>io_generation</structfield> <type>int8</type> + </para> + <para> + Generation of the I/O handle. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>state</structfield> <type>text</type> + </para> + <para> + State of the I/O handle: + <itemizedlist> + <listitem> + <para> + <literal>HANDED_OUT</literal>, referenced by code but not yet used + </para> + </listitem> + <listitem> + <para> + <literal>DEFINED</literal>, information necessary for execution is known + </para> + </listitem> + <listitem> + <para> + <literal>STAGED</literal>, ready for execution + </para> + </listitem> + <listitem> + <para> + <literal>SUBMITTED</literal>, submitted for execution + </para> + </listitem> + <listitem> + <para> + <literal>COMPLETED_IO</literal>, finished, but result has not yet been processed + </para> + </listitem> + <listitem> + <para> + <literal>COMPLETED_SHARED</literal>, shared completion processing completed + </para> + </listitem> + <listitem> + <para> + <literal>COMPLETED_LOCAL</literal>, backend local completion processing completed + </para> + </listitem> + </itemizedlist> + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>operation</structfield> <type>text</type> + </para> + <para> + Operation performed using the I/O handle: + <itemizedlist> + <listitem> + <para> + <literal>invalid</literal>, not yet known + </para> + </listitem> + <listitem> + <para> + <literal>readv</literal>, a vectored read + </para> + </listitem> + <listitem> + 
<para> + <literal>writev</literal>, a vectored write + </para> + </listitem> + </itemizedlist> + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>off</structfield> <type>int8</type> + </para> + <para> + Offset of the I/O operation. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>length</structfield> <type>int8</type> + </para> + <para> + Length of the I/O operation. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>target</structfield> <type>text</type> + </para> + <para> + What kind of object is the I/O targeting: + <itemizedlist spacing="compact"> + <listitem> + <para> + <literal>smgr</literal>, I/O on relations + </para> + </listitem> + </itemizedlist> + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>handle_data_len</structfield> <type>int2</type> + </para> + <para> + Length of the data associated with the I/O operation. For I/O to/from + <xref linkend="guc-shared-buffers"/> and <xref + linkend="guc-temp-buffers"/>, this indicates the number of buffers the + I/O is operating on. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>raw_result</structfield> <type>int4</type> + </para> + <para> + Low-level result of the I/O operation, or NULL if the operation has not + yet completed. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>result</structfield> <type>text</type> + </para> + <para> + High-level result of the I/O operation: + <itemizedlist> + <listitem> + <para> + <literal>UNKNOWN</literal> means that the result of the + operation is not yet known. 
+ </para> + </listitem> + <listitem> + <para> + <literal>OK</literal> means the I/O completed successfully. + </para> + </listitem> + <listitem> + <para> + <literal>PARTIAL</literal> means that the I/O completed without + error, but did not process all data. Commonly callers will need to + retry and perform the remainder of the work in a separate I/O. + </para> + </listitem> + <listitem> + <para> + <literal>WARNING</literal> means that the I/O completed without + error, but that execution of the IO triggered a warning. E.g. when + encountering a corrupted buffer with <xref + linkend="guc-zero-damaged-pages"/> enabled. + </para> + </listitem> + <listitem> + <para> + <literal>ERROR</literal> means the I/O failed with an error. + </para> + </listitem> + </itemizedlist> + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>target_desc</structfield> <type>text</type> + </para> + <para> + Description of what the I/O operation is targeting. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>f_sync</structfield> <type>bool</type> + </para> + <para> + Flag indicating whether the I/O is executed synchronously. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>f_localmem</structfield> <type>bool</type> + </para> + <para> + Flag indicating whether the I/O references process local memory. + </para></entry> + </row> + + <row> + <entry role="catalog_table_entry"><para role="column_definition"> + <structfield>f_buffered</structfield> <type>bool</type> + </para> + <para> + Flag indicating whether the I/O is buffered I/O. + </para></entry> + </row> + + </tbody> + </tgroup> + </table> + + <para> + The <structname>pg_aios</structname> view is read-only. 
+ </para> + + <para> + By default, the <structname>pg_aios</structname> view can be read only by + superusers or roles with privileges of the + <literal>pg_read_all_stats</literal> role. + </para> + </sect1> + <sect1 id="view-pg-available-extensions"> <title><structname>pg_available_extensions</structname></title> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 31d269b7ee0..64a7240aa77 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -1391,3 +1391,10 @@ CREATE VIEW pg_stat_subscription_stats AS CREATE VIEW pg_wait_events AS SELECT * FROM pg_get_wait_events(); + +CREATE VIEW pg_aios AS + SELECT * FROM pg_get_aios(); +REVOKE ALL ON pg_aios FROM PUBLIC; +GRANT SELECT ON pg_aios TO pg_read_all_stats; +REVOKE EXECUTE ON FUNCTION pg_get_aios() FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_get_aios() TO pg_read_all_stats; diff --git a/src/backend/storage/aio/Makefile b/src/backend/storage/aio/Makefile index c06c50771e0..3f2469cc399 100644 --- a/src/backend/storage/aio/Makefile +++ b/src/backend/storage/aio/Makefile @@ -11,6 +11,7 @@ include $(top_builddir)/src/Makefile.global OBJS = \ aio.o \ aio_callback.o \ + aio_funcs.o \ aio_init.o \ aio_io.o \ aio_target.o \ diff --git a/src/backend/storage/aio/aio_funcs.c b/src/backend/storage/aio/aio_funcs.c new file mode 100644 index 00000000000..584e683371a --- /dev/null +++ b/src/backend/storage/aio/aio_funcs.c @@ -0,0 +1,230 @@ +/*------------------------------------------------------------------------- + * + * aio_funcs.c + * AIO - SQL interface for AIO + * + * + * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/storage/aio/aio_funcs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include 
"nodes/execnodes.h"
+#include "port/atomics.h"
+#include "storage/aio_internal.h"
+#include "storage/lock.h"
+#include "storage/proc.h"
+#include "storage/procnumber.h"
+#include "utils/builtins.h"
+#include "utils/fmgrprotos.h"
+#include "utils/tuplestore.h"
+
+
+/*
+ * Byte length of an iovec.
+ *
+ * Returns the sum of iov_len over the first 'cnt' entries of 'iov'; 0 if
+ * 'cnt' is not positive.
+ */
+static size_t
+iov_byte_length(const struct iovec *iov, int cnt)
+{
+	size_t		len = 0;
+
+	for (int i = 0; i < cnt; i++)
+	{
+		len += iov[i].iov_len;
+	}
+
+	return len;
+}
+
+/*
+ * SQL-callable set-returning function backing the pg_aios view.
+ *
+ * Walks all AIO handles in pgaio_ctl and emits one row for every handle that
+ * is not idle.  No lock is taken; instead each handle is copied into local
+ * memory and the copy is validated against the handle's state and generation
+ * (see the comment inside the loop).  Rows are materialized into the
+ * tuplestore set up by InitMaterializedSRF().
+ *
+ * Column order must match proallargtypes / proargnames of pg_get_aios in
+ * pg_proc.dat.
+ */
+Datum
+pg_get_aios(PG_FUNCTION_ARGS)
+{
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	InitMaterializedSRF(fcinfo, 0);
+
+#define PG_GET_AIOS_COLS	15
+
+	for (uint64 i = 0; i < pgaio_ctl->io_handle_count; i++)
+	{
+		PgAioHandle *live_ioh = &pgaio_ctl->io_handles[i];
+		uint32		ioh_id = pgaio_io_get_id(live_ioh);
+		Datum		values[PG_GET_AIOS_COLS] = {0};
+		bool		nulls[PG_GET_AIOS_COLS] = {0};
+		ProcNumber	owner;
+		PGPROC	   *owner_proc;
+		int32		owner_pid;
+		PgAioHandleState start_state;
+		uint64		start_generation;
+		PgAioHandle ioh_copy;
+		struct iovec iov_copy[PG_IOV_MAX];
+
+
+		/*
+		 * There is no lock that could prevent the state of the IO to advance
+		 * concurrently - and we don't want to introduce one, as that would
+		 * introduce atomics into a very common path. Instead we
+		 *
+		 * 1) Determine the state + generation of the IO.
+		 *
+		 * 2) Copy the IO to local memory.
+		 *
+		 * 3) Check if state or generation of the IO changed. If the state
+		 * changed, retry, if the generation changed don't display the IO.
+		 */
+
+		/* 1) from above */
+		start_generation = live_ioh->generation;
+
+		/*
+		 * Retry at this point, so we can accept changing states, but not
+		 * changing generations.
+		 */
+retry:
+		pg_read_barrier();
+		start_state = live_ioh->state;
+
+		if (start_state == PGAIO_HS_IDLE)
+			continue;
+
+		/* 2) from above */
+		memcpy(&ioh_copy, live_ioh, sizeof(PgAioHandle));
+
+		/*
+		 * Safe to copy even if no iovec is used - we always reserve the
+		 * required space.
+		 */
+		memcpy(&iov_copy, &pgaio_ctl->iovecs[ioh_copy.iovec_off],
+			   PG_IOV_MAX * sizeof(struct iovec));
+
+		/*
+		 * Copy information about owner before 3) below, if the process exited
+		 * it'd have to wait for the IO to finish first, which we would detect
+		 * in 3).
+		 */
+		owner = ioh_copy.owner_procno;
+		owner_proc = GetPGProcByNumber(owner);
+		owner_pid = owner_proc->pid;
+
+		/* 3) from above */
+		pg_read_barrier();
+
+		/*
+		 * The IO completed and a new one was started with the same ID. Don't
+		 * display it - it really started after this function was called.
+		 * There would be a risk of a livelock if we just retried endlessly,
+		 * if IOs complete very quickly.
+		 */
+		if (live_ioh->generation != start_generation)
+			continue;
+
+		/*
+		 * The IO's state changed while we were "rendering" it. Just start
+		 * from scratch. There's no risk of a livelock here, as an IO has a
+		 * limited set of states it can be in, and state changes go only in a
+		 * single direction.
+		 */
+		if (live_ioh->state != start_state)
+			goto retry;
+
+		/*
+		 * Now that we have copied the IO into local memory and checked that
+		 * it's still in the same state, we are not allowed to access "live"
+		 * memory anymore. To make it slightly easier to catch such cases, set
+		 * the "live" pointers to NULL.
+		 */
+		live_ioh = NULL;
+		owner_proc = NULL;
+
+
+		/*
+		 * column: owning pid
+		 *
+		 * A pid of 0 means there is no process to report, so display NULL
+		 * rather than a bogus pid of 0.
+		 */
+		if (owner_pid != 0)
+			values[0] = Int32GetDatum(owner_pid);
+		else
+			nulls[0] = true;
+
+		/* column: IO's id (declared as int4 in pg_proc.dat) */
+		values[1] = Int32GetDatum(ioh_id);
+
+		/* column: IO's generation */
+		values[2] = Int64GetDatum(start_generation);
+
+		/* column: IO's state */
+		values[3] = CStringGetTextDatum(pgaio_io_get_state_name(&ioh_copy));
+
+		/*
+		 * If the IO is in PGAIO_HS_HANDED_OUT state, none of the following
+		 * fields are valid yet (or are in the process of being set).
+		 * Therefore we don't want to display any other columns.
+		 */
+		if (start_state == PGAIO_HS_HANDED_OUT)
+		{
+			memset(nulls + 4, 1, (lengthof(nulls) - 4) * sizeof(bool));
+			goto display;
+		}
+
+		/* column: IO's operation */
+		values[4] = CStringGetTextDatum(pgaio_io_get_op_name(&ioh_copy));
+
+		/* columns: details about the IO's operation (offset, length) */
+		switch (ioh_copy.op)
+		{
+			case PGAIO_OP_INVALID:
+				nulls[5] = true;
+				nulls[6] = true;
+				break;
+			case PGAIO_OP_READV:
+				values[5] = Int64GetDatum(ioh_copy.op_data.read.offset);
+				values[6] =
+					Int64GetDatum(iov_byte_length(iov_copy, ioh_copy.op_data.read.iov_length));
+				break;
+			case PGAIO_OP_WRITEV:
+				values[5] = Int64GetDatum(ioh_copy.op_data.write.offset);
+				values[6] =
+					Int64GetDatum(iov_byte_length(iov_copy, ioh_copy.op_data.write.iov_length));
+				break;
+		}
+
+		/* column: IO's target */
+		values[7] = CStringGetTextDatum(pgaio_io_get_target_name(&ioh_copy));
+
+		/* column: length of IO's data array */
+		values[8] = Int16GetDatum(ioh_copy.handle_data_len);
+
+		/* column: raw result (i.e. some form of syscall return value) */
+		if (start_state == PGAIO_HS_COMPLETED_IO
+			|| start_state == PGAIO_HS_COMPLETED_SHARED
+			|| start_state == PGAIO_HS_COMPLETED_LOCAL)
+			values[9] = Int32GetDatum(ioh_copy.result);
+		else
+			nulls[9] = true;
+
+		/*
+		 * column: result in the higher level representation (unknown if not
+		 * finished)
+		 */
+		values[10] =
+			CStringGetTextDatum(pgaio_result_status_string(ioh_copy.distilled_result.status));
+
+		/* column: target description */
+		values[11] = CStringGetTextDatum(pgaio_io_get_target_description(&ioh_copy));
+
+		/* columns: one for each flag; compare against 0 to get a clean bool */
+		values[12] = BoolGetDatum((ioh_copy.flags & PGAIO_HF_SYNCHRONOUS) != 0);
+		values[13] = BoolGetDatum((ioh_copy.flags & PGAIO_HF_REFERENCES_LOCAL) != 0);
+		values[14] = BoolGetDatum((ioh_copy.flags & PGAIO_HF_BUFFERED) != 0);
+
+display:
+		tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+	}
+
+	return (Datum) 0;
+}
diff --git a/src/backend/storage/aio/meson.build b/src/backend/storage/aio/meson.build index 2f0f03d8071..da6df2d3654 100644 --- a/src/backend/storage/aio/meson.build +++ b/src/backend/storage/aio/meson.build @@ -3,6 +3,7 @@ backend_sources += files( 'aio.c', 'aio_callback.c', + 'aio_funcs.c', 'aio_init.c', 'aio_io.c', 'aio_target.c',
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 798a186e893..8b96f3b8bf2 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -57,6 +57,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 202503262 +#define CATALOG_VERSION_NO 202504011 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 8b68b16d79d..d9c41fa426b 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -12493,4 +12493,13 @@ proargtypes => 'int4', prosrc => 'gist_stratnum_common' }, +# AIO related functions +{ oid => '9200', descr => 'information about in-progress asynchronous IOs', + proname => 'pg_get_aios', prorows => '100',
proretset => 't', + provolatile => 'v', proparallel => 'r', prorettype => 'record', proargtypes => '', + proallargtypes => '{int4,int4,int8,text,text,int8,int8,text,int2,int4,text,text,bool,bool,bool}', + proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}', + proargnames => '{pid,io_id,io_generation,state,operation,off,length,target,handle_data_len,raw_result,result,target_desc,f_sync,f_localmem,f_buffered}', + prosrc => 'pg_get_aios' }, + ] diff --git a/src/test/regress/expected/privileges.out b/src/test/regress/expected/privileges.out index 954f549555e..5588d83e1bf 100644 --- a/src/test/regress/expected/privileges.out +++ b/src/test/regress/expected/privileges.out @@ -3132,6 +3132,12 @@ DROP USER regress_locktable_user; -- switch to superuser \c - CREATE ROLE regress_readallstats; +SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- no + has_table_privilege +--------------------- + f +(1 row) + SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no has_table_privilege --------------------- @@ -3145,6 +3151,12 @@ SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT (1 row) GRANT pg_read_all_stats TO regress_readallstats; +SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- yes + has_table_privilege +--------------------- + t +(1 row) + SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- yes has_table_privilege --------------------- @@ -3159,6 +3171,12 @@ SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT -- run query to ensure that functions within views can be executed SET ROLE regress_readallstats; +SELECT COUNT(*) >= 0 AS ok FROM pg_aios; + ok +---- + t +(1 row) + SELECT COUNT(*) >= 0 AS ok FROM pg_backend_memory_contexts; ok ---- diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 47478969135..d9533deb04e 100644 --- 
a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -1286,6 +1286,22 @@ drop table cchild; SELECT viewname, definition FROM pg_views WHERE schemaname = 'pg_catalog' ORDER BY viewname; +pg_aios| SELECT pid, + io_id, + io_generation, + state, + operation, + off, + length, + target, + handle_data_len, + raw_result, + result, + target_desc, + f_sync, + f_localmem, + f_buffered + FROM pg_get_aios() pg_get_aios(pid, io_id, io_generation, state, operation, off, length, target, handle_data_len, raw_result, result, target_desc, f_sync, f_localmem, f_buffered); pg_available_extension_versions| SELECT e.name, e.version, (x.extname IS NOT NULL) AS installed, diff --git a/src/test/regress/sql/privileges.sql b/src/test/regress/sql/privileges.sql index b81694c24f2..286b1d03756 100644 --- a/src/test/regress/sql/privileges.sql +++ b/src/test/regress/sql/privileges.sql @@ -1919,16 +1919,19 @@ DROP USER regress_locktable_user; CREATE ROLE regress_readallstats; +SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- no SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- no SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- no GRANT pg_read_all_stats TO regress_readallstats; +SELECT has_table_privilege('regress_readallstats','pg_aios','SELECT'); -- yes SELECT has_table_privilege('regress_readallstats','pg_backend_memory_contexts','SELECT'); -- yes SELECT has_table_privilege('regress_readallstats','pg_shmem_allocations','SELECT'); -- yes -- run query to ensure that functions within views can be executed SET ROLE regress_readallstats; +SELECT COUNT(*) >= 0 AS ok FROM pg_aios; SELECT COUNT(*) >= 0 AS ok FROM pg_backend_memory_contexts; SELECT COUNT(*) >= 0 AS ok FROM pg_shmem_allocations; RESET ROLE; |