aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r--src/backend/utils/adt/mcxtfuncs.c426
1 files changed, 392 insertions, 34 deletions
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
index 396c2f223b4..3ede88e5036 100644
--- a/src/backend/utils/adt/mcxtfuncs.c
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -17,28 +17,25 @@
#include "funcapi.h"
#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "access/twophase.h"
+#include "catalog/pg_authid_d.h"
#include "storage/proc.h"
#include "storage/procarray.h"
+#include "utils/acl.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
+#include "utils/memutils.h"
+#include "utils/wait_event_types.h"
/* ----------
* The max bytes for showing identifiers of MemoryContext.
* ----------
*/
#define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024
-
-/*
- * MemoryContextId
- * Used for storage of transient identifiers for
- * pg_get_backend_memory_contexts.
- */
-typedef struct MemoryContextId
-{
- MemoryContext context;
- int context_id;
-} MemoryContextId;
+struct MemoryStatsBackendState *memCxtState = NULL;
+struct MemoryStatsCtl *memCxtArea = NULL;
/*
* int_list_to_array
@@ -89,7 +86,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
*/
for (MemoryContext cur = context; cur != NULL; cur = cur->parent)
{
- MemoryContextId *entry;
+ MemoryStatsContextId *entry;
bool found;
entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found);
@@ -143,24 +140,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
else
nulls[1] = true;
- switch (context->type)
- {
- case T_AllocSetContext:
- type = "AllocSet";
- break;
- case T_GenerationContext:
- type = "Generation";
- break;
- case T_SlabContext:
- type = "Slab";
- break;
- case T_BumpContext:
- type = "Bump";
- break;
- default:
- type = "???";
- break;
- }
+ type = ContextTypeToString(context->type);
values[2] = CStringGetTextDatum(type);
values[3] = Int32GetDatum(list_length(path)); /* level */
@@ -176,6 +156,38 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
}
/*
+ * ContextTypeToString
+ * Returns a textual representation of a context type
+ *
+ * This should cover the same types as MemoryContextIsValid.
+ */
+const char *
+ContextTypeToString(NodeTag type)
+{
+ const char *context_type;
+
+ switch (type)
+ {
+ case T_AllocSetContext:
+ context_type = "AllocSet";
+ break;
+ case T_GenerationContext:
+ context_type = "Generation";
+ break;
+ case T_SlabContext:
+ context_type = "Slab";
+ break;
+ case T_BumpContext:
+ context_type = "Bump";
+ break;
+ default:
+ context_type = "???";
+ break;
+ }
+ return context_type;
+}
+
+/*
* pg_get_backend_memory_contexts
* SQL SRF showing backend memory context.
*/
@@ -189,7 +201,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
HTAB *context_id_lookup;
ctl.keysize = sizeof(MemoryContext);
- ctl.entrysize = sizeof(MemoryContextId);
+ ctl.entrysize = sizeof(MemoryStatsContextId);
ctl.hcxt = CurrentMemoryContext;
context_id_lookup = hash_create("pg_get_backend_memory_contexts",
@@ -216,7 +228,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
foreach_ptr(MemoryContextData, cur, contexts)
{
- MemoryContextId *entry;
+ MemoryStatsContextId *entry;
bool found;
/*
@@ -224,8 +236,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
* PutMemoryContextsStatsTupleStore needs this to populate the "path"
* column with the parent context_ids.
*/
- entry = (MemoryContextId *) hash_search(context_id_lookup, &cur,
- HASH_ENTER, &found);
+ entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur,
+ HASH_ENTER, &found);
entry->context_id = context_id++;
Assert(!found);
@@ -305,3 +317,349 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(true);
}
+
+/*
+ * pg_get_process_memory_contexts
+ * Signal a backend or an auxiliary process to send its memory contexts,
+ * wait for the results and display them.
+ *
+ * By default, only superusers or users with PG_READ_ALL_STATS are allowed to
+ * signal a process to return the memory contexts. This is because allowing
+ * any users to issue this request at an unbounded rate would cause lots of
+ * requests to be sent, which can lead to denial of service. Additional roles
+ * can be permitted with GRANT.
+ *
+ * On receipt of this signal, a backend or an auxiliary process sets the flag
+ * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS()
+ * or process-specific interrupt handler to copy the memory context details
+ * to a dynamic shared memory space.
+ *
+ * We have defined a limit on DSA memory that could be allocated per process -
+ * if the process has more memory contexts than what can fit in the allocated
+ * size, the excess contexts are summarized and represented as cumulative total
+ * at the end of the buffer.
+ *
+ * After sending the signal, wait on a condition variable. The publishing
+ * backend, after copying the data to shared memory, sends signal on that
+ * condition variable. There is one condition variable per publishing backend.
+ * Once the condition variable is signalled, check if the latest memory context
+ * information is available and display.
+ *
+ * If the publishing backend does not respond before the condition variable
+ * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is
+ * time left within the timeout specified by the user, before giving up and
+ * returning previously published statistics, if any. If no previous statistics
+ * exist, return NULL.
+ */
+#define MEMSTATS_WAIT_TIMEOUT 100
+Datum
+pg_get_process_memory_contexts(PG_FUNCTION_ARGS)
+{
+ int pid = PG_GETARG_INT32(0);
+ bool summary = PG_GETARG_BOOL(1);
+ double timeout = PG_GETARG_FLOAT8(2);
+ PGPROC *proc;
+ ProcNumber procNumber = INVALID_PROC_NUMBER;
+ bool proc_is_aux = false;
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ MemoryStatsEntry *memcxt_info;
+ TimestampTz start_timestamp;
+
+ /*
+ * See if the process with given pid is a backend or an auxiliary process
+ * and remember the type for when we requery the process later.
+ */
+ proc = BackendPidGetProc(pid);
+ if (proc == NULL)
+ {
+ proc = AuxiliaryPidGetProc(pid);
+ proc_is_aux = true;
+ }
+
+ /*
+ * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid
+ * isn't valid; this is however not a problem and leave with a WARNING.
+ * See comment in pg_log_backend_memory_contexts for a discussion on this.
+ */
+ if (proc == NULL)
+ {
+ /*
+ * This is just a warning so a loop-through-resultset will not abort
+ * if one backend terminated on its own during the run.
+ */
+ ereport(WARNING,
+ errmsg("PID %d is not a PostgreSQL server process", pid));
+ PG_RETURN_NULL();
+ }
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ procNumber = GetNumberFromPGProc(proc);
+
+ LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+ memCxtState[procNumber].summary = summary;
+ LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+ start_timestamp = GetCurrentTimestamp();
+
+ /*
+ * Send a signal to a PostgreSQL process, informing it we want it to
+ * produce information about its memory contexts.
+ */
+ if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0)
+ {
+ ereport(WARNING,
+ errmsg("could not send signal to process %d: %m", pid));
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Even if the proc has published statistics, the may not be due to the
+ * current request, but previously published stats. Check if the stats
+ * are updated by comparing the timestamp, if the stats are newer than our
+ * previously recorded timestamp from before sending the procsignal, they
+ * must by definition be updated. Wait for the timeout specified by the
+ * user, following which display old statistics if available or return
+ * NULL.
+ */
+ while (1)
+ {
+ long msecs;
+
+ /*
+ * We expect to come out of sleep when the requested process has
+ * finished publishing the statistics, verified using the valid DSA
+ * pointer.
+ *
+ * Make sure that the information belongs to pid we requested
+ * information for, Otherwise loop back and wait for the server
+ * process to finish publishing statistics.
+ */
+ LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+
+ /*
+ * Note in procnumber.h file says that a procNumber can be re-used for
+ * a different backend immediately after a backend exits. In case an
+ * old process' data was there and not updated by the current process
+ * in the slot identified by the procNumber, the pid of the requested
+ * process and the proc_id might not match.
+ */
+ if (memCxtState[procNumber].proc_id == pid)
+ {
+ /*
+ * Break if the latest stats have been read, indicated by
+ * statistics timestamp being newer than the current request
+ * timestamp.
+ */
+ msecs = TimestampDifferenceMilliseconds(start_timestamp,
+ memCxtState[procNumber].stats_timestamp);
+
+ if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)
+ && msecs > 0)
+ break;
+ }
+ LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+ /*
+ * Recheck the state of the backend before sleeping on the condition
+ * variable to ensure the process is still alive. Only check the
+ * relevant process type based on the earlier PID check.
+ */
+ if (proc_is_aux)
+ proc = AuxiliaryPidGetProc(pid);
+ else
+ proc = BackendPidGetProc(pid);
+
+ /*
+ * The process ending during memory context processing is not an
+ * error.
+ */
+ if (proc == NULL)
+ {
+ ereport(WARNING,
+ errmsg("PID %d is no longer a PostgreSQL server process",
+ pid));
+ PG_RETURN_NULL();
+ }
+
+ msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp());
+
+ /*
+ * If we haven't already exceeded the timeout value, sleep for the
+ * remainder of the timeout on the condition variable.
+ */
+ if (msecs > 0 && msecs < (timeout * 1000))
+ {
+ /*
+ * Wait for the timeout as defined by the user. If no updated
+ * statistics are available within the allowed time then display
+ * previously published statistics if there are any. If no
+ * previous statistics are available then return NULL. The timer
+ * is defined in milliseconds since thats what the condition
+ * variable sleep uses.
+ */
+ if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv,
+ ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH))
+ {
+ LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+ /* Displaying previously published statistics if available */
+ if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
+ break;
+ else
+ {
+ LWLockRelease(&memCxtState[procNumber].lw_lock);
+ PG_RETURN_NULL();
+ }
+ }
+ }
+ else
+ {
+ LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE);
+ /* Displaying previously published statistics if available */
+ if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer))
+ break;
+ else
+ {
+ LWLockRelease(&memCxtState[procNumber].lw_lock);
+ PG_RETURN_NULL();
+ }
+ }
+ }
+
+ /*
+ * We should only reach here with a valid DSA handle, either containing
+ * updated statistics or previously published statistics (identified by
+ * the timestamp.
+ */
+ Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID);
+ /* Attach to the dsa area if we have not already done so */
+ if (area == NULL)
+ {
+ MemoryContext oldcontext = CurrentMemoryContext;
+
+ MemoryContextSwitchTo(TopMemoryContext);
+ area = dsa_attach(memCxtArea->memstats_dsa_handle);
+ MemoryContextSwitchTo(oldcontext);
+ dsa_pin_mapping(area);
+ }
+
+ /*
+ * Backend has finished publishing the stats, project them.
+ */
+ memcxt_info = (MemoryStatsEntry *)
+ dsa_get_address(area, memCxtState[procNumber].memstats_dsa_pointer);
+
+#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12
+ for (int i = 0; i < memCxtState[procNumber].total_stats; i++)
+ {
+ ArrayType *path_array;
+ int path_length;
+ Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
+ bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS];
+ char *name;
+ char *ident;
+ Datum *path_datum = NULL;
+ int *path_int = NULL;
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+
+ if (DsaPointerIsValid(memcxt_info[i].name))
+ {
+ name = (char *) dsa_get_address(area, memcxt_info[i].name);
+ values[0] = CStringGetTextDatum(name);
+ }
+ else
+ nulls[0] = true;
+
+ if (DsaPointerIsValid(memcxt_info[i].ident))
+ {
+ ident = (char *) dsa_get_address(area, memcxt_info[i].ident);
+ values[1] = CStringGetTextDatum(ident);
+ }
+ else
+ nulls[1] = true;
+
+ values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type));
+
+ path_length = memcxt_info[i].path_length;
+ path_datum = (Datum *) palloc(path_length * sizeof(Datum));
+ if (DsaPointerIsValid(memcxt_info[i].path))
+ {
+ path_int = (int *) dsa_get_address(area, memcxt_info[i].path);
+ for (int j = 0; j < path_length; j++)
+ path_datum[j] = Int32GetDatum(path_int[j]);
+ path_array = construct_array_builtin(path_datum, path_length, INT4OID);
+ values[3] = PointerGetDatum(path_array);
+ }
+ else
+ nulls[3] = true;
+
+ values[4] = Int32GetDatum(memcxt_info[i].levels);
+ values[5] = Int64GetDatum(memcxt_info[i].totalspace);
+ values[6] = Int64GetDatum(memcxt_info[i].nblocks);
+ values[7] = Int64GetDatum(memcxt_info[i].freespace);
+ values[8] = Int64GetDatum(memcxt_info[i].freechunks);
+ values[9] = Int64GetDatum(memcxt_info[i].totalspace -
+ memcxt_info[i].freespace);
+ values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats);
+ values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+ LWLockRelease(&memCxtState[procNumber].lw_lock);
+
+ ConditionVariableCancelSleep();
+
+ PG_RETURN_NULL();
+}
+
+Size
+MemoryContextReportingShmemSize(void)
+{
+ Size sz = 0;
+ Size TotalProcs = 0;
+
+ TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS);
+ TotalProcs = add_size(TotalProcs, MaxBackends);
+ sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState)));
+
+ sz = add_size(sz, sizeof(MemoryStatsCtl));
+
+ return sz;
+}
+
+/*
+ * Initialize shared memory for displaying memory context statistics
+ */
+void
+MemoryContextReportingShmemInit(void)
+{
+ bool found;
+
+ memCxtArea = (MemoryStatsCtl *)
+ ShmemInitStruct("MemoryStatsCtl",
+ sizeof(MemoryStatsCtl), &found);
+
+ if (!found)
+ {
+ LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE);
+ memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID;
+ }
+
+ memCxtState = (MemoryStatsBackendState *)
+ ShmemInitStruct("MemoryStatsBackendState",
+ ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)),
+ &found);
+
+ if (found)
+ return;
+
+ for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++)
+ {
+ ConditionVariableInit(&memCxtState[i].memcxt_cv);
+ LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC);
+ memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer;
+ }
+}