diff options
Diffstat (limited to 'src/backend/utils')
-rw-r--r-- | src/backend/utils/activity/wait_event_names.txt | 1 | ||||
-rw-r--r-- | src/backend/utils/adt/mcxtfuncs.c | 426 | ||||
-rw-r--r-- | src/backend/utils/init/globals.c | 1 | ||||
-rw-r--r-- | src/backend/utils/init/postinit.c | 7 | ||||
-rw-r--r-- | src/backend/utils/mmgr/mcxt.c | 645 |
5 files changed, 1035 insertions, 45 deletions
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 8bce14c38fd..23eaf559c8d 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -161,6 +161,7 @@ WAL_RECEIVER_EXIT "Waiting for the WAL receiver to exit." WAL_RECEIVER_WAIT_START "Waiting for startup process to send initial data for streaming replication." WAL_SUMMARY_READY "Waiting for a new WAL summary to be generated." XACT_GROUP_UPDATE "Waiting for the group leader to update transaction status at transaction end." +MEM_CXT_PUBLISH "Waiting for a process to publish memory information." ABI_compatibility: diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c index 396c2f223b4..3ede88e5036 100644 --- a/src/backend/utils/adt/mcxtfuncs.c +++ b/src/backend/utils/adt/mcxtfuncs.c @@ -17,28 +17,25 @@ #include "funcapi.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "access/twophase.h" +#include "catalog/pg_authid_d.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/hsearch.h" +#include "utils/memutils.h" +#include "utils/wait_event_types.h" /* ---------- * The max bytes for showing identifiers of MemoryContext. * ---------- */ #define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024 - -/* - * MemoryContextId - * Used for storage of transient identifiers for - * pg_get_backend_memory_contexts. 
- */ -typedef struct MemoryContextId -{ - MemoryContext context; - int context_id; -} MemoryContextId; +struct MemoryStatsBackendState *memCxtState = NULL; +struct MemoryStatsCtl *memCxtArea = NULL; /* * int_list_to_array @@ -89,7 +86,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, */ for (MemoryContext cur = context; cur != NULL; cur = cur->parent) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; entry = hash_search(context_id_lookup, &cur, HASH_FIND, &found); @@ -143,24 +140,7 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, else nulls[1] = true; - switch (context->type) - { - case T_AllocSetContext: - type = "AllocSet"; - break; - case T_GenerationContext: - type = "Generation"; - break; - case T_SlabContext: - type = "Slab"; - break; - case T_BumpContext: - type = "Bump"; - break; - default: - type = "???"; - break; - } + type = ContextTypeToString(context->type); values[2] = CStringGetTextDatum(type); values[3] = Int32GetDatum(list_length(path)); /* level */ @@ -176,6 +156,38 @@ PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore, } /* + * ContextTypeToString + * Returns a textual representation of a context type + * + * This should cover the same types as MemoryContextIsValid. + */ +const char * +ContextTypeToString(NodeTag type) +{ + const char *context_type; + + switch (type) + { + case T_AllocSetContext: + context_type = "AllocSet"; + break; + case T_GenerationContext: + context_type = "Generation"; + break; + case T_SlabContext: + context_type = "Slab"; + break; + case T_BumpContext: + context_type = "Bump"; + break; + default: + context_type = "???"; + break; + } + return context_type; +} + +/* * pg_get_backend_memory_contexts * SQL SRF showing backend memory context. 
*/ @@ -189,7 +201,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) HTAB *context_id_lookup; ctl.keysize = sizeof(MemoryContext); - ctl.entrysize = sizeof(MemoryContextId); + ctl.entrysize = sizeof(MemoryStatsContextId); ctl.hcxt = CurrentMemoryContext; context_id_lookup = hash_create("pg_get_backend_memory_contexts", @@ -216,7 +228,7 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) foreach_ptr(MemoryContextData, cur, contexts) { - MemoryContextId *entry; + MemoryStatsContextId *entry; bool found; /* @@ -224,8 +236,8 @@ pg_get_backend_memory_contexts(PG_FUNCTION_ARGS) * PutMemoryContextsStatsTupleStore needs this to populate the "path" * column with the parent context_ids. */ - entry = (MemoryContextId *) hash_search(context_id_lookup, &cur, - HASH_ENTER, &found); + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); entry->context_id = context_id++; Assert(!found); @@ -305,3 +317,349 @@ pg_log_backend_memory_contexts(PG_FUNCTION_ARGS) PG_RETURN_BOOL(true); } + +/* + * pg_get_process_memory_contexts + * Signal a backend or an auxiliary process to send its memory contexts, + * wait for the results and display them. + * + * By default, only superusers or users with PG_READ_ALL_STATS are allowed to + * signal a process to return the memory contexts. This is because allowing + * any users to issue this request at an unbounded rate would cause lots of + * requests to be sent, which can lead to denial of service. Additional roles + * can be permitted with GRANT. + * + * On receipt of this signal, a backend or an auxiliary process sets the flag + * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS() + * or process-specific interrupt handler to copy the memory context details + * to a dynamic shared memory space. 
+ * + * We have defined a limit on DSA memory that could be allocated per process - + * if the process has more memory contexts than what can fit in the allocated + * size, the excess contexts are summarized and represented as cumulative total + * at the end of the buffer. + * + * After sending the signal, wait on a condition variable. The publishing + * backend, after copying the data to shared memory, sends signal on that + * condition variable. There is one condition variable per publishing backend. + * Once the condition variable is signalled, check if the latest memory context + * information is available and display. + * + * If the publishing backend does not respond before the condition variable + * times out, which is set to MEMSTATS_WAIT_TIMEOUT, retry given that there is + * time left within the timeout specified by the user, before giving up and + * returning previously published statistics, if any. If no previous statistics + * exist, return NULL. + */ +#define MEMSTATS_WAIT_TIMEOUT 100 +Datum +pg_get_process_memory_contexts(PG_FUNCTION_ARGS) +{ + int pid = PG_GETARG_INT32(0); + bool summary = PG_GETARG_BOOL(1); + double timeout = PG_GETARG_FLOAT8(2); + PGPROC *proc; + ProcNumber procNumber = INVALID_PROC_NUMBER; + bool proc_is_aux = false; + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + MemoryStatsEntry *memcxt_info; + TimestampTz start_timestamp; + + /* + * See if the process with given pid is a backend or an auxiliary process + * and remember the type for when we requery the process later. + */ + proc = BackendPidGetProc(pid); + if (proc == NULL) + { + proc = AuxiliaryPidGetProc(pid); + proc_is_aux = true; + } + + /* + * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid + * isn't valid; this is however not a problem and leave with a WARNING. + * See comment in pg_log_backend_memory_contexts for a discussion on this. 
+ */ + if (proc == NULL) + { + /* + * This is just a warning so a loop-through-resultset will not abort + * if one backend terminated on its own during the run. + */ + ereport(WARNING, + errmsg("PID %d is not a PostgreSQL server process", pid)); + PG_RETURN_NULL(); + } + + InitMaterializedSRF(fcinfo, 0); + + procNumber = GetNumberFromPGProc(proc); + + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + memCxtState[procNumber].summary = summary; + LWLockRelease(&memCxtState[procNumber].lw_lock); + + start_timestamp = GetCurrentTimestamp(); + + /* + * Send a signal to a PostgreSQL process, informing it we want it to + * produce information about its memory contexts. + */ + if (SendProcSignal(pid, PROCSIG_GET_MEMORY_CONTEXT, procNumber) < 0) + { + ereport(WARNING, + errmsg("could not send signal to process %d: %m", pid)); + PG_RETURN_NULL(); + } + + /* + * Even if the proc has published statistics, they may not be due to the + * current request, but previously published stats. Check if the stats + * are updated by comparing the timestamp: if the stats are newer than our + * previously recorded timestamp from before sending the procsignal, they + * must by definition be updated. Wait for the timeout specified by the + * user, following which display old statistics if available or return + * NULL. + */ + while (1) + { + long msecs; + + /* + * We expect to come out of sleep when the requested process has + * finished publishing the statistics, verified using the valid DSA + * pointer. + * + * Make sure that the information belongs to the pid we requested + * information for; otherwise loop back and wait for the server + * process to finish publishing statistics. + */ + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + + /* + * Note in procnumber.h file says that a procNumber can be re-used for + * a different backend immediately after a backend exits. 
In case an + * old process' data was there and not updated by the current process + * in the slot identified by the procNumber, the pid of the requested + * process and the proc_id might not match. + */ + if (memCxtState[procNumber].proc_id == pid) + { + /* + * Break if the latest stats have been read, indicated by + * statistics timestamp being newer than the current request + * timestamp. + */ + msecs = TimestampDifferenceMilliseconds(start_timestamp, + memCxtState[procNumber].stats_timestamp); + + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer) + && msecs > 0) + break; + } + LWLockRelease(&memCxtState[procNumber].lw_lock); + + /* + * Recheck the state of the backend before sleeping on the condition + * variable to ensure the process is still alive. Only check the + * relevant process type based on the earlier PID check. + */ + if (proc_is_aux) + proc = AuxiliaryPidGetProc(pid); + else + proc = BackendPidGetProc(pid); + + /* + * The process ending during memory context processing is not an + * error. + */ + if (proc == NULL) + { + ereport(WARNING, + errmsg("PID %d is no longer a PostgreSQL server process", + pid)); + PG_RETURN_NULL(); + } + + msecs = TimestampDifferenceMilliseconds(start_timestamp, GetCurrentTimestamp()); + + /* + * If we haven't already exceeded the timeout value, sleep for the + * remainder of the timeout on the condition variable. + */ + if (msecs > 0 && msecs < (timeout * 1000)) + { + /* + * Wait for the timeout as defined by the user. If no updated + * statistics are available within the allowed time then display + * previously published statistics if there are any. If no + * previous statistics are available then return NULL. The timer + * is defined in milliseconds since thats what the condition + * variable sleep uses. 
+ */ + if (ConditionVariableTimedSleep(&memCxtState[procNumber].memcxt_cv, + ((timeout * 1000) - msecs), WAIT_EVENT_MEM_CXT_PUBLISH)) + { + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + /* Display previously published statistics, if available. NOTE(review): the PG_RETURN_NULL() below leaves without the ConditionVariableCancelSleep() that the normal exit path of this function performs - confirm this is intended. */ + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) + break; + else + { + LWLockRelease(&memCxtState[procNumber].lw_lock); + PG_RETURN_NULL(); + } + } + } + else + { + LWLockAcquire(&memCxtState[procNumber].lw_lock, LW_EXCLUSIVE); + /* Timeout budget already used up: display previously published statistics, if available */ + if (DsaPointerIsValid(memCxtState[procNumber].memstats_dsa_pointer)) + break; + else + { + LWLockRelease(&memCxtState[procNumber].lw_lock); + PG_RETURN_NULL(); + } + } + } + + /* + * We should only reach here with a valid DSA handle, either containing + * updated statistics or previously published statistics (identified by + * the timestamp). The per-process lock is still held at this point. + */ + Assert(memCxtArea->memstats_dsa_handle != DSA_HANDLE_INVALID); + /* Attach to the dsa area if we have not already done so */ + if (area == NULL) + { + MemoryContext oldcontext = CurrentMemoryContext; + + MemoryContextSwitchTo(TopMemoryContext); + area = dsa_attach(memCxtArea->memstats_dsa_handle); + MemoryContextSwitchTo(oldcontext); + dsa_pin_mapping(area); + } + + /* + * Backend has finished publishing the stats, project them. 
+ */ + memcxt_info = (MemoryStatsEntry *) + dsa_get_address(area, memCxtState[procNumber].memstats_dsa_pointer); + +#define PG_GET_PROCESS_MEMORY_CONTEXTS_COLS 12 + for (int i = 0; i < memCxtState[procNumber].total_stats; i++) + { + ArrayType *path_array; + int path_length; + Datum values[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + bool nulls[PG_GET_PROCESS_MEMORY_CONTEXTS_COLS]; + char *name; + char *ident; + Datum *path_datum = NULL; + int *path_int = NULL; + + memset(values, 0, sizeof(values)); + memset(nulls, 0, sizeof(nulls)); + + if (DsaPointerIsValid(memcxt_info[i].name)) + { + name = (char *) dsa_get_address(area, memcxt_info[i].name); + values[0] = CStringGetTextDatum(name); + } + else + nulls[0] = true; + + if (DsaPointerIsValid(memcxt_info[i].ident)) + { + ident = (char *) dsa_get_address(area, memcxt_info[i].ident); + values[1] = CStringGetTextDatum(ident); + } + else + nulls[1] = true; + + values[2] = CStringGetTextDatum(ContextTypeToString(memcxt_info[i].type)); + + path_length = memcxt_info[i].path_length; + path_datum = (Datum *) palloc(path_length * sizeof(Datum)); + if (DsaPointerIsValid(memcxt_info[i].path)) + { + path_int = (int *) dsa_get_address(area, memcxt_info[i].path); + for (int j = 0; j < path_length; j++) + path_datum[j] = Int32GetDatum(path_int[j]); + path_array = construct_array_builtin(path_datum, path_length, INT4OID); + values[3] = PointerGetDatum(path_array); + } + else + nulls[3] = true; + + values[4] = Int32GetDatum(memcxt_info[i].levels); + values[5] = Int64GetDatum(memcxt_info[i].totalspace); + values[6] = Int64GetDatum(memcxt_info[i].nblocks); + values[7] = Int64GetDatum(memcxt_info[i].freespace); + values[8] = Int64GetDatum(memcxt_info[i].freechunks); + values[9] = Int64GetDatum(memcxt_info[i].totalspace - + memcxt_info[i].freespace); + values[10] = Int32GetDatum(memcxt_info[i].num_agg_stats); + values[11] = TimestampTzGetDatum(memCxtState[procNumber].stats_timestamp); + + tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, 
+ values, nulls); + } + LWLockRelease(&memCxtState[procNumber].lw_lock); + + ConditionVariableCancelSleep(); + + PG_RETURN_NULL(); +} + +Size +MemoryContextReportingShmemSize(void) +{ + Size sz = 0; + Size TotalProcs = 0; + + TotalProcs = add_size(TotalProcs, NUM_AUXILIARY_PROCS); + TotalProcs = add_size(TotalProcs, MaxBackends); + sz = add_size(sz, mul_size(TotalProcs, sizeof(MemoryStatsBackendState))); + + sz = add_size(sz, sizeof(MemoryStatsCtl)); + + return sz; +} + +/* + * Initialize shared memory for displaying memory context statistics + */ +void +MemoryContextReportingShmemInit(void) +{ + bool found; + + memCxtArea = (MemoryStatsCtl *) + ShmemInitStruct("MemoryStatsCtl", + sizeof(MemoryStatsCtl), &found); + + if (!found) + { + LWLockInitialize(&memCxtArea->lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_STATE); + memCxtArea->memstats_dsa_handle = DSA_HANDLE_INVALID; + } + + memCxtState = (MemoryStatsBackendState *) + ShmemInitStruct("MemoryStatsBackendState", + ((MaxBackends + NUM_AUXILIARY_PROCS) * sizeof(MemoryStatsBackendState)), + &found); + + if (found) + return; + + for (int i = 0; i < (MaxBackends + NUM_AUXILIARY_PROCS); i++) + { + ConditionVariableInit(&memCxtState[i].memcxt_cv); + LWLockInitialize(&memCxtState[i].lw_lock, LWTRANCHE_MEMORY_CONTEXT_REPORTING_PROC); + memCxtState[i].memstats_dsa_pointer = InvalidDsaPointer; + } +} diff --git a/src/backend/utils/init/globals.c b/src/backend/utils/init/globals.c index 2152aad97d9..92304a1f124 100644 --- a/src/backend/utils/init/globals.c +++ b/src/backend/utils/init/globals.c @@ -39,6 +39,7 @@ volatile sig_atomic_t TransactionTimeoutPending = false; volatile sig_atomic_t IdleSessionTimeoutPending = false; volatile sig_atomic_t ProcSignalBarrierPending = false; volatile sig_atomic_t LogMemoryContextPending = false; +volatile sig_atomic_t PublishMemoryContextPending = false; volatile sig_atomic_t IdleStatsUpdateTimeoutPending = false; volatile uint32 InterruptHoldoffCount = 0; volatile uint32 
QueryCancelHoldoffCount = 0; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index c09c4d404ba..01309ef3f86 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -667,6 +667,13 @@ BaseInit(void) * drop ephemeral slots, which in turn triggers stats reporting. */ ReplicationSlotInitialize(); + + /* + * The before shmem exit callback frees the DSA memory occupied by the + * latest memory context statistics that could be published by this proc + * if requested. + */ + before_shmem_exit(AtProcExit_memstats_cleanup, 0); } diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c index d98ae9db6be..cf4e22bf1cc 100644 --- a/src/backend/utils/mmgr/mcxt.c +++ b/src/backend/utils/mmgr/mcxt.c @@ -23,6 +23,11 @@ #include "mb/pg_wchar.h" #include "miscadmin.h" +#include "nodes/pg_list.h" +#include "storage/lwlock.h" +#include "storage/ipc.h" +#include "utils/dsa.h" +#include "utils/hsearch.h" #include "utils/memdebug.h" #include "utils/memutils.h" #include "utils/memutils_internal.h" @@ -135,6 +140,17 @@ static const MemoryContextMethods mcxt_methods[] = { }; #undef BOGUS_MCTX +/* + * This is passed to MemoryContextStatsInternal to determine whether + * to print context statistics or not and where to print them logs or + * stderr. 
+ */ +typedef enum PrintDestination +{ + PRINT_STATS_TO_STDERR = 0, + PRINT_STATS_TO_LOGS, + PRINT_STATS_NONE +} PrintDestination; /* * CurrentMemoryContext @@ -156,16 +172,31 @@ MemoryContext CurTransactionContext = NULL; /* This is a transient link to the active portal's memory context: */ MemoryContext PortalContext = NULL; +dsa_area *area = NULL; static void MemoryContextDeleteOnly(MemoryContext context); static void MemoryContextCallResetCallbacks(MemoryContext context); static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - bool print_to_stderr); + PrintDestination print_location, + int *num_contexts); static void MemoryContextStatsPrint(MemoryContext context, void *passthru, const char *stats_string, bool print_to_stderr); +static void PublishMemoryContext(MemoryStatsEntry *memcxt_infos, + int curr_id, MemoryContext context, + List *path, + MemoryContextCounters stat, + int num_contexts, dsa_area *area, + int max_levels); +static void compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup, + int *stats_count, + bool summary); +static List *compute_context_path(MemoryContext c, HTAB *context_id_lookup); +static void free_memorycontextstate_dsa(dsa_area *area, int total_stats, + dsa_pointer prev_dsa_pointer); +static void end_memorycontext_reporting(void); /* * You should not do memory allocations within a critical section, because @@ -831,11 +862,19 @@ MemoryContextStatsDetail(MemoryContext context, bool print_to_stderr) { MemoryContextCounters grand_totals; + int num_contexts = 0; /* must start at 0: MemoryContextStatsInternal only increments it */ + PrintDestination print_location; memset(&grand_totals, 0, sizeof(grand_totals)); + if (print_to_stderr) + print_location = PRINT_STATS_TO_STDERR; + else + print_location = PRINT_STATS_TO_LOGS; + + /* num_contexts receives the number of contexts visited by the traversal; it is not used further here */ MemoryContextStatsInternal(context, 0, max_level, max_children, - &grand_totals, print_to_stderr); + &grand_totals, 
print_location, &num_contexts); if (print_to_stderr) fprintf(stderr, @@ -870,13 +909,14 @@ MemoryContextStatsDetail(MemoryContext context, * One recursion level for MemoryContextStats * * Print stats for this context if possible, but in any case accumulate counts - * into *totals (if not NULL). + * into *totals (if not NULL). The callers should make sure that print_location + * is set to PRINT_STATS_STDERR or PRINT_STATS_TO_LOGS or PRINT_STATS_NONE. */ static void MemoryContextStatsInternal(MemoryContext context, int level, int max_level, int max_children, MemoryContextCounters *totals, - bool print_to_stderr) + PrintDestination print_location, int *num_contexts) { MemoryContext child; int ichild; @@ -884,10 +924,39 @@ MemoryContextStatsInternal(MemoryContext context, int level, Assert(MemoryContextIsValid(context)); /* Examine the context itself */ - context->methods->stats(context, - MemoryContextStatsPrint, - &level, - totals, print_to_stderr); + switch (print_location) + { + case PRINT_STATS_TO_STDERR: + context->methods->stats(context, + MemoryContextStatsPrint, + &level, + totals, true); + break; + + case PRINT_STATS_TO_LOGS: + context->methods->stats(context, + MemoryContextStatsPrint, + &level, + totals, false); + break; + + case PRINT_STATS_NONE: + + /* + * Do not print the statistics if print_location is + * PRINT_STATS_NONE, only compute totals. This is used in + * reporting of memory context statistics via a sql function. Last + * parameter is not relevant. + */ + context->methods->stats(context, + NULL, + NULL, + totals, false); + break; + } + + /* Increment the context count for each of the recursive call */ + *num_contexts = *num_contexts + 1; /* * Examine children. 
@@ -907,7 +976,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, MemoryContextStatsInternal(child, level + 1, max_level, max_children, totals, - print_to_stderr); + print_location, num_contexts); } } @@ -926,7 +995,13 @@ MemoryContextStatsInternal(MemoryContext context, int level, child = MemoryContextTraverseNext(child, context); } - if (print_to_stderr) + /* + * Add the count of children contexts which are traversed in the + * non-recursive manner. + */ + *num_contexts = *num_contexts + ichild; + + if (print_location == PRINT_STATS_TO_STDERR) { for (int i = 0; i <= level; i++) fprintf(stderr, " "); @@ -939,7 +1014,7 @@ MemoryContextStatsInternal(MemoryContext context, int level, local_totals.freechunks, local_totals.totalspace - local_totals.freespace); } - else + else if (print_location == PRINT_STATS_TO_LOGS) ereport(LOG_SERVER_ONLY, (errhidestmt(true), errhidecontext(true), @@ -1277,6 +1352,22 @@ HandleLogMemoryContextInterrupt(void) } /* + * HandleGetMemoryContextInterrupt + * Handle receipt of an interrupt indicating a request to publish memory + * contexts statistics. + * + * All the actual work is deferred to ProcessGetMemoryContextInterrupt() as + * this cannot be performed in a signal handler. + */ +void +HandleGetMemoryContextInterrupt(void) +{ + InterruptPending = true; + PublishMemoryContextPending = true; + /* latch will be set by procsignal_sigusr1_handler */ +} + +/* * ProcessLogMemoryContextInterrupt * Perform logging of memory contexts of this backend process. * @@ -1313,6 +1404,538 @@ ProcessLogMemoryContextInterrupt(void) MemoryContextStatsDetail(TopMemoryContext, 100, 100, false); } +/* + * ProcessGetMemoryContextInterrupt + * Generate information about memory contexts used by the process. + * + * Performs a breadth first search on the memory context tree, thus parents + * statistics are reported before their children in the monitoring function + * output. 
+ * + * Statistics for all the processes are shared via the same dynamic shared + * area. Statistics written by each process are tracked independently in + * per-process DSA pointers. These pointers are stored in static shared memory. + * + * We calculate maximum number of context's statistics that can be displayed + * using a pre-determined limit for memory available per process for this + * utility maximum size of statistics for each context. The remaining context + * statistics if any are captured as a cumulative total at the end of + * individual context's statistics. + * + * If summary is true, we capture the level 1 and level 2 contexts + * statistics. For that we traverse the memory context tree recursively in + * depth first search manner to cover all the children of a parent context, to + * be able to display a cumulative total of memory consumption by a parent at + * level 2 and all its children. + */ +void +ProcessGetMemoryContextInterrupt(void) +{ + List *contexts; + HASHCTL ctl; + HTAB *context_id_lookup; + int context_id = 0; + MemoryStatsEntry *meminfo; + bool summary = false; + int max_stats; + int idx = MyProcNumber; + int stats_count = 0; + int stats_num = 0; + MemoryContextCounters stat; + int num_individual_stats = 0; + + PublishMemoryContextPending = false; + + /* + * The hash table is used for constructing "path" column of the view, + * similar to its local backend counterpart. + */ + ctl.keysize = sizeof(MemoryContext); + ctl.entrysize = sizeof(MemoryStatsContextId); + ctl.hcxt = CurrentMemoryContext; + + context_id_lookup = hash_create("pg_get_remote_backend_memory_contexts", + 256, + &ctl, + HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); + + /* List of contexts to process in the next round - start at the top. 
*/ + contexts = list_make1(TopMemoryContext); + + /* Compute the number of stats that can fit in the defined limit */ + max_stats = + MEMORY_CONTEXT_REPORT_MAX_PER_BACKEND / MAX_MEMORY_CONTEXT_STATS_SIZE; + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + summary = memCxtState[idx].summary; + LWLockRelease(&memCxtState[idx].lw_lock); + + /* + * Traverse the memory context tree to find the total number of contexts. + * If summary is requested, report the total number of contexts at level 1 + * and 2 from the top. Also, populate the hash table of context ids. + */ + compute_contexts_count_and_ids(contexts, context_id_lookup, &stats_count, + summary); + + /* + * Allocate memory in this process's DSA for storing statistics of the + * memory contexts up to max_stats; for contexts that don't fit within the + * limit, a cumulative total is written as the last record in the DSA + * segment. + */ + stats_num = Min(stats_count, max_stats); + + LWLockAcquire(&memCxtArea->lw_lock, LW_EXCLUSIVE); + + /* + * Create a DSA and send the handle to the client process after storing + * the context statistics. If the number of contexts exceeds a predefined + * limit (8MB), a cumulative total is stored for such contexts. + */ + if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) + { + MemoryContext oldcontext = CurrentMemoryContext; + dsa_handle handle; + + MemoryContextSwitchTo(TopMemoryContext); + + area = dsa_create(memCxtArea->lw_lock.tranche); + + handle = dsa_get_handle(area); + MemoryContextSwitchTo(oldcontext); + + dsa_pin_mapping(area); + + /* + * Pin the DSA area to make sure the area remains attachable even if + * the current backend exits. This is done so that the statistics + * are published even if the process exits while a client is waiting. + */ + dsa_pin(area); + + /* Set the handle in shared memory */ + memCxtArea->memstats_dsa_handle = handle; + } + + /* + * If DSA exists, created by another process publishing statistics, attach + * to it. 
+ */ + else if (area == NULL) + { + MemoryContext oldcontext = CurrentMemoryContext; + + MemoryContextSwitchTo(TopMemoryContext); + area = dsa_attach(memCxtArea->memstats_dsa_handle); + MemoryContextSwitchTo(oldcontext); + dsa_pin_mapping(area); + } + LWLockRelease(&memCxtArea->lw_lock); + + /* + * Hold the process lock to protect writes to process specific memory. Two + * processes publishing statistics do not block each other. + */ + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + memCxtState[idx].proc_id = MyProcPid; + + if (DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) + { + /* + * Free any previous allocations, free the name, ident and path + * pointers before freeing the pointer that contains them. + */ + free_memorycontextstate_dsa(area, memCxtState[idx].total_stats, + memCxtState[idx].memstats_dsa_pointer); + } + + /* + * Assigning total stats before allocating memory so that memory cleanup + * can run if any subsequent dsa_allocate call to allocate name/ident/path + * fails. + */ + memCxtState[idx].total_stats = stats_num; + memCxtState[idx].memstats_dsa_pointer = + dsa_allocate0(area, stats_num * sizeof(MemoryStatsEntry)); + + meminfo = (MemoryStatsEntry *) + dsa_get_address(area, memCxtState[idx].memstats_dsa_pointer); + + if (summary) + { + int cxt_id = 0; + List *path = NIL; + + /* Copy TopMemoryContext statistics to DSA */ + memset(&stat, 0, sizeof(stat)); + (*TopMemoryContext->methods->stats) (TopMemoryContext, NULL, NULL, + &stat, true); + path = lcons_int(1, path); + PublishMemoryContext(meminfo, cxt_id, TopMemoryContext, path, stat, + 1, area, 100); + cxt_id = cxt_id + 1; + + /* + * Copy statistics for each of TopMemoryContexts children. This + * includes statistics of at most 100 children per node, with each + * child node limited to a depth of 100 in its subtree. 
+ */ + for (MemoryContext c = TopMemoryContext->firstchild; c != NULL; + c = c->nextchild) + { + MemoryContextCounters grand_totals; + int num_contexts = 0; + int level = 0; + + path = NIL; + memset(&grand_totals, 0, sizeof(grand_totals)); + + MemoryContextStatsInternal(c, level, 100, 100, &grand_totals, + PRINT_STATS_NONE, &num_contexts); + + path = compute_context_path(c, context_id_lookup); + + /* + * Register the stats entry first (slots 0..cxt_id are then in use, + * i.e. cxt_id + 1 entries), that way the cleanup handler can reach + * it in case of allocation failures of one or more members. The + * entry is written to slot cxt_id, the next free one, and cxt_id is + * advanced only afterwards so that no slot is skipped. + */ + memCxtState[idx].total_stats = cxt_id + 1; + PublishMemoryContext(meminfo, cxt_id++, c, path, + grand_totals, num_contexts, area, 100); + } + memCxtState[idx].total_stats = cxt_id; + + end_memorycontext_reporting(); + + /* Notify waiting backends and return */ + hash_destroy(context_id_lookup); + + return; + } + + foreach_ptr(MemoryContextData, cur, contexts) + { + List *path = NIL; + + /* + * Figure out the transient context_id of this context and each of its + * ancestors, to compute a path for this context. + */ + path = compute_context_path(cur, context_id_lookup); + + /* Examine the context stats */ + memset(&stat, 0, sizeof(stat)); + (*cur->methods->stats) (cur, NULL, NULL, &stat, true); + + /* Account for saving one statistics slot for cumulative reporting */ + if (context_id < (max_stats - 1) || stats_count <= max_stats) + { + /* Copy statistics to DSA memory */ + PublishMemoryContext(meminfo, context_id, cur, path, stat, 1, area, 100); + } + else + { + meminfo[max_stats - 1].totalspace += stat.totalspace; + meminfo[max_stats - 1].nblocks += stat.nblocks; + meminfo[max_stats - 1].freespace += stat.freespace; + meminfo[max_stats - 1].freechunks += stat.freechunks; + } + + /* + * DSA max limit per process is reached, write aggregate of the + * remaining statistics. + * + * We can store contexts from 0 to max_stats - 1. 
When stats_count is + * greater than max_stats, we stop reporting individual statistics + * when context_id equals max_stats - 2. As we use max_stats - 1 array + * slot for reporting cumulative statistics or "Remaining Totals". + */ + if (stats_count > max_stats && context_id == (max_stats - 2)) + { + char *nameptr; + int namelen = strlen("Remaining Totals"); + + num_individual_stats = context_id + 1; + meminfo[max_stats - 1].name = dsa_allocate(area, namelen + 1); + nameptr = dsa_get_address(area, meminfo[max_stats - 1].name); + strlcpy(nameptr, "Remaining Totals", namelen + 1); /* also writes the terminating NUL; strncpy with n == strlen left the name unterminated */ + meminfo[max_stats - 1].ident = InvalidDsaPointer; + meminfo[max_stats - 1].path = InvalidDsaPointer; + meminfo[max_stats - 1].type = 0; + } + context_id++; + } + + /* + * Statistics are not aggregated, i.e. individual statistics are reported, + * when stats_count <= max_stats. + */ + if (stats_count <= max_stats) + { + memCxtState[idx].total_stats = context_id; + } + /* Report number of aggregated memory contexts */ + else + { + meminfo[max_stats - 1].num_agg_stats = context_id - + num_individual_stats; + + /* + * Total stats equals num_individual_stats + 1 record for cumulative + * statistics. + */ + memCxtState[idx].total_stats = num_individual_stats + 1; + } + + /* Notify waiting backends and return */ + end_memorycontext_reporting(); + + hash_destroy(context_id_lookup); +} + +/* + * Update timestamp and signal all the waiting client backends after copying + * all the statistics. + */ +static void +end_memorycontext_reporting(void) +{ + memCxtState[MyProcNumber].stats_timestamp = GetCurrentTimestamp(); + LWLockRelease(&memCxtState[MyProcNumber].lw_lock); + ConditionVariableBroadcast(&memCxtState[MyProcNumber].memcxt_cv); +} + +/* + * compute_context_path + * + * Append the transient context_id of this context and each of its ancestors + * to a list, in order to compute a path. 
+ */ +static List * +compute_context_path(MemoryContext c, HTAB *context_id_lookup) +{ + bool found; + List *path = NIL; + MemoryContext cur_context; + + for (cur_context = c; cur_context != NULL; cur_context = cur_context->parent) + { + MemoryStatsContextId *cur_entry; + + cur_entry = hash_search(context_id_lookup, &cur_context, HASH_FIND, &found); + + if (!found) + elog(ERROR, "hash table corrupted, can't construct path value"); + + path = lcons_int(cur_entry->context_id, path); + } + + return path; +} + +/* + * Return the number of contexts allocated currently by the backend + * Assign context ids to each of the contexts. + */ +static void +compute_contexts_count_and_ids(List *contexts, HTAB *context_id_lookup, + int *stats_count, bool summary) +{ + foreach_ptr(MemoryContextData, cur, contexts) + { + MemoryStatsContextId *entry; + bool found; + + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &cur, + HASH_ENTER, &found); + Assert(!found); + + /* + * context id starts with 1 so increment the stats_count before + * assigning. + */ + entry->context_id = ++(*stats_count); + + /* Append the children of the current context to the main list. */ + for (MemoryContext c = cur->firstchild; c != NULL; c = c->nextchild) + { + if (summary) + { + entry = (MemoryStatsContextId *) hash_search(context_id_lookup, &c, + HASH_ENTER, &found); + Assert(!found); + + entry->context_id = ++(*stats_count); + } + + contexts = lappend(contexts, c); + } + + /* + * In summary mode only the first two level (from top) contexts are + * displayed. 
+ */ + if (summary) + break; + } +} + +/* + * PublishMemoryContext + * + * Copy the memory context statistics of a single context to a DSA memory + */ +static void +PublishMemoryContext(MemoryStatsEntry *memcxt_info, int curr_id, + MemoryContext context, List *path, + MemoryContextCounters stat, int num_contexts, + dsa_area *area, int max_levels) +{ + const char *ident = context->ident; + const char *name = context->name; + int *path_list; + + /* + * To be consistent with logging output, we label dynahash contexts with + * just the hash table name as with MemoryContextStatsPrint(). + */ + if (context->ident && strncmp(context->name, "dynahash", 8) == 0) + { + name = context->ident; + ident = NULL; + } + + if (name != NULL) + { + int namelen = strlen(name); + char *nameptr; + + if (strlen(name) >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) + namelen = pg_mbcliplen(name, namelen, + MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); + + memcxt_info[curr_id].name = dsa_allocate(area, namelen + 1); + nameptr = (char *) dsa_get_address(area, memcxt_info[curr_id].name); + strlcpy(nameptr, name, namelen + 1); + } + else + memcxt_info[curr_id].name = InvalidDsaPointer; + + /* Trim and copy the identifier if it is not set to NULL */ + if (ident != NULL) + { + int idlen = strlen(context->ident); + char *identptr; + + /* + * Some identifiers such as SQL query string can be very long, + * truncate oversize identifiers. 
+ */ + if (idlen >= MEMORY_CONTEXT_IDENT_SHMEM_SIZE) + idlen = pg_mbcliplen(ident, idlen, + MEMORY_CONTEXT_IDENT_SHMEM_SIZE - 1); + + memcxt_info[curr_id].ident = dsa_allocate(area, idlen + 1); + identptr = (char *) dsa_get_address(area, memcxt_info[curr_id].ident); + strlcpy(identptr, ident, idlen + 1); + } + else + memcxt_info[curr_id].ident = InvalidDsaPointer; + + /* Allocate DSA memory for storing path information */ + if (path == NIL) + memcxt_info[curr_id].path = InvalidDsaPointer; + else + { + int levels = Min(list_length(path), max_levels); + + memcxt_info[curr_id].path_length = levels; + memcxt_info[curr_id].path = dsa_allocate0(area, levels * sizeof(int)); + memcxt_info[curr_id].levels = list_length(path); + path_list = (int *) dsa_get_address(area, memcxt_info[curr_id].path); + + foreach_int(i, path) + { + path_list[foreach_current_index(i)] = i; + if (--levels == 0) + break; + } + } + memcxt_info[curr_id].type = context->type; + memcxt_info[curr_id].totalspace = stat.totalspace; + memcxt_info[curr_id].nblocks = stat.nblocks; + memcxt_info[curr_id].freespace = stat.freespace; + memcxt_info[curr_id].freechunks = stat.freechunks; + memcxt_info[curr_id].num_agg_stats = num_contexts; +} + +/* + * free_memorycontextstate_dsa + * + * Worker for freeing resources from a MemoryStatsEntry. Callers are + * responsible for ensuring that the DSA pointer is valid. 
+ */ +static void +free_memorycontextstate_dsa(dsa_area *area, int total_stats, + dsa_pointer prev_dsa_pointer) +{ + MemoryStatsEntry *meminfo; + + meminfo = (MemoryStatsEntry *) dsa_get_address(area, prev_dsa_pointer); + Assert(meminfo != NULL); + for (int i = 0; i < total_stats; i++) + { + if (DsaPointerIsValid(meminfo[i].name)) + dsa_free(area, meminfo[i].name); + + if (DsaPointerIsValid(meminfo[i].ident)) + dsa_free(area, meminfo[i].ident); + + if (DsaPointerIsValid(meminfo[i].path)) + dsa_free(area, meminfo[i].path); + } + + dsa_free(area, memCxtState[MyProcNumber].memstats_dsa_pointer); + memCxtState[MyProcNumber].memstats_dsa_pointer = InvalidDsaPointer; +} + +/* + * Free the memory context statistics stored by this process + * in DSA area. + */ +void +AtProcExit_memstats_cleanup(int code, Datum arg) +{ + int idx = MyProcNumber; + + if (memCxtArea->memstats_dsa_handle == DSA_HANDLE_INVALID) + return; + + LWLockAcquire(&memCxtState[idx].lw_lock, LW_EXCLUSIVE); + + if (!DsaPointerIsValid(memCxtState[idx].memstats_dsa_pointer)) + { + LWLockRelease(&memCxtState[idx].lw_lock); + return; + } + + /* If the dsa mapping could not be found, attach to the area */ + if (area == NULL) + area = dsa_attach(memCxtArea->memstats_dsa_handle); + + /* + * Free the memory context statistics, free the name, ident and path + * pointers before freeing the pointer that contains these pointers and + * integer statistics. + */ + free_memorycontextstate_dsa(area, memCxtState[idx].total_stats, + memCxtState[idx].memstats_dsa_pointer); + + dsa_detach(area); + LWLockRelease(&memCxtState[idx].lw_lock); +} + void * palloc(Size size) { |