aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/transam/xlog.c29
-rw-r--r--src/backend/postmaster/checkpointer.c30
-rw-r--r--src/backend/storage/lmgr/lwlock.c56
-rw-r--r--src/backend/storage/lmgr/proc.c3
-rw-r--r--src/backend/storage/smgr/md.c25
-rw-r--r--src/backend/utils/mmgr/mcxt.c40
-rw-r--r--src/include/nodes/memnodes.h3
-rw-r--r--src/include/storage/lwlock.h1
-rw-r--r--src/include/utils/memutils.h2
9 files changed, 147 insertions, 42 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index e5640793eb8..9f417dee298 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -60,6 +60,7 @@
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/guc.h"
+#include "utils/memutils.h"
#include "utils/ps_status.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
@@ -736,6 +737,10 @@ static bool bgwriterLaunched = false;
static int MyLockNo = 0;
static bool holdingAllLocks = false;
+#ifdef WAL_DEBUG
+static MemoryContext walDebugCxt = NULL;
+#endif
+
static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
static bool recoveryStopsBefore(XLogRecord *record);
@@ -1258,6 +1263,7 @@ begin:;
if (XLOG_DEBUG)
{
StringInfoData buf;
+ MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
initStringInfo(&buf);
appendStringInfo(&buf, "INSERT @ %X/%X: ",
@@ -1282,10 +1288,11 @@ begin:;
appendStringInfoString(&buf, " - ");
RmgrTable[rechdr->xl_rmid].rm_desc(&buf, (XLogRecord *) recordbuf.data);
- pfree(recordbuf.data);
}
elog(LOG, "%s", buf.data);
- pfree(buf.data);
+
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextReset(walDebugCxt);
}
#endif
@@ -4807,6 +4814,24 @@ XLOGShmemInit(void)
char *allocptr;
int i;
+#ifdef WAL_DEBUG
+ /*
+ * Create a memory context for WAL debugging that's exempt from the
+ * normal "no pallocs in critical section" rule. Yes, that can lead to a
+ * PANIC if an allocation fails, but wal_debug is not for production use
+ * anyway.
+ */
+ if (walDebugCxt == NULL)
+ {
+ walDebugCxt = AllocSetContextCreate(TopMemoryContext,
+ "WAL Debug",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ MemoryContextAllowInCriticalSection(walDebugCxt, true);
+ }
+#endif
+
ControlFile = (ControlFileData *)
ShmemInitStruct("Control File", sizeof(ControlFileData), &foundCFile);
XLogCtl = (XLogCtlData *)
diff --git a/src/backend/postmaster/checkpointer.c b/src/backend/postmaster/checkpointer.c
index 2ac3061d974..6c814ba0be8 100644
--- a/src/backend/postmaster/checkpointer.c
+++ b/src/backend/postmaster/checkpointer.c
@@ -1305,19 +1305,6 @@ AbsorbFsyncRequests(void)
if (!AmCheckpointerProcess())
return;
- /*
- * We have to PANIC if we fail to absorb all the pending requests (eg,
- * because our hashtable runs out of memory). This is because the system
- * cannot run safely if we are unable to fsync what we have been told to
- * fsync. Fortunately, the hashtable is so small that the problem is
- * quite unlikely to arise in practice.
- */
- START_CRIT_SECTION();
-
- /*
- * We try to avoid holding the lock for a long time by copying the request
- * array.
- */
LWLockAcquire(CheckpointerCommLock, LW_EXCLUSIVE);
/* Transfer stats counts into pending pgstats message */
@@ -1327,12 +1314,25 @@ AbsorbFsyncRequests(void)
CheckpointerShmem->num_backend_writes = 0;
CheckpointerShmem->num_backend_fsync = 0;
+ /*
+ * We try to avoid holding the lock for a long time by copying the request
+ * array, and processing the requests after releasing the lock.
+ *
+ * Once we have cleared the requests from shared memory, we have to PANIC
+ * if we then fail to absorb them (eg, because our hashtable runs out of
+ * memory). This is because the system cannot run safely if we are unable
+ * to fsync what we have been told to fsync. Fortunately, the hashtable
+ * is so small that the problem is quite unlikely to arise in practice.
+ */
n = CheckpointerShmem->num_requests;
if (n > 0)
{
requests = (CheckpointerRequest *) palloc(n * sizeof(CheckpointerRequest));
memcpy(requests, CheckpointerShmem->requests, n * sizeof(CheckpointerRequest));
}
+
+ START_CRIT_SECTION();
+
CheckpointerShmem->num_requests = 0;
LWLockRelease(CheckpointerCommLock);
@@ -1340,10 +1340,10 @@ AbsorbFsyncRequests(void)
for (request = requests; n > 0; request++, n--)
RememberFsyncRequest(request->rnode, request->forknum, request->segno);
+ END_CRIT_SECTION();
+
if (requests)
pfree(requests);
-
- END_CRIT_SECTION();
}
/*
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index d23ac62bf84..a62af27d22c 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -104,8 +104,8 @@ typedef struct lwlock_stats
int spin_delay_count;
} lwlock_stats;
-static int counts_for_pid = 0;
static HTAB *lwlock_stats_htab;
+static lwlock_stats lwlock_stats_dummy;
#endif
#ifdef LOCK_DEBUG
@@ -142,21 +142,39 @@ static void
init_lwlock_stats(void)
{
HASHCTL ctl;
+ static MemoryContext lwlock_stats_cxt = NULL;
+ static bool exit_registered = false;
- if (lwlock_stats_htab != NULL)
- {
- hash_destroy(lwlock_stats_htab);
- lwlock_stats_htab = NULL;
- }
+ if (lwlock_stats_cxt != NULL)
+ MemoryContextDelete(lwlock_stats_cxt);
+
+ /*
+ * The LWLock stats will be updated within a critical section, which
+ * requires allocating new hash entries. Allocations within a critical
+ * section are normally not allowed because running out of memory would
+ * lead to a PANIC, but LWLOCK_STATS is debugging code that's not normally
+ * turned on in production, so that's an acceptable risk. The hash entries
+ * are small, so the risk of running out of memory is minimal in practice.
+ */
+ lwlock_stats_cxt = AllocSetContextCreate(TopMemoryContext,
+ "LWLock stats",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ MemoryContextAllowInCriticalSection(lwlock_stats_cxt, true);
MemSet(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(lwlock_stats_key);
ctl.entrysize = sizeof(lwlock_stats);
ctl.hash = tag_hash;
+ ctl.hcxt = lwlock_stats_cxt;
lwlock_stats_htab = hash_create("lwlock stats", 16384, &ctl,
- HASH_ELEM | HASH_FUNCTION);
- counts_for_pid = MyProcPid;
- on_shmem_exit(print_lwlock_stats, 0);
+ HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
+ if (!exit_registered)
+ {
+ on_shmem_exit(print_lwlock_stats, 0);
+ exit_registered = true;
+ }
}
static void
@@ -190,9 +208,13 @@ get_lwlock_stats_entry(LWLock *lock)
lwlock_stats *lwstats;
bool found;
- /* Set up local count state first time through in a given process */
- if (counts_for_pid != MyProcPid)
- init_lwlock_stats();
+ /*
+ * During shared memory initialization, the hash table doesn't exist yet.
+ * Stats of that phase aren't very interesting, so just collect operations
+ * on all locks in a single dummy entry.
+ */
+ if (lwlock_stats_htab == NULL)
+ return &lwlock_stats_dummy;
/* Fetch or create the entry. */
key.tranche = lock->tranche;
@@ -361,6 +383,16 @@ CreateLWLocks(void)
LWLockRegisterTranche(0, &MainLWLockTranche);
}
+/*
+ * InitLWLockAccess - initialize backend-local state needed to hold LWLocks
+ */
+void
+InitLWLockAccess(void)
+{
+#ifdef LWLOCK_STATS
+ init_lwlock_stats();
+#endif
+}
/*
* LWLockAssign - assign a dynamically-allocated LWLock number
diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index dfaf10e4b5c..ea88a241443 100644
--- a/src/backend/storage/lmgr/proc.c
+++ b/src/backend/storage/lmgr/proc.c
@@ -411,8 +411,9 @@ InitProcess(void)
/*
* Now that we have a PGPROC, we could try to acquire locks, so initialize
- * the deadlock checker.
+ * local state needed for LWLocks, and the deadlock checker.
*/
+ InitLWLockAccess();
InitDeadLockChecking();
}
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 3c1c81a7295..167d61c1afe 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -115,7 +115,7 @@ typedef struct _MdfdVec
struct _MdfdVec *mdfd_chain; /* next segment, or NULL */
} MdfdVec;
-static MemoryContext MdCxt; /* context for all md.c allocations */
+static MemoryContext MdCxt; /* context for all MdfdVec objects */
/*
@@ -157,6 +157,7 @@ typedef struct
static HTAB *pendingOpsTable = NULL;
static List *pendingUnlinks = NIL;
+static MemoryContext pendingOpsCxt; /* context for the above */
static CycleCtr mdsync_cycle_ctr = 0;
static CycleCtr mdckpt_cycle_ctr = 0;
@@ -209,11 +210,27 @@ mdinit(void)
{
HASHCTL hash_ctl;
+ /*
+ * XXX: The checkpointer needs to add entries to the pending ops table
+ * when absorbing fsync requests. That is done within a critical
+ * section, which isn't usually allowed, but we make an exception.
+ * It means that there's a theoretical possibility that you run out of
+ * memory while absorbing fsync requests, which leads to a PANIC.
+ * Fortunately the hash table is small so that's unlikely to happen in
+ * practice.
+ */
+ pendingOpsCxt = AllocSetContextCreate(MdCxt,
+ "Pending Ops Context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ MemoryContextAllowInCriticalSection(pendingOpsCxt, true);
+
MemSet(&hash_ctl, 0, sizeof(hash_ctl));
hash_ctl.keysize = sizeof(RelFileNode);
hash_ctl.entrysize = sizeof(PendingOperationEntry);
hash_ctl.hash = tag_hash;
- hash_ctl.hcxt = MdCxt;
+ hash_ctl.hcxt = pendingOpsCxt;
pendingOpsTable = hash_create("Pending Ops Table",
100L,
&hash_ctl,
@@ -1516,7 +1533,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
else if (segno == UNLINK_RELATION_REQUEST)
{
/* Unlink request: put it in the linked list */
- MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+ MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
PendingUnlinkEntry *entry;
/* PendingUnlinkEntry doesn't store forknum, since it's always MAIN */
@@ -1533,7 +1550,7 @@ RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
else
{
/* Normal case: enter a request to fsync this segment */
- MemoryContext oldcxt = MemoryContextSwitchTo(MdCxt);
+ MemoryContext oldcxt = MemoryContextSwitchTo(pendingOpsCxt);
PendingOperationEntry *entry;
bool found;
diff --git a/src/backend/utils/mmgr/mcxt.c b/src/backend/utils/mmgr/mcxt.c
index e83e76dc0f3..4185a03e9ff 100644
--- a/src/backend/utils/mmgr/mcxt.c
+++ b/src/backend/utils/mmgr/mcxt.c
@@ -60,15 +60,9 @@ static void MemoryContextStatsInternal(MemoryContext context, int level);
* You should not do memory allocations within a critical section, because
* an out-of-memory error will be escalated to a PANIC. To enforce that
* rule, the allocation functions Assert that.
- *
- * There are a two exceptions: 1) error recovery uses ErrorContext, which
- * has some memory set aside so that you don't run out. And 2) checkpointer
- * currently just hopes for the best, which is wrong and ought to be fixed,
- * but it's a known issue so let's not complain about in the meanwhile.
*/
#define AssertNotInCriticalSection(context) \
- Assert(CritSectionCount == 0 || (context) == ErrorContext || \
- AmCheckpointerProcess())
+ Assert(CritSectionCount == 0 || (context)->allowInCritSection)
/*****************************************************************************
* EXPORTED ROUTINES *
@@ -120,7 +114,10 @@ MemoryContextInit(void)
* require it to contain at least 8K at all times. This is the only case
* where retained memory in a context is *essential* --- we want to be
* sure ErrorContext still has some memory even if we've run out
- * elsewhere!
+ * elsewhere! Also, allow allocations in ErrorContext within a critical
+ * section. Otherwise a PANIC will cause an assertion failure in the
+ * error reporting code, before printing out the real cause of the
+ * failure.
*
* This should be the last step in this function, as elog.c assumes memory
* management works once ErrorContext is non-null.
@@ -130,6 +127,7 @@ MemoryContextInit(void)
8 * 1024,
8 * 1024,
8 * 1024);
+ MemoryContextAllowInCriticalSection(ErrorContext, true);
}
/*
@@ -306,6 +304,26 @@ MemoryContextSetParent(MemoryContext context, MemoryContext new_parent)
}
/*
+ * MemoryContextAllowInCriticalSection
+ * Allow/disallow allocations in this memory context within a critical
+ * section.
+ *
+ * Normally, memory allocations are not allowed within a critical section,
+ * because a failure would lead to PANIC. There are a few exceptions to
+ * that, like allocations related to debugging code that is not supposed to
+ * be enabled in production. This function can be used to exempt specific
+ * memory contexts from the assertion in palloc().
+ */
+void
+MemoryContextAllowInCriticalSection(MemoryContext context, bool allow)
+{
+ AssertArg(MemoryContextIsValid(context));
+#ifdef USE_ASSERT_CHECKING
+ context->allowInCritSection = allow;
+#endif
+}
+
+/*
* GetMemoryChunkSpace
* Given a currently-allocated chunk, determine the total space
* it occupies (including all memory-allocation overhead).
@@ -533,6 +551,7 @@ MemoryContextCreate(NodeTag tag, Size size,
MemoryContext node;
Size needed = size + strlen(name) + 1;
+ /* creating new memory contexts is not allowed in a critical section */
Assert(CritSectionCount == 0);
/* Get space for node and name */
@@ -570,6 +589,11 @@ MemoryContextCreate(NodeTag tag, Size size,
node->parent = parent;
node->nextchild = parent->firstchild;
parent->firstchild = node;
+
+#ifdef USE_ASSERT_CHECKING
+ /* inherit allowInCritSection flag from parent */
+ node->allowInCritSection = parent->allowInCritSection;
+#endif
}
VALGRIND_CREATE_MEMPOOL(node, 0, false);
diff --git a/src/include/nodes/memnodes.h b/src/include/nodes/memnodes.h
index f79ebd423fa..ad77509b0ca 100644
--- a/src/include/nodes/memnodes.h
+++ b/src/include/nodes/memnodes.h
@@ -60,6 +60,9 @@ typedef struct MemoryContextData
MemoryContext nextchild; /* next child of same parent */
char *name; /* context name (just for debugging) */
bool isReset; /* T = no space alloced since last reset */
+#ifdef USE_ASSERT_CHECKING
+ bool allowInCritSection; /* allow palloc in critical section */
+#endif
} MemoryContextData;
/* utils/palloc.h contains typedef struct MemoryContextData *MemoryContext */
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index d588b1434e2..1d90b9f8586 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -182,6 +182,7 @@ extern void LWLockUpdateVar(LWLock *lock, uint64 *valptr, uint64 value);
extern Size LWLockShmemSize(void);
extern void CreateLWLocks(void);
+extern void InitLWLockAccess(void);
/*
* The traditional method for obtaining an lwlock for use by an extension is
diff --git a/src/include/utils/memutils.h b/src/include/utils/memutils.h
index 59d0aecfbbc..2fede860276 100644
--- a/src/include/utils/memutils.h
+++ b/src/include/utils/memutils.h
@@ -101,6 +101,8 @@ extern MemoryContext GetMemoryChunkContext(void *pointer);
extern MemoryContext MemoryContextGetParent(MemoryContext context);
extern bool MemoryContextIsEmpty(MemoryContext context);
extern void MemoryContextStats(MemoryContext context);
+extern void MemoryContextAllowInCriticalSection(MemoryContext context,
+ bool allow);
#ifdef MEMORY_CONTEXT_CHECKING
extern void MemoryContextCheck(MemoryContext context);