aboutsummaryrefslogtreecommitdiff
path: root/src/backend/postmaster/bgwriter.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/postmaster/bgwriter.c')
-rw-r--r--src/backend/postmaster/bgwriter.c456
1 files changed, 456 insertions, 0 deletions
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
new file mode 100644
index 00000000000..ce80a4feff7
--- /dev/null
+++ b/src/backend/postmaster/bgwriter.c
@@ -0,0 +1,456 @@
+/*-------------------------------------------------------------------------
+ *
+ * bgwriter.c
+ *
+ * The background writer (bgwriter) is new in Postgres 7.5. It attempts
+ * to keep regular backends from having to write out dirty shared buffers
+ * (which they would only do when needing to free a shared buffer to read in
+ * another page). In the best scenario all writes from shared buffers will
+ * be issued by the background writer process. However, regular backends are
+ * still empowered to issue writes if the bgwriter fails to maintain enough
+ * clean shared buffers.
+ *
+ * The bgwriter is also charged with handling all checkpoints. It will
+ * automatically dispatch a checkpoint after a certain amount of time has
+ * elapsed since the last one, and it can be signaled to perform requested
+ * checkpoints as well. (The GUC parameter that mandates a checkpoint every
+ * so many WAL segments is implemented by having backends signal the bgwriter
+ * when they fill WAL segments; the bgwriter itself doesn't watch for the
+ * condition.)
+ *
+ * The bgwriter is started by the postmaster as soon as the startup subprocess
+ * finishes. It remains alive until the postmaster commands it to terminate.
+ * Normal termination is by SIGUSR2, which instructs the bgwriter to execute
+ * a shutdown checkpoint and then exit(0). (All backends must be stopped
+ * before SIGUSR2 is issued!) Emergency termination is by SIGQUIT; like any
+ * backend, the bgwriter will simply abort and exit on SIGQUIT.
+ *
+ * If the bgwriter exits unexpectedly, the postmaster treats that the same
+ * as a backend crash: shared memory may be corrupted, so remaining backends
+ * should be killed by SIGQUIT and then a recovery cycle started.
+ *
+ *
+ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.1 2004/05/29 22:48:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <signal.h>
+
+#include "access/xlog.h"
+#include "libpq/pqsignal.h"
+#include "miscadmin.h"
+#include "postmaster/bgwriter.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/ipc.h"
+#include "storage/pmsignal.h"
+#include "storage/smgr.h"
+#include "tcop/tcopprot.h"
+#include "utils/guc.h"
+
+
+/*
+ * Shared memory area for communication between bgwriter and backends
+ */
+typedef struct
+{
+ pid_t bgwriter_pid; /* PID of bgwriter (0 if not started) */
+ sig_atomic_t checkpoint_count; /* advances when checkpoint done */
+} BgWriterShmemStruct;
+
+static BgWriterShmemStruct *BgWriterShmem;
+
+/*
+ * GUC parameters
+ */
+int BgWriterDelay = 200;
+int BgWriterPercent = 1;
+int BgWriterMaxPages = 100;
+
+int CheckPointTimeout = 300;
+int CheckPointWarning = 30;
+
+/*
+ * Flags set by interrupt handlers for later service in the main loop.
+ */
+static volatile sig_atomic_t got_SIGHUP = false;
+static volatile sig_atomic_t checkpoint_requested = false;
+static volatile sig_atomic_t shutdown_requested = false;
+
+/*
+ * Private state
+ */
+static time_t last_checkpoint_time;
+
+
+static void bg_quickdie(SIGNAL_ARGS);
+static void BgSigHupHandler(SIGNAL_ARGS);
+static void ReqCheckpointHandler(SIGNAL_ARGS);
+static void ReqShutdownHandler(SIGNAL_ARGS);
+
+
+/*
+ * Main entry point for bgwriter process
+ *
+ * This is invoked from BootstrapMain, which has already created the basic
+ * execution environment, but not enabled signals yet.
+ */
+void
+BackgroundWriterMain(void)
+{
+ Assert(BgWriterShmem != NULL);
+ BgWriterShmem->bgwriter_pid = MyProcPid;
+
+ /*
+ * Properly accept or ignore signals the postmaster might send us
+ *
+ * Note: we deliberately ignore SIGTERM, because during a standard Unix
+ * system shutdown cycle, init will SIGTERM all processes at once. We
+ * want to wait for the backends to exit, whereupon the postmaster will
+ * tell us it's okay to shut down (via SIGUSR2).
+ *
+ * SIGUSR1 is presently unused; keep it spare in case someday we want
+ * this process to participate in sinval messaging.
+ */
+ pqsignal(SIGHUP, BgSigHupHandler); /* set flag to read config file */
+ pqsignal(SIGINT, ReqCheckpointHandler); /* request checkpoint */
+ pqsignal(SIGTERM, SIG_IGN); /* ignore SIGTERM */
+ pqsignal(SIGQUIT, bg_quickdie); /* hard crash time */
+ pqsignal(SIGALRM, SIG_IGN);
+ pqsignal(SIGPIPE, SIG_IGN);
+ pqsignal(SIGUSR1, SIG_IGN); /* reserve for sinval */
+ pqsignal(SIGUSR2, ReqShutdownHandler); /* request shutdown */
+
+ /*
+ * Reset some signals that are accepted by postmaster but not here
+ */
+ pqsignal(SIGCHLD, SIG_DFL);
+ pqsignal(SIGTTIN, SIG_DFL);
+ pqsignal(SIGTTOU, SIG_DFL);
+ pqsignal(SIGCONT, SIG_DFL);
+ pqsignal(SIGWINCH, SIG_DFL);
+
+ /* We allow SIGQUIT (quickdie) at all times */
+#ifdef HAVE_SIGPROCMASK
+ sigdelset(&BlockSig, SIGQUIT);
+#else
+ BlockSig &= ~(sigmask(SIGQUIT));
+#endif
+
+ /*
+ * Initialize so that first time-driven checkpoint happens
+ * at the correct time.
+ */
+ last_checkpoint_time = time(NULL);
+
+ /*
+ * If an exception is encountered, processing resumes here.
+ */
+ if (sigsetjmp(Warn_restart, 1) != 0)
+ {
+ /*
+ * Make sure we're not interrupted while cleaning up. Also forget
+ * any pending QueryCancel request, since we're aborting anyway.
+ * Force InterruptHoldoffCount to a known state in case we
+ * ereport'd from inside a holdoff section.
+ */
+ ImmediateInterruptOK = false;
+ QueryCancelPending = false;
+ InterruptHoldoffCount = 1;
+ CritSectionCount = 0; /* should be unnecessary, but... */
+
+ /*
+ * These operations are really just a minimal subset of
+ * AbortTransaction(). We don't have very many resources
+ * to worry about in bgwriter, but we do have LWLocks and buffers.
+ */
+ LWLockReleaseAll();
+ AbortBufferIO();
+ UnlockBuffers();
+
+ /*
+ * Clear flag to indicate that we got out of error recovery mode
+ * successfully. (Flag was set in elog.c before longjmp().)
+ */
+ InError = false;
+
+ /*
+ * Exit interrupt holdoff section we implicitly established above.
+ */
+ RESUME_INTERRUPTS();
+
+ /*
+ * Sleep at least 1 second after any error. A write error is
+ * likely to be repeated, and we don't want to be filling the
+ * error logs as fast as we can. (XXX think about ways to make
+ * progress when the LRU dirty buffer cannot be written...)
+ */
+ pg_usleep(1000000L);
+ }
+
+ Warn_restart_ready = true; /* we can now handle ereport(ERROR) */
+
+ /*
+ * Unblock signals (they were blocked when the postmaster forked us)
+ */
+ PG_SETMASK(&UnBlockSig);
+
+ /*
+ * Loop forever
+ */
+ for (;;)
+ {
+ bool do_checkpoint = false;
+ bool force_checkpoint = false;
+ time_t now;
+ int elapsed_secs;
+ int n;
+ long udelay;
+
+ /*
+ * Process any signals received recently.
+ */
+ if (got_SIGHUP)
+ {
+ got_SIGHUP = false;
+ ProcessConfigFile(PGC_SIGHUP);
+ }
+ if (checkpoint_requested)
+ {
+ checkpoint_requested = false;
+ do_checkpoint = true;
+ force_checkpoint = true;
+ }
+ if (shutdown_requested)
+ {
+ ShutdownXLOG(0, 0);
+ DumpFreeSpaceMap(0, 0);
+ /* Normal exit from the bgwriter is here */
+ proc_exit(0); /* done */
+ }
+
+ /*
+ * Do an unforced checkpoint if too much time has elapsed
+ * since the last one.
+ */
+ now = time(NULL);
+ elapsed_secs = now - last_checkpoint_time;
+ if (elapsed_secs >= CheckPointTimeout)
+ do_checkpoint = true;
+
+ /*
+ * Do a checkpoint if requested, otherwise do one cycle of
+ * dirty-buffer writing.
+ */
+ if (do_checkpoint)
+ {
+ if (CheckPointWarning != 0)
+ {
+ /*
+ * Ideally we should only warn if this checkpoint was
+ * requested due to running out of segment files, and not
+ * if it was manually requested. However we can't tell the
+ * difference with the current signalling mechanism.
+ */
+ if (elapsed_secs < CheckPointWarning)
+ ereport(LOG,
+ (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
+ elapsed_secs),
+ errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
+ }
+
+ CreateCheckPoint(false, force_checkpoint);
+
+ /*
+ * Note we record the checkpoint start time not end time as
+ * last_checkpoint_time. This is so that time-driven checkpoints
+ * happen at a predictable spacing.
+ */
+ last_checkpoint_time = now;
+
+ /*
+ * Indicate checkpoint completion to any waiting backends.
+ */
+ BgWriterShmem->checkpoint_count++;
+
+ /*
+ * After any checkpoint, close all smgr files. This is so we
+ * won't hang onto smgr references to deleted files indefinitely.
+ */
+ smgrcloseall();
+
+ /* Nap for configured time before rechecking */
+ n = 1;
+ }
+ else
+ {
+ n = BufferSync(BgWriterPercent, BgWriterMaxPages);
+ }
+
+ /*
+ * Nap for the configured time or sleep for 10 seconds if
+ * there was nothing to do at all.
+ *
+ * On some platforms, signals won't interrupt the sleep. To ensure
+ * we respond reasonably promptly when someone signals us,
+ * break down the sleep into 1-second increments, and check for
+ * interrupts after each nap.
+ */
+ udelay = ((n > 0) ? BgWriterDelay : 10000) * 1000L;
+ while (udelay > 1000000L)
+ {
+ if (got_SIGHUP || checkpoint_requested || shutdown_requested)
+ break;
+ pg_usleep(1000000L);
+ udelay -= 1000000L;
+ }
+ if (!(got_SIGHUP || checkpoint_requested || shutdown_requested))
+ pg_usleep(udelay);
+
+ /*
+ * Emergency bailout if postmaster has died. This is to avoid the
+ * necessity for manual cleanup of all postmaster children.
+ */
+ if (!PostmasterIsAlive(true))
+ exit(1);
+ }
+}
+
+
+/* --------------------------------
+ * signal handler routines
+ * --------------------------------
+ */
+
+/*
+ * bg_quickdie() occurs when signalled SIGQUIT by the postmaster.
+ *
+ * Some backend has bought the farm,
+ * so we need to stop what we're doing and exit.
+ */
+static void
+bg_quickdie(SIGNAL_ARGS)
+{
+ PG_SETMASK(&BlockSig);
+
+ /*
+ * DO NOT proc_exit() -- we're here because shared memory may be
+ * corrupted, so we don't want to try to clean up our transaction.
+ * Just nail the windows shut and get out of town.
+ *
+ * Note we do exit(1) not exit(0). This is to force the postmaster into
+ * a system reset cycle if some idiot DBA sends a manual SIGQUIT to a
+ * random backend. This is necessary precisely because we don't clean
+ * up our shared memory state.
+ */
+ exit(1);
+}
+
+/* SIGHUP: set flag to re-read config file at next convenient time */
+static void
+BgSigHupHandler(SIGNAL_ARGS)
+{
+ got_SIGHUP = true;
+}
+
+/* SIGINT: set flag to run a normal checkpoint right away */
+static void
+ReqCheckpointHandler(SIGNAL_ARGS)
+{
+ checkpoint_requested = true;
+}
+
+/* SIGUSR2: set flag to run a shutdown checkpoint and exit */
+static void
+ReqShutdownHandler(SIGNAL_ARGS)
+{
+ shutdown_requested = true;
+}
+
+
+/* --------------------------------
+ * communication with backends
+ * --------------------------------
+ */
+
+/*
+ * BgWriterShmemSize
+ * Compute space needed for bgwriter-related shared memory
+ */
+int
+BgWriterShmemSize(void)
+{
+ /*
+ * This is not worth measuring right now, but may become so after we
+ * add fsync signaling ...
+ */
+ return MAXALIGN(sizeof(BgWriterShmemStruct));
+}
+
+/*
+ * BgWriterShmemInit
+ * Allocate and initialize bgwriter-related shared memory
+ */
+void
+BgWriterShmemInit(void)
+{
+ bool found;
+
+ BgWriterShmem = (BgWriterShmemStruct *)
+ ShmemInitStruct("Background Writer Data",
+ sizeof(BgWriterShmemStruct),
+ &found);
+ if (BgWriterShmem == NULL)
+ ereport(FATAL,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("insufficient shared memory for bgwriter")));
+ if (found)
+ return; /* already initialized */
+
+ MemSet(BgWriterShmem, 0, sizeof(BgWriterShmemStruct));
+}
+
+/*
+ * RequestCheckpoint
+ * Called in backend processes to request an immediate checkpoint
+ *
+ * If waitforit is true, wait until the checkpoint is completed
+ * before returning; otherwise, just signal the request and return
+ * immediately.
+ */
+void
+RequestCheckpoint(bool waitforit)
+{
+ volatile sig_atomic_t *count_ptr = &BgWriterShmem->checkpoint_count;
+ sig_atomic_t old_count = *count_ptr;
+
+ /*
+ * Send signal to request checkpoint. When waitforit is false,
+ * we consider failure to send the signal to be nonfatal.
+ */
+ if (BgWriterShmem->bgwriter_pid == 0)
+ elog(waitforit ? ERROR : LOG,
+ "could not request checkpoint because bgwriter not running");
+ if (kill(BgWriterShmem->bgwriter_pid, SIGINT) != 0)
+ elog(waitforit ? ERROR : LOG,
+ "could not signal for checkpoint: %m");
+
+ /*
+ * If requested, wait for completion. We detect completion by
+ * observing a change in checkpoint_count in shared memory.
+ */
+ if (waitforit)
+ {
+ while (*count_ptr == old_count)
+ {
+ CHECK_FOR_INTERRUPTS();
+ pg_usleep(1000000L);
+ }
+ }
+}