aboutsummaryrefslogtreecommitdiff
path: root/src/backend/postmaster/postmaster.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/postmaster/postmaster.c')
-rw-r--r--src/backend/postmaster/postmaster.c811
1 files changed, 329 insertions, 482 deletions
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 89a22815098..e7186c01b39 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -37,14 +37,13 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.399 2004/05/28 15:14:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/postmaster.c,v 1.400 2004/05/29 22:48:19 tgl Exp $
*
* NOTES
*
* Initialization:
- * The Postmaster sets up a few shared memory data structures
- * for the backends. It should at the very least initialize the
- * lock manager.
+ * The Postmaster sets up shared memory data structures
+ * for the backends.
*
* Synchronization:
* The Postmaster shares memory with the backends but should avoid
@@ -97,6 +96,7 @@
#include "libpq/pqsignal.h"
#include "miscadmin.h"
#include "nodes/nodes.h"
+#include "postmaster/postmaster.h"
#include "pgtime.h"
#include "storage/fd.h"
#include "storage/ipc.h"
@@ -113,22 +113,14 @@
#include "pgstat.h"
-#ifdef HAVE_SIGPROCMASK
-sigset_t UnBlockSig,
- BlockSig,
- AuthBlockSig;
-
-#else
-int UnBlockSig,
- BlockSig,
- AuthBlockSig;
-#endif
-
/*
* List of active backends (or child processes anyway; we don't actually
* know whether a given child has become a backend or is still in the
* authorization phase). This is used mainly to keep track of how many
* children we have and send them appropriate signals when necessary.
+ *
+ * "Special" children such as the startup and bgwriter tasks are not in
+ * this list.
*/
typedef struct bkend
{
@@ -149,15 +141,6 @@ char *UnixSocketDir;
char *ListenAddresses;
/*
- * MaxBackends is the limit on the number of backends we can start.
- * Note that a larger MaxBackends value will increase the size of the
- * shared memory area as well as cause the postmaster to grab more
- * kernel semaphores, even if you never actually use that many
- * backends.
- */
-int MaxBackends;
-
-/*
* ReservedBackends is the number of backends reserved for superuser use.
* This number is taken out of the pool size given by MaxBackends so
* number of backend slots available to non-superusers is
@@ -196,9 +179,6 @@ bool SilentMode = false; /* silent mode (-S) */
int PreAuthDelay = 0;
int AuthenticationTimeout = 60;
-int CheckPointTimeout = 300;
-int CheckPointWarning = 30;
-time_t LastSignalledCheckpoint = 0;
bool log_hostname; /* for ps display and logging */
bool Log_connections = false;
@@ -209,13 +189,11 @@ char *rendezvous_name;
/* list of library:init-function to be preloaded */
char *preload_libraries_string = NULL;
-/* Startup/shutdown state */
+/* PIDs of special child processes; 0 when not running */
static pid_t StartupPID = 0,
- ShutdownPID = 0,
- CheckPointPID = 0,
BgWriterPID = 0;
-static time_t checkpointed = 0;
+/* Startup/shutdown state */
#define NoShutdown 0
#define SmartShutdown 1
#define FastShutdown 2
@@ -232,7 +210,6 @@ bool ClientAuthInProgress = false; /* T during new-client
* Also, the global MyCancelKey passes the cancel key assigned to a given
* backend from the postmaster to that backend (via fork).
*/
-
static unsigned int random_seed = 0;
static int debug_flag = 0;
@@ -263,6 +240,7 @@ static void reaper(SIGNAL_ARGS);
static void sigusr1_handler(SIGNAL_ARGS);
static void dummy_handler(SIGNAL_ARGS);
static void CleanupProc(int pid, int exitstatus);
+static void HandleChildCrash(int pid, int exitstatus);
static void LogChildExit(int lev, const char *procname,
int pid, int exitstatus);
static int BackendRun(Port *port);
@@ -280,7 +258,7 @@ static void RandomSalt(char *cryptSalt, char *md5Salt);
static void SignalChildren(int signal);
static int CountChildren(void);
static bool CreateOptsFile(int argc, char *argv[], char *fullprogname);
-static pid_t SSDataBase(int xlop);
+static pid_t StartChildProcess(int xlop);
static void
postmaster_error(const char *fmt,...)
/* This lets gcc check the format string for consistency. */
@@ -311,10 +289,8 @@ static void ShmemBackendArrayRemove(pid_t pid);
#endif /* EXEC_BACKEND */
-#define StartupDataBase() SSDataBase(BS_XLOG_STARTUP)
-#define CheckPointDataBase() SSDataBase(BS_XLOG_CHECKPOINT)
-#define StartBackgroundWriter() SSDataBase(BS_XLOG_BGWRITER)
-#define ShutdownDataBase() SSDataBase(BS_XLOG_SHUTDOWN)
+#define StartupDataBase() StartChildProcess(BS_XLOG_STARTUP)
+#define StartBackgroundWriter() StartChildProcess(BS_XLOG_BGWRITER)
/*
@@ -325,14 +301,13 @@ PostmasterMain(int argc, char *argv[])
{
int opt;
int status;
- char original_extraoptions[MAXPGPATH];
char *potential_DataDir = NULL;
int i;
- *original_extraoptions = '\0';
-
progname = get_progname(argv[0]);
+ MyProcPid = PostmasterPid = getpid();
+
IsPostmasterEnvironment = true;
/*
@@ -359,8 +334,6 @@ PostmasterMain(int argc, char *argv[])
*/
umask((mode_t) 0077);
- MyProcPid = PostmasterPid = getpid();
-
/*
* Fire up essential subsystems: memory management
*/
@@ -470,12 +443,10 @@ PostmasterMain(int argc, char *argv[])
case 'o':
/*
- * Other options to pass to the backend on the command
- * line -- useful only for debugging.
+ * Other options to pass to the backend on the command line
*/
strcat(ExtraOptions, " ");
strcat(ExtraOptions, optarg);
- strcpy(original_extraoptions, optarg);
break;
case 'p':
SetConfigOption("port", optarg, PGC_POSTMASTER, PGC_S_ARGV);
@@ -656,7 +627,7 @@ PostmasterMain(int argc, char *argv[])
* We want to do this before we try to grab the input sockets, because
* the data directory interlock is more reliable than the socket-file
* interlock (thanks to whoever decided to put socket files in /tmp
- * :-(). For the same reason, it's best to grab the TCP socket before
+ * :-(). For the same reason, it's best to grab the TCP socket(s) before
* the Unix socket.
*/
CreateDataDirLockFile(DataDir, true);
@@ -766,12 +737,13 @@ PostmasterMain(int argc, char *argv[])
BackendList = DLNewList();
#ifdef WIN32
-
/*
- * Initialize the child pid/HANDLE arrays
+ * Initialize the child pid/HANDLE arrays for signal handling.
*/
- win32_childPIDArray = (pid_t *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
- win32_childHNDArray = (HANDLE *) malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
+ win32_childPIDArray = (pid_t *)
+ malloc(NUM_BACKENDARRAY_ELEMS * sizeof(pid_t));
+ win32_childHNDArray = (HANDLE *)
+ malloc(NUM_BACKENDARRAY_ELEMS * sizeof(HANDLE));
if (!win32_childPIDArray || !win32_childHNDArray)
ereport(FATAL,
(errcode(ERRCODE_OUT_OF_MEMORY),
@@ -791,16 +763,16 @@ PostmasterMain(int argc, char *argv[])
*
* CAUTION: when changing this list, check for side-effects on the signal
* handling setup of child processes. See tcop/postgres.c,
- * bootstrap/bootstrap.c, and postmaster/pgstat.c.
+ * bootstrap/bootstrap.c, postmaster/bgwriter.c, and postmaster/pgstat.c.
*/
pqinitmask();
PG_SETMASK(&BlockSig);
pqsignal(SIGHUP, SIGHUP_handler); /* reread config file and have
* children do same */
- pqsignal(SIGINT, pmdie); /* send SIGTERM and ShutdownDataBase */
+ pqsignal(SIGINT, pmdie); /* send SIGTERM and shut down */
pqsignal(SIGQUIT, pmdie); /* send SIGQUIT and die */
- pqsignal(SIGTERM, pmdie); /* wait for children and ShutdownDataBase */
+ pqsignal(SIGTERM, pmdie); /* wait for children and shut down */
pqsignal(SIGALRM, SIG_IGN); /* ignored */
pqsignal(SIGPIPE, SIG_IGN); /* ignored */
pqsignal(SIGUSR1, sigusr1_handler); /* message from child process */
@@ -823,19 +795,6 @@ PostmasterMain(int argc, char *argv[])
whereToSendOutput = None;
/*
- * On many platforms, the first call of localtime() incurs significant
- * overhead to load timezone info from the system configuration files.
- * By doing it once in the postmaster, we avoid having to do it in
- * every started child process. The savings are not huge, but they
- * add up...
- */
- {
- time_t now = time(NULL);
-
- (void) pg_localtime(&now);
- }
-
- /*
* Initialize and try to startup the statistics collector process
*/
pgstat_init();
@@ -956,10 +915,7 @@ reg_reply(DNSServiceRegistrationReplyErrorType errorCode, void *context)
static void
pmdaemonize(void)
{
-#ifdef WIN32
- /* not supported */
- elog(FATAL, "SilentMode not supported under WIN32");
-#else
+#ifndef WIN32
int i;
pid_t pid;
@@ -989,7 +945,7 @@ pmdaemonize(void)
setitimer(ITIMER_PROF, &prof_itimer, NULL);
#endif
- MyProcPid = getpid(); /* reset MyProcPid to child */
+ MyProcPid = PostmasterPid = getpid(); /* reset PID vars to child */
/* GH: If there's no setsid(), we hopefully don't need silent mode.
* Until there's a better solution.
@@ -1007,7 +963,10 @@ pmdaemonize(void)
dup2(i, 1);
dup2(i, 2);
close(i);
-#endif
+#else /* WIN32 */
+ /* not supported */
+ elog(FATAL, "SilentMode not supported under WIN32");
+#endif /* WIN32 */
}
@@ -1053,19 +1012,21 @@ usage(const char *progname)
/*
- * Main loop of postmaster
+ * Main idle loop of postmaster
*/
static int
ServerLoop(void)
{
fd_set readmask;
int nSockets;
- struct timeval now,
+ time_t now,
+ last_touch_time;
+ struct timeval earlier,
later;
struct timezone tz;
- int i;
- gettimeofday(&now, &tz);
+ gettimeofday(&earlier, &tz);
+ last_touch_time = time(NULL);
nSockets = initMasks(&readmask);
@@ -1074,70 +1035,32 @@ ServerLoop(void)
Port *port;
fd_set rmask;
struct timeval timeout;
+ int selres;
+ int i;
/*
- * The timeout for the select() below is normally set on the basis
- * of the time to the next checkpoint. However, if for some
- * reason we don't have a next-checkpoint time, time out after 60
- * seconds. This keeps checkpoint scheduling from locking up when
- * we get new connection requests infrequently (since we are
- * likely to detect checkpoint completion just after enabling
- * signals below, after we've already made the decision about how
- * long to wait this time).
+ * Wait for something to happen.
+ *
+ * We wait at most one minute, to ensure that the other background
+ * tasks handled below get done even when no requests are arriving.
*/
+ memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
+
timeout.tv_sec = 60;
timeout.tv_usec = 0;
- if (CheckPointPID == 0 && checkpointed &&
- StartupPID == 0 && Shutdown == NoShutdown &&
- !FatalError && random_seed != 0)
- {
- time_t now = time(NULL);
-
- if (CheckPointTimeout + checkpointed > now)
- {
- /*
- * Not time for checkpoint yet, so set select timeout
- */
- timeout.tv_sec = CheckPointTimeout + checkpointed - now;
- }
- else
- {
- /* Time to make the checkpoint... */
- CheckPointPID = CheckPointDataBase();
-
- /*
- * if fork failed, schedule another try at 0.1 normal
- * delay
- */
- if (CheckPointPID == 0)
- {
- timeout.tv_sec = CheckPointTimeout / 10;
- checkpointed = now + timeout.tv_sec - CheckPointTimeout;
- }
- }
- }
+ PG_SETMASK(&UnBlockSig);
- /*
- * If no background writer process is running and we should do
- * background writing, start one. It doesn't matter if this fails,
- * we'll just try again later.
- */
- if (BgWriterPID == 0 && BgWriterPercent > 0 &&
- StartupPID == 0 && Shutdown == NoShutdown &&
- !FatalError && random_seed != 0)
- BgWriterPID = StartBackgroundWriter();
+ selres = select(nSockets, &rmask, NULL, NULL, &timeout);
/*
- * Wait for something to happen.
+ * Block all signals until we wait again. (This makes it safe for
+ * our signal handlers to do nontrivial work.)
*/
- memcpy((char *) &rmask, (char *) &readmask, sizeof(fd_set));
-
- PG_SETMASK(&UnBlockSig);
+ PG_SETMASK(&BlockSig);
- if (select(nSockets, &rmask, NULL, NULL, &timeout) < 0)
+ if (selres < 0)
{
- PG_SETMASK(&BlockSig);
if (errno == EINTR || errno == EWOULDBLOCK)
continue;
ereport(LOG,
@@ -1147,63 +1070,85 @@ ServerLoop(void)
}
/*
- * Block all signals until we wait again. (This makes it safe for
- * our signal handlers to do nontrivial work.)
- */
- PG_SETMASK(&BlockSig);
-
- /*
- * Select a random seed at the time of first receiving a request.
+ * New connection pending on any of our sockets? If so, fork a
+ * child process to deal with it.
*/
- while (random_seed == 0)
+ if (selres > 0)
{
- gettimeofday(&later, &tz);
-
/*
- * We are not sure how much precision is in tv_usec, so we
- * swap the nibbles of 'later' and XOR them with 'now'. On the
- * off chance that the result is 0, we loop until it isn't.
+ * Select a random seed at the time of first receiving a request.
*/
- random_seed = now.tv_usec ^
- ((later.tv_usec << 16) |
- ((later.tv_usec >> 16) & 0xffff));
- }
+ while (random_seed == 0)
+ {
+ gettimeofday(&later, &tz);
- /*
- * New connection pending on any of our sockets? If so, fork a
- * child process to deal with it.
- */
- for (i = 0; i < MAXLISTEN; i++)
- {
- if (ListenSocket[i] == -1)
- break;
- if (FD_ISSET(ListenSocket[i], &rmask))
+ /*
+ * We are not sure how much precision is in tv_usec, so we
+ * swap the nibbles of 'later' and XOR them with 'earlier'. On
+ * the off chance that the result is 0, we loop until it isn't.
+ */
+ random_seed = earlier.tv_usec ^
+ ((later.tv_usec << 16) |
+ ((later.tv_usec >> 16) & 0xffff));
+ }
+
+ for (i = 0; i < MAXLISTEN; i++)
{
- port = ConnCreate(ListenSocket[i]);
- if (port)
+ if (ListenSocket[i] == -1)
+ break;
+ if (FD_ISSET(ListenSocket[i], &rmask))
{
- BackendStartup(port);
-
- /*
- * We no longer need the open socket or port structure
- * in this process
- */
- StreamClose(port->sock);
- ConnFree(port);
+ port = ConnCreate(ListenSocket[i]);
+ if (port)
+ {
+ BackendStartup(port);
+
+ /*
+ * We no longer need the open socket or port structure
+ * in this process
+ */
+ StreamClose(port->sock);
+ ConnFree(port);
+ }
}
}
}
+ /*
+ * If no background writer process is running, and we are not in
+ * a state that prevents it, start one. It doesn't matter if this
+ * fails, we'll just try again later.
+ */
+ if (BgWriterPID == 0 && StartupPID == 0 && !FatalError)
+ {
+ BgWriterPID = StartBackgroundWriter();
+ /* If shutdown is pending, set it going */
+ if (Shutdown > NoShutdown && BgWriterPID != 0)
+ kill(BgWriterPID, SIGUSR2);
+ }
+
/* If we have lost the stats collector, try to start a new one */
if (!pgstat_is_running)
pgstat_start();
+
+ /*
+ * Touch the socket and lock file at least every ten minutes, to ensure
+ * that they are not removed by overzealous /tmp-cleaning tasks.
+ */
+ now = time(NULL);
+ if (now - last_touch_time >= 10 * 60)
+ {
+ TouchSocketFile();
+ TouchSocketLockFile();
+ last_touch_time = now;
+ }
}
}
/*
- * Initialise the masks for select() for the ports
- * we are listening on. Return the number of sockets to listen on.
+ * Initialise the masks for select() for the ports we are listening on.
+ * Return the number of sockets to listen on.
*/
static int
initMasks(fd_set *rmask)
@@ -1543,14 +1488,7 @@ processCancelRequest(Port *port, void *pkt)
backendPID = (int) ntohl(canc->backendPID);
cancelAuthCode = (long) ntohl(canc->cancelAuthCode);
- if (backendPID == CheckPointPID)
- {
- ereport(DEBUG2,
- (errmsg_internal("ignoring cancel request for checkpoint process %d",
- backendPID)));
- return;
- }
- else if (backendPID == BgWriterPID)
+ if (backendPID == BgWriterPID)
{
ereport(DEBUG2,
(errmsg_internal("ignoring cancel request for bgwriter process %d",
@@ -1561,7 +1499,7 @@ processCancelRequest(Port *port, void *pkt)
/*
* See if we have a matching backend. In the EXEC_BACKEND case, we
* can no longer access the postmaster's own backend list, and must
- * rely on the backup array in shared memory.
+ * rely on the duplicate array in shared memory.
*/
#ifndef EXEC_BACKEND
for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
@@ -1687,7 +1625,7 @@ ConnFree(Port *conn)
* them open, of course.
*/
void
-ClosePostmasterPorts(bool pgstat_too)
+ClosePostmasterPorts(void)
{
int i;
@@ -1700,10 +1638,6 @@ ClosePostmasterPorts(bool pgstat_too)
ListenSocket[i] = -1;
}
}
-
- /* Close pgstat control sockets, unless we're starting pgstat itself */
- if (pgstat_too)
- pgstat_close_sockets();
}
@@ -1741,6 +1675,8 @@ SIGHUP_handler(SIGNAL_ARGS)
(errmsg("received SIGHUP, reloading configuration files")));
ProcessConfigFile(PGC_SIGHUP);
SignalChildren(SIGHUP);
+ if (BgWriterPID != 0)
+ kill(BgWriterPID, SIGHUP);
load_hba();
load_ident();
@@ -1748,13 +1684,6 @@ SIGHUP_handler(SIGNAL_ARGS)
/* Update the starting-point file for future children */
write_nondefault_variables(PGC_SIGHUP);
#endif
-
- /*
- * Tell the background writer to terminate so that we will start a
- * new one with a possibly changed config
- */
- if (BgWriterPID != 0)
- kill(BgWriterPID, SIGTERM);
}
PG_SETMASK(&UnBlockSig);
@@ -1780,11 +1709,10 @@ pmdie(SIGNAL_ARGS)
switch (postgres_signal_arg)
{
case SIGTERM:
-
/*
* Smart Shutdown:
*
- * Wait for children to end their work and ShutdownDataBase.
+ * Wait for children to end their work, then shut down.
*/
if (Shutdown >= SmartShutdown)
break;
@@ -1792,36 +1720,28 @@ pmdie(SIGNAL_ARGS)
ereport(LOG,
(errmsg("received smart shutdown request")));
- /* Must tell bgwriter to quit, or it never will... */
- if (BgWriterPID != 0)
- kill(BgWriterPID, SIGTERM);
-
- if (DLGetHead(BackendList)) /* let reaper() handle this */
- break;
+ if (DLGetHead(BackendList))
+ break; /* let reaper() handle this */
/*
- * No children left. Shutdown data base system.
+ * No children left. Begin shutdown of data base system.
*/
- if (StartupPID > 0 || FatalError) /* let reaper() handle
- * this */
- break;
- if (ShutdownPID > 0)
- {
- elog(PANIC, "shutdown process %d already running",
- (int) ShutdownPID);
- abort();
- }
-
- ShutdownPID = ShutdownDataBase();
+ if (StartupPID != 0 || FatalError)
+ break; /* let reaper() handle this */
+ /* Start the bgwriter if not running */
+ if (BgWriterPID == 0)
+ BgWriterPID = StartBackgroundWriter();
+ /* And tell it to shut down */
+ if (BgWriterPID != 0)
+ kill(BgWriterPID, SIGUSR2);
break;
case SIGINT:
-
/*
* Fast Shutdown:
*
* Abort all children with SIGTERM (rollback active transactions
- * and exit) and ShutdownDataBase when they are gone.
+ * and exit) and shut down when they are gone.
*/
if (Shutdown >= FastShutdown)
break;
@@ -1842,33 +1762,34 @@ pmdie(SIGNAL_ARGS)
}
/*
- * No children left. Shutdown data base system.
+ * No children left. Begin shutdown of data base system.
*
- * Unlike the previous case, it is not an error for the shutdown
- * process to be running already (we could get SIGTERM
- * followed shortly later by SIGINT).
+ * Note: if we previously got SIGTERM then we may send SIGUSR2
+ * to the bgwriter a second time here. This should be harmless.
*/
- if (StartupPID > 0 || FatalError) /* let reaper() handle
- * this */
- break;
- if (ShutdownPID == 0)
- ShutdownPID = ShutdownDataBase();
+ if (StartupPID != 0 || FatalError)
+ break; /* let reaper() handle this */
+ /* Start the bgwriter if not running */
+ if (BgWriterPID == 0)
+ BgWriterPID = StartBackgroundWriter();
+ /* And tell it to shut down */
+ if (BgWriterPID != 0)
+ kill(BgWriterPID, SIGUSR2);
break;
case SIGQUIT:
-
/*
* Immediate Shutdown:
*
* abort all children with SIGQUIT and exit without attempt to
- * properly shutdown data base system.
+ * properly shut down data base system.
*/
ereport(LOG,
(errmsg("received immediate shutdown request")));
- if (ShutdownPID > 0)
- kill(ShutdownPID, SIGQUIT);
- if (StartupPID > 0)
+ if (StartupPID != 0)
kill(StartupPID, SIGQUIT);
+ if (BgWriterPID != 0)
+ kill(BgWriterPID, SIGQUIT);
if (DLGetHead(BackendList))
SignalChildren(SIGQUIT);
ExitPostmaster(0);
@@ -1939,22 +1860,11 @@ reaper(SIGNAL_ARGS)
}
/*
- * Check if this child was a shutdown or startup process.
+ * Check if this child was a startup process.
*/
- if (ShutdownPID > 0 && pid == ShutdownPID)
- {
- if (exitstatus != 0)
- {
- LogChildExit(LOG, gettext("shutdown process"),
- pid, exitstatus);
- ExitPostmaster(1);
- }
- /* Normal postmaster exit is here */
- ExitPostmaster(0);
- }
-
- if (StartupPID > 0 && pid == StartupPID)
+ if (StartupPID != 0 && pid == StartupPID)
{
+ StartupPID = 0;
if (exitstatus != 0)
{
LogChildExit(LOG, gettext("startup process"),
@@ -1963,7 +1873,6 @@ reaper(SIGNAL_ARGS)
(errmsg("aborting startup due to startup process failure")));
ExitPostmaster(1);
}
- StartupPID = 0;
/*
* Startup succeeded - we are done with system startup or recovery.
@@ -1971,33 +1880,51 @@ reaper(SIGNAL_ARGS)
FatalError = false;
/*
- * Arrange for first checkpoint to occur after standard delay.
+ * Crank up the background writer. It doesn't matter if this
+ * fails, we'll just try again later.
*/
- CheckPointPID = 0;
- checkpointed = time(NULL);
+ Assert(BgWriterPID == 0);
+ BgWriterPID = StartBackgroundWriter();
/*
* Go to shutdown mode if a shutdown request was pending.
*/
- if (Shutdown > NoShutdown)
+ if (Shutdown > NoShutdown && BgWriterPID != 0)
+ kill(BgWriterPID, SIGUSR2);
+
+ continue;
+ }
+
+ /*
+ * Was it the bgwriter?
+ */
+ if (BgWriterPID != 0 && pid == BgWriterPID)
+ {
+ if (exitstatus == 0 && Shutdown > NoShutdown &&
+ !FatalError && !DLGetHead(BackendList))
{
- if (ShutdownPID > 0)
- {
- elog(PANIC, "startup process %d died while shutdown process %d already running",
- pid, (int) ShutdownPID);
- abort();
- }
- ShutdownPID = ShutdownDataBase();
+ /*
+ * Normal postmaster exit is here: we've seen normal
+ * exit of the bgwriter after it's been told to shut down.
+ * We expect that it wrote a shutdown checkpoint. (If
+ * for some reason it didn't, recovery will occur on next
+ * postmaster start.)
+ */
+ ExitPostmaster(0);
}
-
- goto reaper_done;
+ /*
+ * Any unexpected exit of the bgwriter is treated as a crash.
+ */
+ LogChildExit(DEBUG2, gettext("background writer process"),
+ pid, exitstatus);
+ HandleChildCrash(pid, exitstatus);
+ continue;
}
/*
- * Else do standard child cleanup.
+ * Else do standard backend child cleanup.
*/
CleanupProc(pid, exitstatus);
-
} /* loop over pending child-death reports */
if (FatalError)
@@ -2006,7 +1933,7 @@ reaper(SIGNAL_ARGS)
* Wait for all children exit, then reset shmem and
* StartupDataBase.
*/
- if (DLGetHead(BackendList) || StartupPID > 0 || ShutdownPID > 0)
+ if (DLGetHead(BackendList) || StartupPID != 0 || BgWriterPID != 0)
goto reaper_done;
ereport(LOG,
(errmsg("all server processes terminated; reinitializing")));
@@ -2021,11 +1948,14 @@ reaper(SIGNAL_ARGS)
if (Shutdown > NoShutdown)
{
- if (DLGetHead(BackendList))
- goto reaper_done;
- if (StartupPID > 0 || ShutdownPID > 0)
+ if (DLGetHead(BackendList) || StartupPID != 0)
goto reaper_done;
- ShutdownPID = ShutdownDataBase();
+ /* Start the bgwriter if not running */
+ if (BgWriterPID == 0)
+ BgWriterPID = StartBackgroundWriter();
+ /* And tell it to shut down */
+ if (BgWriterPID != 0)
+ kill(BgWriterPID, SIGUSR2);
}
reaper_done:
@@ -2044,11 +1974,9 @@ static void
CleanupProc(int pid,
int exitstatus) /* child's exit status. */
{
- Dlelem *curr,
- *next;
- Backend *bp;
+ Dlelem *curr;
- LogChildExit(DEBUG2, gettext("child process"), pid, exitstatus);
+ LogChildExit(DEBUG2, gettext("server process"), pid, exitstatus);
/*
* If a backend dies in an ugly way (i.e. exit status not 0) then we
@@ -2056,61 +1984,81 @@ CleanupProc(int pid,
* we assume everything is hunky dory and simply remove the backend
* from the active backend list.
*/
- if (exitstatus == 0)
+ if (exitstatus != 0)
+ {
+ HandleChildCrash(pid, exitstatus);
+ return;
+ }
+
+ for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
- curr = DLGetHead(BackendList);
- while (curr)
+ Backend *bp = (Backend *) DLE_VAL(curr);
+
+ if (bp->pid == pid)
{
- bp = (Backend *) DLE_VAL(curr);
- if (bp->pid == pid)
- {
+ DLRemove(curr);
+ free(bp);
+ DLFreeElem(curr);
#ifdef EXEC_BACKEND
- ShmemBackendArrayRemove(bp->pid);
+ ShmemBackendArrayRemove(pid);
#endif
- DLRemove(curr);
- free(bp);
- DLFreeElem(curr);
- break;
- }
- curr = DLGetSucc(curr);
- }
-
- if (pid == CheckPointPID)
- {
- CheckPointPID = 0;
- if (!FatalError)
- {
- checkpointed = time(NULL);
- }
- }
- else if (pid == BgWriterPID)
- BgWriterPID = 0;
- else
+ /* Tell the collector about backend termination */
pgstat_beterm(pid);
-
- return;
+ break;
+ }
}
+}
- /* below here we're dealing with a non-normal exit */
+/*
+ * HandleChildCrash -- cleanup after failed backend or bgwriter.
+ *
+ * The objectives here are to clean up our local state about the child
+ * process, and to signal all other remaining children to quickdie.
+ */
+static void
+HandleChildCrash(int pid,
+ int exitstatus) /* child's exit status. */
+{
+ Dlelem *curr,
+ *next;
+ Backend *bp;
- /* Make log entry unless we did so already */
+ /*
+ * Make log entry unless there was a previous crash (if so, nonzero
+ * exit status is to be expected in SIGQUIT response; don't clutter log)
+ */
if (!FatalError)
{
LogChildExit(LOG,
- (pid == CheckPointPID) ? gettext("checkpoint process") :
- (pid == BgWriterPID) ? gettext("bgwriter process") :
+ (pid == BgWriterPID) ?
+ gettext("background writer process") :
gettext("server process"),
pid, exitstatus);
ereport(LOG,
- (errmsg("terminating any other active server processes")));
+ (errmsg("terminating any other active server processes")));
}
- curr = DLGetHead(BackendList);
- while (curr)
+ /* Process regular backends */
+ for (curr = DLGetHead(BackendList); curr; curr = next)
{
next = DLGetSucc(curr);
bp = (Backend *) DLE_VAL(curr);
- if (bp->pid != pid)
+ if (bp->pid == pid)
+ {
+ /*
+ * Found entry for freshly-dead backend, so remove it.
+ */
+ DLRemove(curr);
+ free(bp);
+ DLFreeElem(curr);
+#ifdef EXEC_BACKEND
+ ShmemBackendArrayRemove(pid);
+#endif
+ /* Tell the collector about backend termination */
+ pgstat_beterm(pid);
+ /* Keep looping so we can signal remaining backends */
+ }
+ else
{
/*
* This backend is still alive. Unless we did so already,
@@ -2130,34 +2078,18 @@ CleanupProc(int pid,
kill(bp->pid, (SendStop ? SIGSTOP : SIGQUIT));
}
}
- else
- {
- /*
- * Found entry for freshly-dead backend, so remove it.
- */
-#ifdef EXEC_BACKEND
- ShmemBackendArrayRemove(bp->pid);
-#endif
- DLRemove(curr);
- free(bp);
- DLFreeElem(curr);
- }
- curr = next;
}
- if (pid == CheckPointPID)
- {
- CheckPointPID = 0;
- checkpointed = 0;
- }
- else if (pid == BgWriterPID)
+ /* Take care of the bgwriter too */
+ if (pid == BgWriterPID)
BgWriterPID = 0;
- else
+ else if (BgWriterPID != 0 && !FatalError)
{
- /*
- * Tell the collector about backend termination
- */
- pgstat_beterm(pid);
+ ereport(DEBUG2,
+ (errmsg_internal("sending %s to process %d",
+ (SendStop ? "SIGSTOP" : "SIGQUIT"),
+ (int) BgWriterPID)));
+ kill(BgWriterPID, (SendStop ? SIGSTOP : SIGQUIT));
}
FatalError = true;
@@ -2204,26 +2136,16 @@ LogChildExit(int lev, const char *procname, int pid, int exitstatus)
static void
SignalChildren(int signal)
{
- Dlelem *curr,
- *next;
- Backend *bp;
+ Dlelem *curr;
- curr = DLGetHead(BackendList);
- while (curr)
+ for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
- next = DLGetSucc(curr);
- bp = (Backend *) DLE_VAL(curr);
+ Backend *bp = (Backend *) DLE_VAL(curr);
- if (bp->pid != MyProcPid)
- {
- ereport(DEBUG2,
- (errmsg_internal("sending signal %d to process %d",
- signal,
- (int) bp->pid)));
- kill(bp->pid, signal);
- }
-
- curr = next;
+ ereport(DEBUG4,
+ (errmsg_internal("sending signal %d to process %d",
+ signal, (int) bp->pid)));
+ kill(bp->pid, signal);
}
}
@@ -2346,10 +2268,10 @@ BackendStartup(Port *port)
*/
bn->pid = pid;
bn->cancel_key = MyCancelKey;
+ DLAddHead(BackendList, DLNewElem(bn));
#ifdef EXEC_BACKEND
ShmemBackendArrayAdd(bn);
#endif
- DLAddHead(BackendList, DLNewElem(bn));
return STATUS_OK;
}
@@ -2438,9 +2360,9 @@ BackendRun(Port *port)
/*
* Let's clean up ourselves as the postmaster child, and close the
- * postmaster's other sockets
+ * postmaster's listen sockets
*/
- ClosePostmasterPorts(true);
+ ClosePostmasterPorts();
/* We don't want the postmaster's proc_exit() handlers */
on_exit_reset();
@@ -2833,23 +2755,23 @@ SubPostmasterMain(int argc, char *argv[])
if (strcmp(argv[1], "-forkboot") == 0)
{
/* Close the postmaster's sockets */
- ClosePostmasterPorts(true);
+ ClosePostmasterPorts();
/* Attach process to shared segments */
CreateSharedMemoryAndSemaphores(false, MaxBackends, 0);
BootstrapMain(argc - 2, argv + 2);
- ExitPostmaster(0);
+ proc_exit(0);
}
if (strcmp(argv[1], "-forkbuf") == 0)
{
/* Close the postmaster's sockets */
- ClosePostmasterPorts(false);
+ ClosePostmasterPorts();
/* Do not want to attach to shared memory */
PgstatBufferMain(argc, argv);
- ExitPostmaster(0);
+ proc_exit(0);
}
if (strcmp(argv[1], "-forkcol") == 0)
{
@@ -2861,7 +2783,7 @@ SubPostmasterMain(int argc, char *argv[])
/* Do not want to attach to shared memory */
PgstatCollectorMain(argc, argv);
- ExitPostmaster(0);
+ proc_exit(0);
}
return 1; /* shouldn't get here */
@@ -2886,8 +2808,6 @@ ExitPostmaster(int status)
*
* MUST -- vadim 05-10-1999
*/
- /* Should I use true instead? */
- ClosePostmasterPorts(false);
proc_exit(status);
}
@@ -2902,45 +2822,6 @@ sigusr1_handler(SIGNAL_ARGS)
PG_SETMASK(&BlockSig);
- if (CheckPostmasterSignal(PMSIGNAL_DO_CHECKPOINT))
- {
- if (CheckPointWarning != 0)
- {
- /*
- * This only times checkpoints forced by running out of
- * segment files. Other checkpoints could reduce the
- * frequency of forced checkpoints.
- */
- time_t now = time(NULL);
-
- if (LastSignalledCheckpoint != 0)
- {
- int elapsed_secs = now - LastSignalledCheckpoint;
-
- if (elapsed_secs < CheckPointWarning)
- ereport(LOG,
- (errmsg("checkpoints are occurring too frequently (%d seconds apart)",
- elapsed_secs),
- errhint("Consider increasing the configuration parameter \"checkpoint_segments\".")));
- }
- LastSignalledCheckpoint = now;
- }
-
- /*
- * Request to schedule a checkpoint
- *
- * Ignore request if checkpoint is already running or checkpointing
- * is currently disabled
- */
- if (CheckPointPID == 0 && checkpointed &&
- StartupPID == 0 && Shutdown == NoShutdown &&
- !FatalError && random_seed != 0)
- {
- CheckPointPID = CheckPointDataBase();
- /* note: if fork fails, CheckPointPID stays 0; nothing happens */
- }
- }
-
if (CheckPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE))
{
/*
@@ -2957,7 +2838,7 @@ sigusr1_handler(SIGNAL_ARGS)
* CatchupInterruptHandler). See storage/ipc/sinval[adt].c for the
* use of this.
*/
- if (Shutdown == NoShutdown)
+ if (Shutdown <= SmartShutdown)
SignalChildren(SIGUSR1);
}
@@ -2971,9 +2852,10 @@ sigusr1_handler(SIGNAL_ARGS)
* Dummy signal handler
*
* We use this for signals that we don't actually use in the postmaster,
- * but we do use in backends. If we SIG_IGN such signals in the postmaster,
- * then a newly started backend might drop a signal that arrives before it's
- * able to reconfigure its signal processing. (See notes in postgres.c.)
+ * but we do use in backends. If we were to SIG_IGN such signals in the
+ * postmaster, then a newly started backend might drop a signal that arrives
+ * before it's able to reconfigure its signal processing. (See notes in
+ * tcop/postgres.c.)
*/
static void
dummy_handler(SIGNAL_ARGS)
@@ -3057,37 +2939,28 @@ static int
CountChildren(void)
{
Dlelem *curr;
- Backend *bp;
int cnt = 0;
for (curr = DLGetHead(BackendList); curr; curr = DLGetSucc(curr))
{
- bp = (Backend *) DLE_VAL(curr);
- if (bp->pid != MyProcPid)
- cnt++;
- }
- /* Checkpoint and bgwriter will be in the list, discount them */
- if (CheckPointPID != 0)
- cnt--;
- if (BgWriterPID != 0)
- cnt--;
+ cnt++;
+ }
return cnt;
}
/*
- * SSDataBase -- start a non-backend child process for the postmaster
+ * StartChildProcess -- start a non-backend child process for the postmaster
*
* xlog determines what kind of child will be started. All child types
* initially go to BootstrapMain, which will handle common setup.
*
- * Return value of SSDataBase is subprocess' PID, or 0 if failed to start
- * subprocess (0 is returned only for checkpoint/bgwriter cases).
+ * Return value of StartChildProcess is subprocess' PID, or 0 if failed
+ * to start subprocess.
*/
static pid_t
-SSDataBase(int xlop)
+StartChildProcess(int xlop)
{
- Backend *bn;
pid_t pid;
char *av[10];
int ac = 0;
@@ -3153,7 +3026,7 @@ SSDataBase(int xlop)
IsUnderPostmaster = true; /* we are a postmaster subprocess now */
/* Close the postmaster's sockets */
- ClosePostmasterPorts(true);
+ ClosePostmasterPorts();
/* Lose the postmaster's on-exit routines and port connections */
on_exit_reset();
@@ -3180,17 +3053,9 @@ SSDataBase(int xlop)
ereport(LOG,
(errmsg("could not fork startup process: %m")));
break;
- case BS_XLOG_CHECKPOINT:
- ereport(LOG,
- (errmsg("could not fork checkpoint process: %m")));
- break;
case BS_XLOG_BGWRITER:
ereport(LOG,
- (errmsg("could not fork bgwriter process: %m")));
- break;
- case BS_XLOG_SHUTDOWN:
- ereport(LOG,
- (errmsg("could not fork shutdown process: %m")));
+ (errmsg("could not fork background writer process: %m")));
break;
default:
ereport(LOG,
@@ -3199,50 +3064,17 @@ SSDataBase(int xlop)
}
/*
- * fork failure is fatal during startup/shutdown, but there's no
- * need to choke if a routine checkpoint or starting a background
- * writer fails.
+ * fork failure is fatal during startup, but there's no need
+ * to choke immediately if starting other child types fails.
*/
- if (xlop == BS_XLOG_CHECKPOINT)
- return 0;
- if (xlop == BS_XLOG_BGWRITER)
- return 0;
- ExitPostmaster(1);
+ if (xlop == BS_XLOG_STARTUP)
+ ExitPostmaster(1);
+ return 0;
}
/*
* in parent, successful fork
- *
- * The startup and shutdown processes are not considered normal
- * backends, but the checkpoint and bgwriter processes are. They must
- * be added to the list of backends.
*/
- if (xlop == BS_XLOG_CHECKPOINT || xlop == BS_XLOG_BGWRITER)
- {
- if (!(bn = (Backend *) malloc(sizeof(Backend))))
- {
- ereport(LOG,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
- ExitPostmaster(1);
- }
-
- bn->pid = pid;
- bn->cancel_key = PostmasterRandom();
-#ifdef EXEC_BACKEND
- ShmemBackendArrayAdd(bn);
-#endif
- DLAddHead(BackendList, DLNewElem(bn));
-
- /*
- * Since this code is executed periodically, it's a fine place to
- * do other actions that should happen every now and then on no
- * particular schedule. Such as...
- */
- TouchSocketFile();
- TouchSocketLockFile();
- }
-
return pid;
}
@@ -3316,6 +3148,8 @@ extern int pgStatSock;
#define write_var(var,fp) fwrite((void*)&(var),sizeof(var),1,fp)
#define read_var(var,fp) fread((void*)&(var),sizeof(var),1,fp)
+#define write_array_var(var,fp) fwrite((void*)(var),sizeof(var),1,fp)
+#define read_array_var(var,fp) fread((void*)(var),sizeof(var),1,fp)
static bool
write_backend_variables(char *filename, Port *port)
@@ -3326,9 +3160,9 @@ write_backend_variables(char *filename, Port *port)
/* Calculate name for temp file in caller's buffer */
Assert(DataDir);
- snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%lu",
+ snprintf(filename, MAXPGPATH, "%s/%s/%s.backend_var.%d.%lu",
DataDir, PG_TEMP_FILES_DIR, PG_TEMP_FILE_PREFIX,
- ++tmpBackendFileNum);
+ MyProcPid, ++tmpBackendFileNum);
/* Open file */
fp = AllocateFile(filename, PG_BINARY_W);
@@ -3366,7 +3200,9 @@ write_backend_variables(char *filename, Port *port)
*/
StrNCpy(str_buf, DataDir, MAXPGPATH);
- fwrite((void *) str_buf, MAXPGPATH, 1, fp);
+ write_array_var(str_buf, fp);
+
+ write_array_var(ListenSocket, fp);
write_var(MyCancelKey, fp);
@@ -3386,14 +3222,15 @@ write_backend_variables(char *filename, Port *port)
write_var(debug_flag, fp);
write_var(PostmasterPid, fp);
- fwrite((void *) my_exec_path, MAXPGPATH, 1, fp);
+ StrNCpy(str_buf, my_exec_path, MAXPGPATH);
+ write_array_var(str_buf, fp);
- fwrite((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp);
+ write_array_var(ExtraOptions, fp);
StrNCpy(str_buf, setlocale(LC_COLLATE, NULL), MAXPGPATH);
- fwrite((void *) str_buf, MAXPGPATH, 1, fp);
+ write_array_var(str_buf, fp);
StrNCpy(str_buf, setlocale(LC_CTYPE, NULL), MAXPGPATH);
- fwrite((void *) str_buf, MAXPGPATH, 1, fp);
+ write_array_var(str_buf, fp);
/* Release file */
if (FreeFile(fp))
@@ -3430,9 +3267,11 @@ read_backend_variables(char *filename, Port *port)
read_var(port->cryptSalt, fp);
read_var(port->md5Salt, fp);
- fread((void *) str_buf, MAXPGPATH, 1, fp);
+ read_array_var(str_buf, fp);
SetDataDir(str_buf);
+ read_array_var(ListenSocket, fp);
+
read_var(MyCancelKey, fp);
read_var(UsedShmemSegID, fp);
@@ -3451,13 +3290,14 @@ read_backend_variables(char *filename, Port *port)
read_var(debug_flag, fp);
read_var(PostmasterPid, fp);
- fread((void *) my_exec_path, MAXPGPATH, 1, fp);
+ read_array_var(str_buf, fp);
+ StrNCpy(my_exec_path, str_buf, MAXPGPATH);
- fread((void *) ExtraOptions, sizeof(ExtraOptions), 1, fp);
+ read_array_var(ExtraOptions, fp);
- fread((void *) str_buf, MAXPGPATH, 1, fp);
+ read_array_var(str_buf, fp);
setlocale(LC_COLLATE, str_buf);
- fread((void *) str_buf, MAXPGPATH, 1, fp);
+ read_array_var(str_buf, fp);
setlocale(LC_CTYPE, str_buf);
/* Release file */
@@ -3481,6 +3321,7 @@ ShmemBackendArrayAllocation(void)
size_t size = ShmemBackendArraySize();
ShmemBackendArray = (Backend *) ShmemAlloc(size);
+ /* Mark all slots as empty */
memset(ShmemBackendArray, 0, size);
}
@@ -3489,9 +3330,9 @@ ShmemBackendArrayAdd(Backend *bn)
{
int i;
+ /* Find an empty slot */
for (i = 0; i < NUM_BACKENDARRAY_ELEMS; i++)
{
- /* Find an empty slot */
if (ShmemBackendArray[i].pid == 0)
{
ShmemBackendArray[i] = *bn;
@@ -3500,7 +3341,7 @@ ShmemBackendArrayAdd(Backend *bn)
}
ereport(FATAL,
- (errmsg_internal("unable to add backend entry")));
+ (errmsg_internal("no free slots in shmem backend array")));
}
static void
@@ -3597,13 +3438,14 @@ win32_forkexec(const char *path, char *argv[])
}
/*
- * Note: The following three functions must not be interrupted (eg. by signals).
- * As the Postgres Win32 signalling architecture (currently) requires polling,
- * or APC checking functions which aren't used here, this is not an issue.
+ * Note: The following three functions must not be interrupted (eg. by
+ * signals). As the Postgres Win32 signalling architecture (currently)
+ * requires polling, or APC checking functions which aren't used here, this
+ * is not an issue.
*
- * We keep two separate arrays, instead of a single array of pid/HANDLE structs,
- * to avoid having to re-create a handle array for WaitForMultipleObjects on
- * each call to win32_waitpid.
+ * We keep two separate arrays, instead of a single array of pid/HANDLE
+ * structs, to avoid having to re-create a handle array for
+ * WaitForMultipleObjects on each call to win32_waitpid.
*/
static void
@@ -3662,15 +3504,17 @@ win32_waitpid(int *exitstatus)
*/
int index;
DWORD exitCode;
- DWORD ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray, FALSE, 0);
+ DWORD ret;
+ ret = WaitForMultipleObjects(win32_numChildren, win32_childHNDArray,
+ FALSE, 0);
switch (ret)
{
case WAIT_FAILED:
- ereport(ERROR,
- (errmsg_internal("failed to wait on %lu children: %i",
+ ereport(LOG,
+ (errmsg_internal("failed to wait on %lu children: %d",
win32_numChildren, (int) GetLastError())));
- /* Fall through to WAIT_TIMEOUTs return */
+ return -1;
case WAIT_TIMEOUT:
/* No children have finished */
@@ -3685,7 +3529,7 @@ win32_waitpid(int *exitstatus)
index = ret - WAIT_OBJECT_0;
Assert(index >= 0 && index < win32_numChildren);
if (!GetExitCodeProcess(win32_childHNDArray[index], &exitCode))
-
+ {
/*
* If we get this far, this should never happen, but,
* then again... No choice other than to assume a
@@ -3694,6 +3538,7 @@ win32_waitpid(int *exitstatus)
ereport(FATAL,
(errmsg_internal("failed to get exit code for child %lu",
win32_childPIDArray[index])));
+ }
*exitstatus = (int) exitCode;
return win32_childPIDArray[index];
}
@@ -3703,8 +3548,10 @@ win32_waitpid(int *exitstatus)
return -1;
}
-/* Note! Code belows executes on separate threads, one for
- each child process created */
+/*
+ * Note! Code below executes on separate threads, one for
+ * each child process created
+ */
static DWORD WINAPI
win32_sigchld_waiter(LPVOID param)
{