about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/postmaster/postmaster.c 156
-rw-r--r-- src/port/kill.c 20
2 files changed, 137 insertions, 39 deletions
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 87e60621396..64467cb53ca 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -275,6 +275,7 @@ static pid_t StartupPID = 0,
#define NoShutdown 0
#define SmartShutdown 1
#define FastShutdown 2
+#define ImmediateShutdown 3
static int Shutdown = NoShutdown;
@@ -345,6 +346,10 @@ typedef enum
static PMState pmState = PM_INIT;
+/* Start time of abort processing at immediate shutdown or child crash */
+static time_t AbortStartTime;
+#define SIGKILL_CHILDREN_AFTER_SECS 5
+
static bool ReachedNormalRunning = false; /* T if we've reached PM_RUN */
bool ClientAuthInProgress = false; /* T during new-client
@@ -421,6 +426,7 @@ static void RandomSalt(char *md5Salt);
static void signal_child(pid_t pid, int signal);
static bool SignalSomeChildren(int signal, int targets);
static bool SignalUnconnectedWorkers(int signal);
+static void TerminateChildren(int signal);
#define SignalChildren(sig) SignalSomeChildren(sig, BACKEND_TYPE_ALL)
@@ -1427,8 +1433,18 @@ DetermineSleepTime(struct timeval * timeout)
if (Shutdown > NoShutdown ||
(!StartWorkerNeeded && !HaveCrashedWorker))
{
- timeout->tv_sec = 60;
- timeout->tv_usec = 0;
+		if (AbortStartTime > 0)
+		{
+			/*
+			 * Time left before children are due for SIGKILL, but never less
+			 * than 1 second: once the deadline has passed the raw difference
+			 * is zero or negative, which is not a usable sleep time.
+			 */
+			timeout->tv_sec = SIGKILL_CHILDREN_AFTER_SECS -
+				(time(NULL) - AbortStartTime);
+			if (timeout->tv_sec < 1)
+				timeout->tv_sec = 1;
+			timeout->tv_usec = 0;
+		}
+		else
+		{
+			/* no abort in progress: use the 60-second wait */
+			timeout->tv_sec = 60;
+			timeout->tv_usec = 0;
+		}
return;
}
@@ -1660,6 +1676,28 @@ ServerLoop(void)
TouchSocketLockFiles();
last_touch_time = now;
}
+
+ /*
+ * If we already sent SIGQUIT to children and they are slow to shut
+ * down, it's time to send them SIGKILL. This doesn't happen normally,
+ * but under certain conditions backends can get stuck while shutting
+ * down. This is a last measure to get them unwedged.
+ *
+ * Note we also do this during recovery from a process crash.
+ */
+ if ((Shutdown >= ImmediateShutdown || (FatalError && !SendStop)) &&
+ now - AbortStartTime >= SIGKILL_CHILDREN_AFTER_SECS)
+ {
+ /* We were gentle with them before. Not anymore */
+ TerminateChildren(SIGKILL);
+
+ /*
+ * Additionally, unless we're recovering from a process crash, it's
+ * now the time for postmaster to abandon ship.
+ */
+ if (!FatalError)
+ ExitPostmaster(1);
+ }
}
}
@@ -2455,30 +2493,27 @@ pmdie(SIGNAL_ARGS)
/*
* Immediate Shutdown:
*
- * abort all children with SIGQUIT and exit without attempt to
- * properly shut down data base system.
+ * abort all children with SIGQUIT, wait for them to exit,
+ * terminate remaining ones with SIGKILL, then exit without
+ * attempt to properly shut down the data base system.
*/
+ if (Shutdown >= ImmediateShutdown)
+ break;
+ Shutdown = ImmediateShutdown;
ereport(LOG,
(errmsg("received immediate shutdown request")));
- SignalChildren(SIGQUIT);
- if (StartupPID != 0)
- signal_child(StartupPID, SIGQUIT);
- if (BgWriterPID != 0)
- signal_child(BgWriterPID, SIGQUIT);
- if (CheckpointerPID != 0)
- signal_child(CheckpointerPID, SIGQUIT);
- if (WalWriterPID != 0)
- signal_child(WalWriterPID, SIGQUIT);
- if (WalReceiverPID != 0)
- signal_child(WalReceiverPID, SIGQUIT);
- if (AutoVacPID != 0)
- signal_child(AutoVacPID, SIGQUIT);
- if (PgArchPID != 0)
- signal_child(PgArchPID, SIGQUIT);
- if (PgStatPID != 0)
- signal_child(PgStatPID, SIGQUIT);
- SignalUnconnectedWorkers(SIGQUIT);
- ExitPostmaster(0);
+
+ TerminateChildren(SIGQUIT);
+ pmState = PM_WAIT_BACKENDS;
+
+ /* set stopwatch for them to die */
+ AbortStartTime = time(NULL);
+
+ /*
+ * Now wait for backends to exit. If there are none,
+ * PostmasterStateMachine will take the next step.
+ */
+ PostmasterStateMachine();
break;
}
@@ -2952,12 +2987,17 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
dlist_mutable_iter iter;
slist_iter siter;
Backend *bp;
+ bool take_action;
/*
- * Make log entry unless there was a previous crash (if so, nonzero exit
- * status is to be expected in SIGQUIT response; don't clutter log)
+ * We only log messages and send signals if this is the first process crash
+ * and we're not doing an immediate shutdown; otherwise, we're only here to
+ * update postmaster's idea of live processes. If we have already signalled
+ * children, nonzero exit status is to be expected, so don't clutter log.
*/
- if (!FatalError)
+ take_action = !FatalError && Shutdown != ImmediateShutdown;
+
+ if (take_action)
{
LogChildExit(LOG, procname, pid, exitstatus);
ereport(LOG,
@@ -3003,7 +3043,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* (-s on command line), then we send SIGSTOP instead, so that we
* can get core dumps from all backends by hand.
*/
- if (!FatalError)
+ if (take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3055,7 +3095,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
if (bp->bkend_type == BACKEND_TYPE_BGWORKER)
continue;
- if (!FatalError)
+ if (take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3069,7 +3109,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the startup process too */
if (pid == StartupPID)
StartupPID = 0;
- else if (StartupPID != 0 && !FatalError)
+ else if (StartupPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3081,7 +3121,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the bgwriter too */
if (pid == BgWriterPID)
BgWriterPID = 0;
- else if (BgWriterPID != 0 && !FatalError)
+ else if (BgWriterPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3093,7 +3133,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the checkpointer too */
if (pid == CheckpointerPID)
CheckpointerPID = 0;
- else if (CheckpointerPID != 0 && !FatalError)
+ else if (CheckpointerPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3105,7 +3145,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the walwriter too */
if (pid == WalWriterPID)
WalWriterPID = 0;
- else if (WalWriterPID != 0 && !FatalError)
+ else if (WalWriterPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3117,7 +3157,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the walreceiver too */
if (pid == WalReceiverPID)
WalReceiverPID = 0;
- else if (WalReceiverPID != 0 && !FatalError)
+ else if (WalReceiverPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3129,7 +3169,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* Take care of the autovacuum launcher too */
if (pid == AutoVacPID)
AutoVacPID = 0;
- else if (AutoVacPID != 0 && !FatalError)
+ else if (AutoVacPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3144,7 +3184,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* simplifies the state-machine logic in the case where a shutdown request
* arrives during crash processing.)
*/
- if (PgArchPID != 0 && !FatalError)
+ if (PgArchPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3159,7 +3199,7 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
* simplifies the state-machine logic in the case where a shutdown request
* arrives during crash processing.)
*/
- if (PgStatPID != 0 && !FatalError)
+ if (PgStatPID != 0 && take_action)
{
ereport(DEBUG2,
(errmsg_internal("sending %s to process %d",
@@ -3171,7 +3211,9 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
/* We do NOT restart the syslogger */
- FatalError = true;
+ if (Shutdown != ImmediateShutdown)
+ FatalError = true;
+
/* We now transit into a state of waiting for children to die */
if (pmState == PM_RECOVERY ||
pmState == PM_HOT_STANDBY ||
@@ -3180,6 +3222,13 @@ HandleChildCrash(int pid, int exitstatus, const char *procname)
pmState == PM_WAIT_READONLY ||
pmState == PM_SHUTDOWN)
pmState = PM_WAIT_BACKENDS;
+
+ /*
+ * .. and if this doesn't happen quickly enough, now the clock is ticking
+ * for us to kill them without mercy.
+ */
+ if (AbortStartTime == 0)
+ AbortStartTime = time(NULL);
}
/*
@@ -3316,7 +3365,7 @@ PostmasterStateMachine(void)
WalWriterPID == 0 &&
AutoVacPID == 0)
{
- if (FatalError)
+ if (Shutdown >= ImmediateShutdown || FatalError)
{
/*
* Start waiting for dead_end children to die. This state
@@ -3326,7 +3375,8 @@ PostmasterStateMachine(void)
/*
* We already SIGQUIT'd the archiver and stats processes, if
- * any, when we entered FatalError state.
+ * any, when we started immediate shutdown or entered
+ * FatalError state.
*/
}
else
@@ -3511,6 +3561,7 @@ signal_child(pid_t pid, int signal)
case SIGTERM:
case SIGQUIT:
case SIGSTOP:
+ case SIGKILL:
if (kill(-pid, signal) < 0)
elog(DEBUG3, "kill(%ld,%d) failed: %m", (long) (-pid), signal);
break;
@@ -3598,6 +3649,33 @@ SignalSomeChildren(int signal, int target)
}
/*
+ * Send a termination signal to children. This considers all of our children
+ * processes, except syslogger and dead_end backends.
+ */
+static void
+TerminateChildren(int signal)
+{
+ SignalChildren(signal);
+ if (StartupPID != 0)
+ signal_child(StartupPID, signal);
+ if (BgWriterPID != 0)
+ signal_child(BgWriterPID, signal);
+ if (CheckpointerPID != 0)
+ signal_child(CheckpointerPID, signal);
+ if (WalWriterPID != 0)
+ signal_child(WalWriterPID, signal);
+ if (WalReceiverPID != 0)
+ signal_child(WalReceiverPID, signal);
+ if (AutoVacPID != 0)
+ signal_child(AutoVacPID, signal);
+ if (PgArchPID != 0)
+ signal_child(PgArchPID, signal);
+ if (PgStatPID != 0)
+ signal_child(PgStatPID, signal);
+ SignalUnconnectedWorkers(signal);
+}
+
+/*
* BackendStartup -- start backend process
*
* returns: STATUS_ERROR if the fork failed, STATUS_OK otherwise.
diff --git a/src/port/kill.c b/src/port/kill.c
index 0a810cd2252..d0f23efb177 100644
--- a/src/port/kill.c
+++ b/src/port/kill.c
@@ -38,6 +38,26 @@ pgkill(int pid, int sig)
errno = EINVAL;
return -1;
}
+
+ /* special case for SIGKILL: just ask the system to terminate the target */
+ if (sig == SIGKILL)
+ {
+ HANDLE prochandle;
+
+ if ((prochandle = OpenProcess(PROCESS_TERMINATE, FALSE, (DWORD) pid)) == NULL)
+ {
+ errno = ESRCH;
+ return -1;
+ }
+ if (!TerminateProcess(prochandle, 255))
+ {
+ _dosmaperr(GetLastError());
+ CloseHandle(prochandle);
+ return -1;
+ }
+ CloseHandle(prochandle);
+ return 0;
+ }
snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
if (CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))