diff options
author | Thomas Munro <tmunro@postgresql.org> | 2018-11-23 20:16:41 +1300 |
---|---|---|
committer | Thomas Munro <tmunro@postgresql.org> | 2018-11-23 20:46:34 +1300 |
commit | cfdf4dc4fc9635ac8bf6eaaa5dbbcd364ab29f0c (patch) | |
tree | c1e3c40912c5f9274fb9381bea1082f6a2cc2296 /src/backend/replication | |
parent | d392e9bdea957964e1fa6a5481e5adb5904d759a (diff) | |
download | postgresql-cfdf4dc4fc9635ac8bf6eaaa5dbbcd364ab29f0c.tar.gz postgresql-cfdf4dc4fc9635ac8bf6eaaa5dbbcd364ab29f0c.zip |
Add WL_EXIT_ON_PM_DEATH pseudo-event.
Users of the WaitEventSet and WaitLatch() APIs can now either ask for
WL_POSTMASTER_DEATH and then handle it explicitly, or ask for
WL_EXIT_ON_PM_DEATH to trigger immediate exit on postmaster death.
This reduces code duplication, since almost all callers want the latter.
Repair all code that was previously ignoring postmaster death completely,
or requesting the event but ignoring it, or requesting the event but then
doing an unconditional PostmasterIsAlive() call every time through its
event loop (which is an expensive syscall on platforms for which we don't
have USE_POSTMASTER_DEATH_SIGNAL support).
Assert that callers of WaitLatchXXX() under the postmaster remember to
ask for either WL_POSTMASTER_DEATH or WL_EXIT_ON_PM_DEATH, to prevent
future bugs.
The only process that doesn't handle postmaster death is syslogger. It
waits until all backends holding the write end of the syslog pipe
(including the postmaster) have closed it by exiting, to be sure to
capture any parting messages. By using the WaitEventSet API directly
it avoids the new assertion, and as a by-product it may be slightly
more efficient on platforms that have epoll().
Author: Thomas Munro
Reviewed-by: Kyotaro Horiguchi, Heikki Linnakangas, Tom Lane
Discussion: https://postgr.es/m/CAEepm%3D1TCviRykkUb69ppWLr_V697rzd1j3eZsRMmbXvETfqbQ%40mail.gmail.com,
https://postgr.es/m/CAEepm=2LqHzizbe7muD7-2yHUbTOoF7Q+qkSD5Q41kuhttRTwA@mail.gmail.com
Diffstat (limited to 'src/backend/replication')
-rw-r--r-- | src/backend/replication/basebackup.c | 2 | ||||
-rw-r--r-- | src/backend/replication/libpqwalreceiver/libpqwalreceiver.c | 13 | ||||
-rw-r--r-- | src/backend/replication/logical/launcher.c | 24 | ||||
-rw-r--r-- | src/backend/replication/logical/tablesync.c | 30 | ||||
-rw-r--r-- | src/backend/replication/logical/worker.c | 6 | ||||
-rw-r--r-- | src/backend/replication/syncrep.c | 18 | ||||
-rw-r--r-- | src/backend/replication/walreceiver.c | 22 | ||||
-rw-r--r-- | src/backend/replication/walsender.c | 49 |
8 files changed, 43 insertions, 121 deletions
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index b20f6c379c6..a7e3db27832 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -1686,7 +1686,7 @@ throttle(size_t increment) * the maximum time to sleep. Thus the cast to long is safe. */ wait_result = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, (long) (sleep / 1000), WAIT_EVENT_BASE_BACKUP_THROTTLE); diff --git a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c index e2b54265d78..9b75711ebd6 100644 --- a/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c +++ b/src/backend/replication/libpqwalreceiver/libpqwalreceiver.c @@ -186,16 +186,11 @@ libpqrcv_connect(const char *conninfo, bool logical, const char *appname, io_flag = WL_SOCKET_WRITEABLE; rc = WaitLatchOrSocket(MyLatch, - WL_POSTMASTER_DEATH | - WL_LATCH_SET | io_flag, + WL_EXIT_ON_PM_DEATH | WL_LATCH_SET | io_flag, PQsocket(conn->streamConn), 0, WAIT_EVENT_LIBPQWALRECEIVER_CONNECT); - /* Emergency bailout? */ - if (rc & WL_POSTMASTER_DEATH) - exit(1); - /* Interrupted? */ if (rc & WL_LATCH_SET) { @@ -610,16 +605,12 @@ libpqrcv_PQexec(PGconn *streamConn, const char *query) * replication connection. */ rc = WaitLatchOrSocket(MyLatch, - WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | + WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE | WL_LATCH_SET, PQsocket(streamConn), 0, WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE); - /* Emergency bailout? */ - if (rc & WL_POSTMASTER_DEATH) - exit(1); - /* Interrupted? 
*/ if (rc & WL_LATCH_SET) { diff --git a/src/backend/replication/logical/launcher.c b/src/backend/replication/logical/launcher.c index ce089ac07ca..3a84d8ca86a 100644 --- a/src/backend/replication/logical/launcher.c +++ b/src/backend/replication/logical/launcher.c @@ -221,13 +221,9 @@ WaitForReplicationWorkerAttach(LogicalRepWorker *worker, * about the worker attach. But we don't expect to have to wait long. */ rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 10L, WAIT_EVENT_BGWORKER_STARTUP); - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); @@ -498,13 +494,9 @@ logicalrep_worker_stop(Oid subid, Oid relid) /* Wait a bit --- we don't expect to have to wait long. */ rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 10L, WAIT_EVENT_BGWORKER_STARTUP); - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); @@ -546,13 +538,9 @@ logicalrep_worker_stop(Oid subid, Oid relid) /* Wait a bit --- we don't expect to have to wait long. */ rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 10L, WAIT_EVENT_BGWORKER_SHUTDOWN); - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); @@ -1072,14 +1060,10 @@ ApplyLauncherMain(Datum main_arg) /* Wait for more work. 
*/ rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, wait_time, WAIT_EVENT_LOGICAL_LAUNCHER_MAIN); - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); diff --git a/src/backend/replication/logical/tablesync.c b/src/backend/replication/logical/tablesync.c index 9e682331d2f..38ae1b9ab85 100644 --- a/src/backend/replication/logical/tablesync.c +++ b/src/backend/replication/logical/tablesync.c @@ -159,7 +159,6 @@ finish_sync_worker(void) static bool wait_for_relation_state_change(Oid relid, char expected_state) { - int rc; char state; for (;;) @@ -192,13 +191,9 @@ wait_for_relation_state_change(Oid relid, char expected_state) if (!worker) return false; - rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, - 1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE); - - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); + (void) WaitLatch(MyLatch, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + 1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE); ResetLatch(MyLatch); } @@ -250,13 +245,9 @@ wait_for_worker_state_change(char expected_state) * but use a timeout in case it dies without sending one. */ rc = WaitLatch(MyLatch, - WL_LATCH_SET | WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, 1000L, WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE); - /* emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) ResetLatch(MyLatch); } @@ -593,7 +584,6 @@ copy_read_data(void *outbuf, int minread, int maxread) while (maxread > 0 && bytesread < minread) { pgsocket fd = PGINVALID_SOCKET; - int rc; int len; char *buf = NULL; @@ -632,14 +622,10 @@ copy_read_data(void *outbuf, int minread, int maxread) /* * Wait for more data or latch. 
*/ - rc = WaitLatchOrSocket(MyLatch, - WL_SOCKET_READABLE | WL_LATCH_SET | - WL_TIMEOUT | WL_POSTMASTER_DEATH, - fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA); - - /* Emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); + (void) WaitLatchOrSocket(MyLatch, + WL_SOCKET_READABLE | WL_LATCH_SET | + WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, + fd, 1000L, WAIT_EVENT_LOGICAL_SYNC_DATA); ResetLatch(MyLatch); } diff --git a/src/backend/replication/logical/worker.c b/src/backend/replication/logical/worker.c index 3cd1e0d728e..8d5e0946c4b 100644 --- a/src/backend/replication/logical/worker.c +++ b/src/backend/replication/logical/worker.c @@ -1264,14 +1264,10 @@ LogicalRepApplyLoop(XLogRecPtr last_received) rc = WaitLatchOrSocket(MyLatch, WL_SOCKET_READABLE | WL_LATCH_SET | - WL_TIMEOUT | WL_POSTMASTER_DEATH, + WL_TIMEOUT | WL_EXIT_ON_PM_DEATH, fd, wait_time, WAIT_EVENT_LOGICAL_APPLY_MAIN); - /* Emergency bailout if postmaster has died */ - if (rc & WL_POSTMASTER_DEATH) - proc_exit(1); - if (rc & WL_LATCH_SET) { ResetLatch(MyLatch); diff --git a/src/backend/replication/syncrep.c b/src/backend/replication/syncrep.c index af5ad5fe66f..9a13c50ce88 100644 --- a/src/backend/replication/syncrep.c +++ b/src/backend/replication/syncrep.c @@ -214,6 +214,8 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) */ for (;;) { + int rc; + /* Must reset the latch before testing state. */ ResetLatch(MyLatch); @@ -267,24 +269,24 @@ SyncRepWaitForLSN(XLogRecPtr lsn, bool commit) } /* + * Wait on latch. Any condition that should wake us up will set the + * latch, so no need for timeout. + */ + rc = WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1, + WAIT_EVENT_SYNC_REP); + + /* * If the postmaster dies, we'll probably never get an * acknowledgment, because all the wal sender processes will exit. So * just bail out. 
*/ - if (!PostmasterIsAlive()) + if (rc & WL_POSTMASTER_DEATH) { ProcDiePending = true; whereToSendOutput = DestNone; SyncRepCancelWait(); break; } - - /* - * Wait on latch. Any condition that should wake us up will set the - * latch, so no need for timeout. - */ - WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1, - WAIT_EVENT_SYNC_REP); } /* diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c index cb7bb47c9bd..9643c2ed7b3 100644 --- a/src/backend/replication/walreceiver.c +++ b/src/backend/replication/walreceiver.c @@ -503,7 +503,7 @@ WalReceiverMain(void) */ Assert(wait_fd != PGINVALID_SOCKET); rc = WaitLatchOrSocket(walrcv->latch, - WL_POSTMASTER_DEATH | WL_SOCKET_READABLE | + WL_EXIT_ON_PM_DEATH | WL_SOCKET_READABLE | WL_TIMEOUT | WL_LATCH_SET, wait_fd, NAPTIME_PER_CYCLE, @@ -524,15 +524,6 @@ WalReceiverMain(void) XLogWalRcvSendReply(true, false); } } - if (rc & WL_POSTMASTER_DEATH) - { - /* - * Emergency bailout if postmaster has died. This is to - * avoid the necessity for manual cleanup of all - * postmaster children. - */ - exit(1); - } if (rc & WL_TIMEOUT) { /* @@ -673,13 +664,6 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI) { ResetLatch(walrcv->latch); - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. 
- */ - if (!PostmasterIsAlive()) - exit(1); - ProcessWalRcvInterrupts(); SpinLockAcquire(&walrcv->mutex); @@ -706,8 +690,8 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI) } SpinLockRelease(&walrcv->mutex); - WaitLatch(walrcv->latch, WL_LATCH_SET | WL_POSTMASTER_DEATH, 0, - WAIT_EVENT_WAL_RECEIVER_WAIT_START); + (void) WaitLatch(walrcv->latch, WL_LATCH_SET | WL_EXIT_ON_PM_DEATH, 0, + WAIT_EVENT_WAL_RECEIVER_WAIT_START); } if (update_process_title) diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 50191ba881a..46edb525e88 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -1218,20 +1218,13 @@ WalSndWriteData(LogicalDecodingContext *ctx, XLogRecPtr lsn, TransactionId xid, sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); - wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | + wakeEvents = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_SOCKET_WRITEABLE | WL_SOCKET_READABLE | WL_TIMEOUT; /* Sleep until something happens or we time out */ - WaitLatchOrSocket(MyLatch, wakeEvents, - MyProcPort->sock, sleeptime, - WAIT_EVENT_WAL_SENDER_WRITE_DATA); - - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. - */ - if (!PostmasterIsAlive()) - exit(1); + (void) WaitLatchOrSocket(MyLatch, wakeEvents, + MyProcPort->sock, sleeptime, + WAIT_EVENT_WAL_SENDER_WRITE_DATA); /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -1312,13 +1305,6 @@ WalSndWaitForWal(XLogRecPtr loc) { long sleeptime; - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. 
- */ - if (!PostmasterIsAlive()) - exit(1); - /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -1410,15 +1396,15 @@ WalSndWaitForWal(XLogRecPtr loc) */ sleeptime = WalSndComputeSleeptime(GetCurrentTimestamp()); - wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | - WL_SOCKET_READABLE | WL_TIMEOUT; + wakeEvents = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | + WL_SOCKET_READABLE | WL_TIMEOUT; if (pq_is_send_pending()) wakeEvents |= WL_SOCKET_WRITEABLE; - WaitLatchOrSocket(MyLatch, wakeEvents, - MyProcPort->sock, sleeptime, - WAIT_EVENT_WAL_SENDER_WAIT_WAL); + (void) WaitLatchOrSocket(MyLatch, wakeEvents, + MyProcPort->sock, sleeptime, + WAIT_EVENT_WAL_SENDER_WAIT_WAL); } /* reactivate latch so WalSndLoop knows to continue */ @@ -2126,13 +2112,6 @@ WalSndLoop(WalSndSendDataCallback send_data) */ for (;;) { - /* - * Emergency bailout if postmaster has died. This is to avoid the - * necessity for manual cleanup of all postmaster children. - */ - if (!PostmasterIsAlive()) - exit(1); - /* Clear any already-pending wakeups */ ResetLatch(MyLatch); @@ -2222,8 +2201,8 @@ WalSndLoop(WalSndSendDataCallback send_data) long sleeptime; int wakeEvents; - wakeEvents = WL_LATCH_SET | WL_POSTMASTER_DEATH | WL_TIMEOUT | - WL_SOCKET_READABLE; + wakeEvents = WL_LATCH_SET | WL_EXIT_ON_PM_DEATH | WL_TIMEOUT | + WL_SOCKET_READABLE; /* * Use fresh timestamp, not last_processed, to reduce the chance @@ -2235,9 +2214,9 @@ WalSndLoop(WalSndSendDataCallback send_data) wakeEvents |= WL_SOCKET_WRITEABLE; /* Sleep until something happens or we time out */ - WaitLatchOrSocket(MyLatch, wakeEvents, - MyProcPort->sock, sleeptime, - WAIT_EVENT_WAL_SENDER_MAIN); + (void) WaitLatchOrSocket(MyLatch, wakeEvents, + MyProcPort->sock, sleeptime, + WAIT_EVENT_WAL_SENDER_MAIN); } } return; |