aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Paquier <michael@paquier.xyz>2018-07-12 10:19:35 +0900
committerMichael Paquier <michael@paquier.xyz>2018-07-12 10:19:35 +0900
commit9a7b7adc130a197e5c993a99e6aaa981f9341a35 (patch)
tree79f81b20a80c672849b570ddc26af8c9bc7602c7 /src
parent39a96512b3ed72de7b24b2667d9575d7e9fcb326 (diff)
downloadpostgresql-9a7b7adc130a197e5c993a99e6aaa981f9341a35.tar.gz
postgresql-9a7b7adc130a197e5c993a99e6aaa981f9341a35.zip
Make logical WAL sender report streaming state appropriately
WAL senders sending logically-decoded data fail to properly report in "streaming" state when starting up, hence as long as one extra record is not replayed, such WAL senders would remain in a "catchup" state, which is inconsistent with the physical cousin. This can be easily reproduced by for example using pg_recvlogical and restarting the upstream server. The TAP tests have been slightly modified to detect the failure and strengthened so as future tests also make sure that a node is in streaming state when waiting for its catchup. Backpatch down to 9.4 where this code has been introduced. Reported-by: Sawada Masahiko Author: Simon Riggs, Sawada Masahiko Reviewed-by: Petr Jelinek, Michael Paquier, Vaishnavi Prabakaran Discussion: https://postgr.es/m/CAD21AoB2ZbCCqOx=bgKMcLrAvs1V0ZMqzs7wBTuDySezTGtMZA@mail.gmail.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/replication/walsender.c20
-rw-r--r--src/test/perl/PostgresNode.pm5
-rw-r--r--src/test/subscription/t/001_rep_changes.pl5
3 files changed, 23 insertions, 7 deletions
diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c
index e47ddca6bca..3a0106bc933 100644
--- a/src/backend/replication/walsender.c
+++ b/src/backend/replication/walsender.c
@@ -2169,7 +2169,7 @@ WalSndLoop(WalSndSendDataCallback send_data)
if (MyWalSnd->state == WALSNDSTATE_CATCHUP)
{
ereport(DEBUG1,
- (errmsg("standby \"%s\" has now caught up with primary",
+ (errmsg("\"%s\" has now caught up with upstream server",
application_name)));
WalSndSetState(WALSNDSTATE_STREAMING);
}
@@ -2758,10 +2758,10 @@ XLogSendLogical(void)
char *errm;
/*
- * Don't know whether we've caught up yet. We'll set it to true in
- * WalSndWaitForWal, if we're actually waiting. We also set to true if
- * XLogReadRecord() had to stop reading but WalSndWaitForWal didn't wait -
- * i.e. when we're shutting down.
+ * Don't know whether we've caught up yet. We'll set WalSndCaughtUp to
+ * true in WalSndWaitForWal, if we're actually waiting. We also set to
+ * true if XLogReadRecord() had to stop reading but WalSndWaitForWal
+ * didn't wait - i.e. when we're shutting down.
*/
WalSndCaughtUp = false;
@@ -2774,6 +2774,9 @@ XLogSendLogical(void)
if (record != NULL)
{
+ /* XXX: Note that logical decoding cannot be used while in recovery */
+ XLogRecPtr flushPtr = GetFlushRecPtr();
+
/*
* Note the lack of any call to LagTrackerWrite() which is handled by
* WalSndUpdateProgress which is called by output plugin through
@@ -2782,6 +2785,13 @@ XLogSendLogical(void)
LogicalDecodingProcessRecord(logical_decoding_ctx, logical_decoding_ctx->reader);
sentPtr = logical_decoding_ctx->reader->EndRecPtr;
+
+ /*
+ * If we have sent a record that is at or beyond the flushed point, we
+ * have caught up.
+ */
+ if (sentPtr >= flushPtr)
+ WalSndCaughtUp = true;
}
else
{
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index a08af65695b..79fb4570758 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -1535,7 +1535,8 @@ also works for logical subscriptions)
until its replication location in pg_stat_replication equals or passes the
upstream's WAL insert point at the time this function is called. By default
the replay_lsn is waited for, but 'mode' may be specified to wait for any of
-sent|write|flush|replay.
+sent|write|flush|replay. The connection catching up must be in a streaming
+state.
If there is no active replication connection from this peer, waits until
poll_query_until timeout.
@@ -1580,7 +1581,7 @@ sub wait_for_catchup
. $lsn_expr . " on "
. $self->name . "\n";
my $query =
- qq[SELECT $lsn_expr <= ${mode}_lsn FROM pg_catalog.pg_stat_replication WHERE application_name = '$standby_name';];
+ qq[SELECT $lsn_expr <= ${mode}_lsn AND state = 'streaming' FROM pg_catalog.pg_stat_replication WHERE application_name = '$standby_name';];
$self->poll_query_until('postgres', $query)
or croak "timed out waiting for catchup";
print "done\n";
diff --git a/src/test/subscription/t/001_rep_changes.pl b/src/test/subscription/t/001_rep_changes.pl
index 503556fd6cd..d94458e00e1 100644
--- a/src/test/subscription/t/001_rep_changes.pl
+++ b/src/test/subscription/t/001_rep_changes.pl
@@ -188,6 +188,11 @@ $node_publisher->safe_psql('postgres',
"INSERT INTO tab_ins SELECT generate_series(1001,1100)");
$node_publisher->safe_psql('postgres', "DELETE FROM tab_rep");
+# Restart the publisher and check the state of the subscriber which
+# should be in a streaming state after catching up.
+$node_publisher->stop('fast');
+$node_publisher->start;
+
$node_publisher->wait_for_catchup($appname);
$result = $node_subscriber->safe_psql('postgres',