diff options
author | Andres Freund <andres@anarazel.de> | 2022-02-22 18:02:34 -0800 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2022-02-22 18:02:34 -0800 |
commit | afdeff10526e29e3fc63b18c08100458780489d9 (patch) | |
tree | 5fcadeac65e88f2ddf001be9da892d1209a09178 /src | |
parent | 9467321649efc1fec28603d4ba35d03202c4ead1 (diff) | |
download | postgresql-afdeff10526e29e3fc63b18c08100458780489d9.tar.gz postgresql-afdeff10526e29e3fc63b18c08100458780489d9.zip |
Add temporary debug info to help debug 019_replslot_limit.pl failures.
I have not been able to reproduce the occasional failures of
019_replslot_limit.pl we are seeing in the buildfarm and not for lack of
trying. The additional logging and increased log level will hopefully help.
Will be reverted once the cause is identified.
Discussion: https://postgr.es/m/20220218231415.c4plkp4i3reqcwip@alap3.anarazel.de
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/replication/slot.c | 21 | ||||
-rw-r--r-- | src/bin/pg_basebackup/pg_basebackup.c | 10 | ||||
-rw-r--r-- | src/test/recovery/t/019_replslot_limit.pl | 5 |
3 files changed, 34 insertions, 2 deletions
diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 5da5fa825a2..3d39fddaaef 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -177,6 +177,10 @@ ReplicationSlotInitialize(void) static void ReplicationSlotShmemExit(int code, Datum arg) { + /* temp debugging aid to analyze 019_replslot_limit failures */ + elog(DEBUG3, "replication slot exit hook, %s active slot", + MyReplicationSlot != NULL ? "with" : "without"); + /* Make sure active replication slots are released */ if (MyReplicationSlot != NULL) ReplicationSlotRelease(); @@ -554,6 +558,9 @@ ReplicationSlotCleanup(void) Assert(MyReplicationSlot == NULL); restart: + /* temp debugging aid to analyze 019_replslot_limit failures */ + elog(DEBUG3, "temporary replication slot cleanup: begin"); + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (i = 0; i < max_replication_slots; i++) { @@ -579,6 +586,8 @@ restart: } LWLockRelease(ReplicationSlotControlLock); + + elog(DEBUG3, "temporary replication slot cleanup: done"); } /* @@ -1284,6 +1293,12 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN, (void) kill(active_pid, SIGTERM); last_signaled_pid = active_pid; } + else + { + /* temp debugging aid to analyze 019_replslot_limit failures */ + elog(DEBUG3, "not signalling process %d during invalidation of slot \"%s\"", + active_pid, NameStr(slotname)); + } /* Wait until the slot is released. */ ConditionVariableSleep(&s->active_cv, @@ -1347,6 +1362,10 @@ InvalidateObsoleteReplicationSlots(XLogSegNo oldestSegno) XLogSegNoOffsetToRecPtr(oldestSegno, 0, wal_segment_size, oldestLSN); restart: + /* temp debugging aid to analyze 019_replslot_limit failures */ + elog(DEBUG3, "begin invalidating obsolete replication slots older than %X/%X", + LSN_FORMAT_ARGS(oldestLSN)); + LWLockAcquire(ReplicationSlotControlLock, LW_SHARED); for (int i = 0; i < max_replication_slots; i++) { @@ -1372,6 +1391,8 @@ restart: ReplicationSlotsComputeRequiredLSN(); } + elog(DEBUG3, "done invalidating obsolete replication slots"); + return invalidated; } diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 08b07d5a06e..8c77c533e64 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -700,8 +700,16 @@ StartLogStreamer(char *startpos, uint32 timeline, char *sysidentifier) bgchild = fork(); if (bgchild == 0) { + int ret; + /* in child process */ - exit(LogStreamerMain(param)); + ret = LogStreamerMain(param); + + /* temp debugging aid to analyze 019_replslot_limit failures */ + if (verbose) + pg_log_info("log streamer with pid %d exiting", getpid()); + + exit(ret); } else if (bgchild < 0) { diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl index 4257bd4d35a..0c9da9bf272 100644 --- a/src/test/recovery/t/019_replslot_limit.pl +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -316,13 +316,16 @@ $node_primary3->append_conf( max_wal_size = 2MB log_checkpoints = yes max_slot_wal_keep_size = 1MB + + # temp debugging aid to analyze 019_replslot_limit failures + log_min_messages=debug3 )); $node_primary3->start; $node_primary3->safe_psql('postgres', "SELECT pg_create_physical_replication_slot('rep3')"); # Take backup $backup_name = 'my_backup'; -$node_primary3->backup($backup_name); +$node_primary3->backup($backup_name, backup_options => ['--verbose']); # Create standby my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3'); $node_standby3->init_from_backup($node_primary3, $backup_name, |