aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2022-04-05 20:44:01 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2022-04-05 20:44:01 -0400
commit: e37ad5fa4df2319e26a7e779607130feae1a5029 (patch)
tree: f8909b78476c0fa9657dcfb7ebb68225ae67c2b7
parent: 75edb919613ee835e7680e40137e494c7856bcf9 (diff)
download: postgresql-e37ad5fa4df2319e26a7e779607130feae1a5029.tar.gz
download: postgresql-e37ad5fa4df2319e26a7e779607130feae1a5029.zip
Remove race condition in 022_crash_temp_files.pl test.
It's possible for the query that "waits for restart" to complete a successful iteration before the postmaster has noticed its SIGKILL'd child and begun the restart cycle. (This is a bit hard to believe perhaps, but it's been seen at least twice in the buildfarm, mainly on ancient platforms that likely have quirky schedulers.)

To provide a more secure interlock, wait for the other session we're using to report that it's been forcibly shut down.

Patch by me, based on a suggestion from Andres Freund. Back-patch to v14 where this test case came in.

Discussion: https://postgr.es/m/1801850.1649047827@sss.pgh.pa.us
-rw-r--r-- src/test/recovery/t/022_crash_temp_files.pl | 34
1 file changed, 30 insertions, 4 deletions
diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl
index 36906b4aca6..24fb141785d 100644
--- a/src/test/recovery/t/022_crash_temp_files.pl
+++ b/src/test/recovery/t/022_crash_temp_files.pl
@@ -125,11 +125,24 @@ $killme_stderr2 = '';
my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
is($ret, 0, 'killed process with KILL');
-# Close psql session
+# Close that psql session
$killme->finish;
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default);
+];
+ok( pump_until(
+ $killme2,
+ $psql_timeout,
+ \$killme_stderr2,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "second psql session died successfully after SIGKILL");
$killme2->finish;
-# Wait till server restarts
+# Wait till server finishes restarting
$node->poll_query_until('postgres', undef, '');
# Check for temporary files
@@ -214,11 +227,24 @@ $killme_stderr2 = '';
$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
is($ret, 0, 'killed process with KILL');
-# Close psql session
+# Close that psql session
$killme->finish;
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default);
+];
+ok( pump_until(
+ $killme2,
+ $psql_timeout,
+ \$killme_stderr2,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "second psql session died successfully after SIGKILL");
$killme2->finish;
-# Wait till server restarts
+# Wait till server finishes restarting
$node->poll_query_until('postgres', undef, '');
# Check for temporary files -- should be there