diff options
-rw-r--r-- | src/test/perl/TestLib.pm | 11 | ||||
-rw-r--r-- | src/test/recovery/Makefile | 2 | ||||
-rw-r--r-- | src/test/recovery/t/016_min_consistency.pl | 175 |
3 files changed, 187 insertions, 1 deletions
diff --git a/src/test/perl/TestLib.pm b/src/test/perl/TestLib.pm
index b9cb51b9d39..ce59401cefa 100644
--- a/src/test/perl/TestLib.pm
+++ b/src/test/perl/TestLib.pm
@@ -36,6 +36,7 @@ our @EXPORT = qw(
   system_or_bail
   system_log
   run_log
+  run_command
 
   command_ok
   command_fails
@@ -203,6 +204,16 @@ sub run_log
     return IPC::Run::run(@_);
 }
 
+# Run a command via IPC::Run; return (stdout, stderr), each chomped once.
+sub run_command
+{
+    my ($cmd) = @_;
+    my ($stdout, $stderr);
+    IPC::Run::run($cmd, '>', \$stdout, '2>', \$stderr);  # status ignored
+    chomp($stdout, $stderr);
+    return ($stdout, $stderr);
+}
+
 # Generate a string made of the given range of ASCII characters
 sub generate_ascii_string
 {
diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
index e66e69521f2..648dd7edf4d 100644
--- a/src/test/recovery/Makefile
+++ b/src/test/recovery/Makefile
@@ -9,7 +9,7 @@
 #
 #-------------------------------------------------------------------------
 
-EXTRA_INSTALL=contrib/test_decoding
+EXTRA_INSTALL=contrib/test_decoding contrib/pageinspect
 
 subdir = src/test/recovery
 top_builddir = ../../..
diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl
new file mode 100644
index 00000000000..8f1a89c2d3e
--- /dev/null
+++ b/src/test/recovery/t/016_min_consistency.pl
@@ -0,0 +1,175 @@
+# Test for checking consistency of on-disk pages for a cluster with
+# the minimum recovery LSN, ensuring that the updates happen across
+# all processes. In this test, the updates from the startup process
+# and the checkpointer (which triggers non-startup code paths) are
+# both checked.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 2;
+
+# Find the largest LSN in the set of pages part of the given relation
+# file. This is used for offline checks of page consistency. The LSN
+# is historically stored as a set of two numbers of 4 byte-length
+# located at the beginning of each page.
+sub find_largest_lsn
+{
+    my $blocksize = int(shift);    # page size in bytes, used as read length
+    my $filename = shift;          # path of the relation file to scan
+    my ($max_hi, $max_lo) = (0, 0);
+
+    # With a block size of 0, read() would return 0 right away and the
+    # scan would report 0/0 without having looked at a single page.
+    $blocksize > 0 or die "invalid block size $blocksize";
+    open(my $fh, "<:raw", $filename)
+      or die "failed to open $filename: $!";
+    my ($buf, $len);
+    while ($len = read($fh, $buf, $blocksize))
+    {
+        $len == $blocksize
+          or die "read only $len of $blocksize bytes from $filename";
+        my ($hi, $lo) = unpack("LL", $buf);
+        if ($hi > $max_hi or ($hi == $max_hi and $lo > $max_lo))
+        {
+            ($max_hi, $max_lo) = ($hi, $lo);
+        }
+    }
+    defined($len) or die "read error on $filename: $!";
+    close($fh);
+    return sprintf("%X/%X", $max_hi, $max_lo);
+}
+
+# Initialize primary node
+my $primary = get_new_node('primary');
+$primary->init(allows_streaming => 1);
+
+# Set shared_buffers to a very low value to enforce discard and flush
+# of PostgreSQL buffers on standby, enforcing other processes than the
+# startup process to update the minimum recovery LSN in the control
+# file. Autovacuum is disabled so as there is no risk of having other
+# processes than the checkpointer doing page flushes.
+$primary->append_conf("postgresql.conf", <<EOF);
+shared_buffers = 128kB
+autovacuum = off
+EOF
+
+# Start the primary
+$primary->start;
+
+# setup/start a standby
+$primary->backup('bkp');
+my $standby = get_new_node('standby');
+$standby->init_from_backup($primary, 'bkp', has_streaming => 1);
+$standby->start;
+
+# Object creations for the upcoming tests:
+# - Base table whose data consistency is checked.
+# - pageinspect to look at the page-level contents.
+# - Function wrapper on top of pageinspect to scan a range of pages and
+#   get the maximum LSN present.
+$primary->safe_psql('postgres', "
+CREATE EXTENSION pageinspect;
+-- Function wrapper on top of pageinspect which fetches the largest LSN
+-- present in the given page range.
+CREATE OR REPLACE FUNCTION max_lsn_range(relname text,
+    start_blk int,
+    end_blk int)
+RETURNS pg_lsn as \$\$
+DECLARE
+    max_lsn pg_lsn = '0/0'::pg_lsn;
+    cur_lsn pg_lsn;
+BEGIN
+    FOR i IN start_blk..end_blk LOOP
+        EXECUTE 'SELECT lsn FROM page_header(get_raw_page(''' || relname || ''',' || i || '));' INTO cur_lsn;
+        IF max_lsn < cur_lsn THEN
+            max_lsn = cur_lsn;
+        END IF;
+    END LOOP;
+    RETURN max_lsn;
+END;
+\$\$ LANGUAGE plpgsql;
+CREATE TABLE test1 (a int) WITH (fillfactor = 10);
+INSERT INTO test1 SELECT generate_series(1, 10000);");
+
+# Take a checkpoint and enforce post-checkpoint full page writes
+# which makes the startup process replay those pages, updating
+# minRecoveryPoint.
+$primary->safe_psql('postgres', 'CHECKPOINT;');
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Fill in the standby's shared buffers with the data filled in previously.
+$standby->safe_psql('postgres', 'SELECT count(*) FROM test1;');
+
+# Update the table again, this does not generate full page writes so
+# the standby will replay records associated with it, but the startup
+# process will not flush those pages.
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Extract from the relation the last block created and its relation
+# file, this will be used at the end of the test for sanity checks.
+my $blocksize = $primary->safe_psql('postgres',
+    "SELECT setting::int FROM pg_settings WHERE name = 'block_size';");
+my $last_block = $primary->safe_psql('postgres',
+    "SELECT pg_relation_size('test1')::int / $blocksize - 1;");
+my $relfilenode = $primary->safe_psql('postgres',
+    "SELECT pg_relation_filepath('test1'::regclass);");
+
+# Wait for last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby, 'replay', $primary->lsn('insert'));
+
+# Issue a restart point on the standby now, which makes the checkpointer
+# update minRecoveryPoint.
+$standby->safe_psql('postgres', 'CHECKPOINT;');
+
+# Now shut down the primary violently so as the standby does not
+# receive the shutdown checkpoint, making sure that the startup
+# process does not flush any pages on its side. The standby is
+# cleanly stopped, which makes the checkpointer update minRecoveryPoint
+# with the restart point created at shutdown.
+$primary->stop('immediate');
+$standby->stop('fast');
+
+# Check the data consistency of the instance while offline. This is
+# done by directly scanning the on-disk relation blocks and what
+# pg_controldata lets know.
+my $standby_data = $standby->data_dir;
+my $offline_max_lsn = find_largest_lsn($blocksize,
+    "$standby_data/$relfilenode");
+
+# Fetch minRecoveryPoint from the control file itself. The output is
+# matched as a whole, /m letting ^ anchor at the start of any line.
+my ($control_out) = run_command(['pg_controldata', $standby_data]);
+my ($offline_recovery_lsn) = $control_out =~
+    m{^Minimum recovery ending location:\s*([0-9A-Fa-f]+/[0-9A-Fa-f]+)}m;
+die "No minRecoveryPoint in control file found\n"
+    unless defined($offline_recovery_lsn);
+
+# minRecoveryPoint should never be older than the maximum LSN for all
+# the pages on disk. Both values are in unpadded "%X/%X" form, which
+# string comparison orders wrongly ("0/A000000" sorts after
+# "0/10000000"), so each half is converted from hexadecimal and the
+# pairs are compared numerically.
+my ($min_hi, $min_lo) = map { hex($_) } split(m{/}, $offline_recovery_lsn);
+my ($max_hi, $max_lo) = map { hex($_) } split(m{/}, $offline_max_lsn);
+ok( $min_hi > $max_hi || ($min_hi == $max_hi && $min_lo >= $max_lo),
+    "Check offline that table data is consistent with minRecoveryPoint")
+  or diag("minRecoveryPoint is $offline_recovery_lsn, "
+      . "largest page LSN is $offline_max_lsn");
+
+# Now restart the standby and check the state of the instance while
+# online. Again, all the pages of the relation previously created
+# should not have a LSN newer than what minRecoveryPoint has.
+$standby->start;
+
+# Check that the last page of the table, which is the last one which
+# has been flushed by the previous checkpoint on the standby, does not
+# have a LSN newer than minRecoveryPoint.
+# The comparison is done in SQL; psql prints 't' when it holds.
+my $online_query =
+    "SELECT max_lsn_range('test1', 0, $last_block) <= min_recovery_end_lsn FROM pg_control_recovery()";
+my $online_result;
+$standby->psql('postgres', $online_query, stdout => \$online_result);
+is($online_result, 't',
+    "Check online that table data is consistent with minRecoveryPoint");