From dbc60c5593f26dc777a3be032bff4fb4eab1ddd1 Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Sun, 12 Apr 2020 11:26:05 -0400 Subject: Rename pg_validatebackup to pg_verifybackup. Also, use "verify" rather than "validate" to refer to the process being undertaken here. Per discussion, that is a more appropriate term. Discussion: https://www.postgresql.org/message-id/172c9d9b-1d0a-1b94-1456-376b1e017322@2ndquadrant.com Discussion: http://postgr.es/m/CA+TgmobLgMh6p8FmLbj_rv9Uhd7tPrLnAyLgGd2SoSj=qD-bVg@mail.gmail.com --- src/bin/Makefile | 2 +- src/bin/pg_validatebackup/.gitignore | 2 - src/bin/pg_validatebackup/Makefile | 40 -- src/bin/pg_validatebackup/parse_manifest.c | 740 ------------------- src/bin/pg_validatebackup/parse_manifest.h | 45 -- src/bin/pg_validatebackup/pg_validatebackup.c | 905 ------------------------ src/bin/pg_validatebackup/t/001_basic.pl | 30 - src/bin/pg_validatebackup/t/002_algorithm.pl | 58 -- src/bin/pg_validatebackup/t/003_corruption.pl | 288 -------- src/bin/pg_validatebackup/t/004_options.pl | 89 --- src/bin/pg_validatebackup/t/005_bad_manifest.pl | 204 ------ src/bin/pg_validatebackup/t/006_encoding.pl | 27 - src/bin/pg_validatebackup/t/007_wal.pl | 55 -- src/bin/pg_verifybackup/.gitignore | 2 + src/bin/pg_verifybackup/Makefile | 40 ++ src/bin/pg_verifybackup/parse_manifest.c | 740 +++++++++++++++++++ src/bin/pg_verifybackup/parse_manifest.h | 45 ++ src/bin/pg_verifybackup/pg_verifybackup.c | 905 ++++++++++++++++++++++++ src/bin/pg_verifybackup/t/001_basic.pl | 30 + src/bin/pg_verifybackup/t/002_algorithm.pl | 58 ++ src/bin/pg_verifybackup/t/003_corruption.pl | 288 ++++++++ src/bin/pg_verifybackup/t/004_options.pl | 89 +++ src/bin/pg_verifybackup/t/005_bad_manifest.pl | 204 ++++++ src/bin/pg_verifybackup/t/006_encoding.pl | 27 + src/bin/pg_verifybackup/t/007_wal.pl | 55 ++ 25 files changed, 2484 insertions(+), 2484 deletions(-) delete mode 100644 src/bin/pg_validatebackup/.gitignore delete mode 100644 src/bin/pg_validatebackup/Makefile 
delete mode 100644 src/bin/pg_validatebackup/parse_manifest.c delete mode 100644 src/bin/pg_validatebackup/parse_manifest.h delete mode 100644 src/bin/pg_validatebackup/pg_validatebackup.c delete mode 100644 src/bin/pg_validatebackup/t/001_basic.pl delete mode 100644 src/bin/pg_validatebackup/t/002_algorithm.pl delete mode 100644 src/bin/pg_validatebackup/t/003_corruption.pl delete mode 100644 src/bin/pg_validatebackup/t/004_options.pl delete mode 100644 src/bin/pg_validatebackup/t/005_bad_manifest.pl delete mode 100644 src/bin/pg_validatebackup/t/006_encoding.pl delete mode 100644 src/bin/pg_validatebackup/t/007_wal.pl create mode 100644 src/bin/pg_verifybackup/.gitignore create mode 100644 src/bin/pg_verifybackup/Makefile create mode 100644 src/bin/pg_verifybackup/parse_manifest.c create mode 100644 src/bin/pg_verifybackup/parse_manifest.h create mode 100644 src/bin/pg_verifybackup/pg_verifybackup.c create mode 100644 src/bin/pg_verifybackup/t/001_basic.pl create mode 100644 src/bin/pg_verifybackup/t/002_algorithm.pl create mode 100644 src/bin/pg_verifybackup/t/003_corruption.pl create mode 100644 src/bin/pg_verifybackup/t/004_options.pl create mode 100644 src/bin/pg_verifybackup/t/005_bad_manifest.pl create mode 100644 src/bin/pg_verifybackup/t/006_encoding.pl create mode 100644 src/bin/pg_verifybackup/t/007_wal.pl (limited to 'src') diff --git a/src/bin/Makefile b/src/bin/Makefile index 77bceea4fe6..8b870357a14 100644 --- a/src/bin/Makefile +++ b/src/bin/Makefile @@ -27,7 +27,7 @@ SUBDIRS = \ pg_test_fsync \ pg_test_timing \ pg_upgrade \ - pg_validatebackup \ + pg_verifybackup \ pg_waldump \ pgbench \ psql \ diff --git a/src/bin/pg_validatebackup/.gitignore b/src/bin/pg_validatebackup/.gitignore deleted file mode 100644 index 21e0a92429c..00000000000 --- a/src/bin/pg_validatebackup/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/pg_validatebackup -/tmp_check/ diff --git a/src/bin/pg_validatebackup/Makefile b/src/bin/pg_validatebackup/Makefile deleted file mode 
100644 index b1c2b7c1be3..00000000000 --- a/src/bin/pg_validatebackup/Makefile +++ /dev/null @@ -1,40 +0,0 @@ -# src/bin/pg_validatebackup/Makefile - -PGFILEDESC = "pg_validatebackup - validate a backup against a backup manifest" -PGAPPICON = win32 - -subdir = src/bin/pg_validatebackup -top_builddir = ../../.. -include $(top_builddir)/src/Makefile.global - -# We need libpq only because fe_utils does. -LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) - -OBJS = \ - $(WIN32RES) \ - parse_manifest.o \ - pg_validatebackup.o - -all: pg_validatebackup - -pg_validatebackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils - $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) - -install: all installdirs - $(INSTALL_PROGRAM) pg_validatebackup$(X) '$(DESTDIR)$(bindir)/pg_validatebackup$(X)' - -installdirs: - $(MKDIR_P) '$(DESTDIR)$(bindir)' - -uninstall: - rm -f '$(DESTDIR)$(bindir)/pg_validatebackup$(X)' - -clean distclean maintainer-clean: - rm -f pg_validatebackup$(X) $(OBJS) - rm -rf tmp_check - -check: - $(prove_check) - -installcheck: - $(prove_installcheck) diff --git a/src/bin/pg_validatebackup/parse_manifest.c b/src/bin/pg_validatebackup/parse_manifest.c deleted file mode 100644 index 0ec9dd6a13f..00000000000 --- a/src/bin/pg_validatebackup/parse_manifest.c +++ /dev/null @@ -1,740 +0,0 @@ -/*------------------------------------------------------------------------- - * - * parse_manifest.c - * Parse a backup manifest in JSON format. - * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/bin/pg_validatebackup/parse_manifest.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres_fe.h" - -#include "parse_manifest.h" -#include "common/jsonapi.h" - -/* - * Semantic states for JSON manifest parsing. 
- */ -typedef enum -{ - JM_EXPECT_TOPLEVEL_START, - JM_EXPECT_TOPLEVEL_END, - JM_EXPECT_TOPLEVEL_FIELD, - JM_EXPECT_VERSION_VALUE, - JM_EXPECT_FILES_START, - JM_EXPECT_FILES_NEXT, - JM_EXPECT_THIS_FILE_FIELD, - JM_EXPECT_THIS_FILE_VALUE, - JM_EXPECT_WAL_RANGES_START, - JM_EXPECT_WAL_RANGES_NEXT, - JM_EXPECT_THIS_WAL_RANGE_FIELD, - JM_EXPECT_THIS_WAL_RANGE_VALUE, - JM_EXPECT_MANIFEST_CHECKSUM_VALUE, - JM_EXPECT_EOF -} JsonManifestSemanticState; - -/* - * Possible fields for one file as described by the manifest. - */ -typedef enum -{ - JMFF_PATH, - JMFF_ENCODED_PATH, - JMFF_SIZE, - JMFF_LAST_MODIFIED, - JMFF_CHECKSUM_ALGORITHM, - JMFF_CHECKSUM -} JsonManifestFileField; - -/* - * Possible fields for one file as described by the manifest. - */ -typedef enum -{ - JMWRF_TIMELINE, - JMWRF_START_LSN, - JMWRF_END_LSN -} JsonManifestWALRangeField; - -/* - * Internal state used while decoding the JSON-format backup manifest. - */ -typedef struct -{ - JsonManifestParseContext *context; - JsonManifestSemanticState state; - - /* These fields are used for parsing objects in the list of files. */ - JsonManifestFileField file_field; - char *pathname; - char *encoded_pathname; - char *size; - char *algorithm; - pg_checksum_type checksum_algorithm; - char *checksum; - - /* These fields are used for parsing objects in the list of WAL ranges. */ - JsonManifestWALRangeField wal_range_field; - char *timeline; - char *start_lsn; - char *end_lsn; - - /* Miscellaneous other stuff. 
*/ - bool saw_version_field; - char *manifest_checksum; -} JsonManifestParseState; - -static void json_manifest_object_start(void *state); -static void json_manifest_object_end(void *state); -static void json_manifest_array_start(void *state); -static void json_manifest_array_end(void *state); -static void json_manifest_object_field_start(void *state, char *fname, - bool isnull); -static void json_manifest_scalar(void *state, char *token, - JsonTokenType tokentype); -static void json_manifest_finalize_file(JsonManifestParseState *parse); -static void json_manifest_finalize_wal_range(JsonManifestParseState *parse); -static void verify_manifest_checksum(JsonManifestParseState *parse, - char *buffer, size_t size); -static void json_manifest_parse_failure(JsonManifestParseContext *context, - char *msg); - -static int hexdecode_char(char c); -static bool hexdecode_string(uint8 *result, char *input, int nbytes); -static bool parse_xlogrecptr(XLogRecPtr *result, char *input); - -/* - * Main entrypoint to parse a JSON-format backup manifest. - * - * Caller should set up the parsing context and then invoke this function. - * For each file whose information is extracted from the manifest, - * context->perfile_cb is invoked. In case of trouble, context->error_cb is - * invoked and is expected not to return. - */ -void -json_parse_manifest(JsonManifestParseContext *context, char *buffer, - size_t size) -{ - JsonLexContext *lex; - JsonParseErrorType json_error; - JsonSemAction sem; - JsonManifestParseState parse; - - /* Set up our private parsing context. */ - parse.context = context; - parse.state = JM_EXPECT_TOPLEVEL_START; - parse.saw_version_field = false; - - /* Create a JSON lexing context. */ - lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true); - - /* Set up semantic actions. 
*/ - sem.semstate = &parse; - sem.object_start = json_manifest_object_start; - sem.object_end = json_manifest_object_end; - sem.array_start = json_manifest_array_start; - sem.array_end = json_manifest_array_end; - sem.object_field_start = json_manifest_object_field_start; - sem.object_field_end = NULL; - sem.array_element_start = NULL; - sem.array_element_end = NULL; - sem.scalar = json_manifest_scalar; - - /* Run the actual JSON parser. */ - json_error = pg_parse_json(lex, &sem); - if (json_error != JSON_SUCCESS) - json_manifest_parse_failure(context, json_errdetail(json_error, lex)); - if (parse.state != JM_EXPECT_EOF) - json_manifest_parse_failure(context, "manifest ended unexpectedly"); - - /* Validate the checksum. */ - verify_manifest_checksum(&parse, buffer, size); -} - -/* - * Invoked at the start of each object in the JSON document. - * - * The document as a whole is expected to be an object; each file and each - * WAL range is also expected to be an object. If we're anywhere else in the - * document, it's an error. - */ -static void -json_manifest_object_start(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_START: - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - case JM_EXPECT_FILES_NEXT: - parse->state = JM_EXPECT_THIS_FILE_FIELD; - parse->pathname = NULL; - parse->encoded_pathname = NULL; - parse->size = NULL; - parse->algorithm = NULL; - parse->checksum = NULL; - break; - case JM_EXPECT_WAL_RANGES_NEXT: - parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; - parse->timeline = NULL; - parse->start_lsn = NULL; - parse->end_lsn = NULL; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected object start"); - break; - } -} - -/* - * Invoked at the end of each object in the JSON document. - * - * The possible cases here are the same as for json_manifest_object_start. 
- * There's nothing special to do at the end of the document, but when we - * reach the end of an object representing a particular file or WAL range, - * we must call json_manifest_finalize_file() to save the associated details. - */ -static void -json_manifest_object_end(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_END: - parse->state = JM_EXPECT_EOF; - break; - case JM_EXPECT_THIS_FILE_FIELD: - json_manifest_finalize_file(parse); - parse->state = JM_EXPECT_FILES_NEXT; - break; - case JM_EXPECT_THIS_WAL_RANGE_FIELD: - json_manifest_finalize_wal_range(parse); - parse->state = JM_EXPECT_WAL_RANGES_NEXT; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected object end"); - break; - } -} - -/* - * Invoked at the start of each array in the JSON document. - * - * Within the toplevel object, the value associated with the "Files" key - * should be an array. Similarly for the "WAL-Ranges" key. No other arrays - * are expected. - */ -static void -json_manifest_array_start(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_FILES_START: - parse->state = JM_EXPECT_FILES_NEXT; - break; - case JM_EXPECT_WAL_RANGES_START: - parse->state = JM_EXPECT_WAL_RANGES_NEXT; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected array start"); - break; - } -} - -/* - * Invoked at the end of each array in the JSON document. - * - * The cases here are analogous to those in json_manifest_array_start. - */ -static void -json_manifest_array_end(void *state) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_FILES_NEXT: - case JM_EXPECT_WAL_RANGES_NEXT: - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - default: - json_manifest_parse_failure(parse->context, - "unexpected array end"); - break; - } -} - -/* - * Invoked at the start of each object field in the JSON document. 
- */ -static void -json_manifest_object_field_start(void *state, char *fname, bool isnull) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_TOPLEVEL_FIELD: - - /* - * Inside toplevel object. The version indicator should always be - * the first field. - */ - if (!parse->saw_version_field) - { - if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) - json_manifest_parse_failure(parse->context, - "expected version indicator"); - parse->state = JM_EXPECT_VERSION_VALUE; - parse->saw_version_field = true; - break; - } - - /* Is this the list of files? */ - if (strcmp(fname, "Files") == 0) - { - parse->state = JM_EXPECT_FILES_START; - break; - } - - /* Is this the list of WAL ranges? */ - if (strcmp(fname, "WAL-Ranges") == 0) - { - parse->state = JM_EXPECT_WAL_RANGES_START; - break; - } - - /* Is this the manifest checksum? */ - if (strcmp(fname, "Manifest-Checksum") == 0) - { - parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; - break; - } - - /* It's not a field we recognize. */ - json_manifest_parse_failure(parse->context, - "unknown toplevel field"); - break; - - case JM_EXPECT_THIS_FILE_FIELD: - /* Inside object for one file; which key have we got? */ - if (strcmp(fname, "Path") == 0) - parse->file_field = JMFF_PATH; - else if (strcmp(fname, "Encoded-Path") == 0) - parse->file_field = JMFF_ENCODED_PATH; - else if (strcmp(fname, "Size") == 0) - parse->file_field = JMFF_SIZE; - else if (strcmp(fname, "Last-Modified") == 0) - parse->file_field = JMFF_LAST_MODIFIED; - else if (strcmp(fname, "Checksum-Algorithm") == 0) - parse->file_field = JMFF_CHECKSUM_ALGORITHM; - else if (strcmp(fname, "Checksum") == 0) - parse->file_field = JMFF_CHECKSUM; - else - json_manifest_parse_failure(parse->context, - "unexpected file field"); - parse->state = JM_EXPECT_THIS_FILE_VALUE; - break; - - case JM_EXPECT_THIS_WAL_RANGE_FIELD: - /* Inside object for one file; which key have we got? 
*/ - if (strcmp(fname, "Timeline") == 0) - parse->wal_range_field = JMWRF_TIMELINE; - else if (strcmp(fname, "Start-LSN") == 0) - parse->wal_range_field = JMWRF_START_LSN; - else if (strcmp(fname, "End-LSN") == 0) - parse->wal_range_field = JMWRF_END_LSN; - else - json_manifest_parse_failure(parse->context, - "unexpected wal range field"); - parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE; - break; - - default: - json_manifest_parse_failure(parse->context, - "unexpected object field"); - break; - } -} - -/* - * Invoked at the start of each scalar in the JSON document. - * - * Object field names don't reach this code; those are handled by - * json_manifest_object_field_start. When we're inside of the object for - * a particular file or WAL range, that function will have noticed the name - * of the field, and we'll get the corresponding value here. When we're in - * the toplevel object, the parse state itself tells us which field this is. - * - * In all cases except for PostgreSQL-Backup-Manifest-Version, which we - * can just check on the spot, the goal here is just to save the value in - * the parse state for later use. We don't actually do anything until we - * reach either the end of the object representing this file, or the end - * of the manifest, as the case may be. 
- */ -static void -json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) -{ - JsonManifestParseState *parse = state; - - switch (parse->state) - { - case JM_EXPECT_VERSION_VALUE: - if (strcmp(token, "1") != 0) - json_manifest_parse_failure(parse->context, - "unexpected manifest version"); - parse->state = JM_EXPECT_TOPLEVEL_FIELD; - break; - - case JM_EXPECT_THIS_FILE_VALUE: - switch (parse->file_field) - { - case JMFF_PATH: - parse->pathname = token; - break; - case JMFF_ENCODED_PATH: - parse->encoded_pathname = token; - break; - case JMFF_SIZE: - parse->size = token; - break; - case JMFF_LAST_MODIFIED: - pfree(token); /* unused */ - break; - case JMFF_CHECKSUM_ALGORITHM: - parse->algorithm = token; - break; - case JMFF_CHECKSUM: - parse->checksum = token; - break; - } - parse->state = JM_EXPECT_THIS_FILE_FIELD; - break; - - case JM_EXPECT_THIS_WAL_RANGE_VALUE: - switch (parse->wal_range_field) - { - case JMWRF_TIMELINE: - parse->timeline = token; - break; - case JMWRF_START_LSN: - parse->start_lsn = token; - break; - case JMWRF_END_LSN: - parse->end_lsn = token; - break; - } - parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; - break; - - case JM_EXPECT_MANIFEST_CHECKSUM_VALUE: - parse->state = JM_EXPECT_TOPLEVEL_END; - parse->manifest_checksum = token; - break; - - default: - json_manifest_parse_failure(parse->context, "unexpected scalar"); - break; - } -} - -/* - * Do additional parsing and sanity-checking of the details gathered for one - * file, and invoke the per-file callback so that the caller gets those - * details. This happens for each file when the corresponding JSON object is - * completely parsed. - */ -static void -json_manifest_finalize_file(JsonManifestParseState *parse) -{ - JsonManifestParseContext *context = parse->context; - size_t size; - char *ep; - int checksum_string_length; - pg_checksum_type checksum_type; - int checksum_length; - uint8 *checksum_payload; - - /* Pathname and size are required. 
*/ - if (parse->pathname == NULL && parse->encoded_pathname == NULL) - json_manifest_parse_failure(parse->context, "missing pathname"); - if (parse->pathname != NULL && parse->encoded_pathname != NULL) - json_manifest_parse_failure(parse->context, - "both pathname and encoded pathname"); - if (parse->size == NULL) - json_manifest_parse_failure(parse->context, "missing size"); - if (parse->algorithm == NULL && parse->checksum != NULL) - json_manifest_parse_failure(parse->context, - "checksum without algorithm"); - - /* Decode encoded pathname, if that's what we have. */ - if (parse->encoded_pathname != NULL) - { - int encoded_length = strlen(parse->encoded_pathname); - int raw_length = encoded_length / 2; - - parse->pathname = palloc(raw_length + 1); - if (encoded_length % 2 != 0 || - !hexdecode_string((uint8 *) parse->pathname, - parse->encoded_pathname, - raw_length)) - json_manifest_parse_failure(parse->context, - "unable to decode filename"); - parse->pathname[raw_length] = '\0'; - pfree(parse->encoded_pathname); - parse->encoded_pathname = NULL; - } - - /* Parse size. */ - size = strtoul(parse->size, &ep, 10); - if (*ep) - json_manifest_parse_failure(parse->context, - "file size is not an integer"); - - /* Parse the checksum algorithm, if it's present. */ - if (parse->algorithm == NULL) - checksum_type = CHECKSUM_TYPE_NONE; - else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type)) - context->error_cb(context, "unrecognized checksum algorithm: \"%s\"", - parse->algorithm); - - /* Parse the checksum payload, if it's present. */ - checksum_string_length = parse->checksum == NULL ? 
0 - : strlen(parse->checksum); - if (checksum_string_length == 0) - { - checksum_length = 0; - checksum_payload = NULL; - } - else - { - checksum_length = checksum_string_length / 2; - checksum_payload = palloc(checksum_length); - if (checksum_string_length % 2 != 0 || - !hexdecode_string(checksum_payload, parse->checksum, - checksum_length)) - context->error_cb(context, - "invalid checksum for file \"%s\": \"%s\"", - parse->pathname, parse->checksum); - } - - /* Invoke the callback with the details we've gathered. */ - context->perfile_cb(context, parse->pathname, size, - checksum_type, checksum_length, checksum_payload); - - /* Free memory we no longer need. */ - if (parse->size != NULL) - { - pfree(parse->size); - parse->size = NULL; - } - if (parse->algorithm != NULL) - { - pfree(parse->algorithm); - parse->algorithm = NULL; - } - if (parse->checksum != NULL) - { - pfree(parse->checksum); - parse->checksum = NULL; - } -} - -/* - * Do additional parsing and sanity-checking of the details gathered for one - * WAL range, and invoke the per-WAL-range callback so that the caller gets - * those details. This happens for each WAL range when the corresponding JSON - * object is completely parsed. - */ -static void -json_manifest_finalize_wal_range(JsonManifestParseState *parse) -{ - JsonManifestParseContext *context = parse->context; - TimeLineID tli; - XLogRecPtr start_lsn, - end_lsn; - char *ep; - - /* Make sure all fields are present. */ - if (parse->timeline == NULL) - json_manifest_parse_failure(parse->context, "missing timeline"); - if (parse->start_lsn == NULL) - json_manifest_parse_failure(parse->context, "missing start LSN"); - if (parse->end_lsn == NULL) - json_manifest_parse_failure(parse->context, "missing end LSN"); - - /* Parse timeline. 
*/ - tli = strtoul(parse->timeline, &ep, 10); - if (*ep) - json_manifest_parse_failure(parse->context, - "timeline is not an integer"); - if (!parse_xlogrecptr(&start_lsn, parse->start_lsn)) - json_manifest_parse_failure(parse->context, - "unable to parse start LSN"); - if (!parse_xlogrecptr(&end_lsn, parse->end_lsn)) - json_manifest_parse_failure(parse->context, - "unable to parse end LSN"); - - /* Invoke the callback with the details we've gathered. */ - context->perwalrange_cb(context, tli, start_lsn, end_lsn); - - /* Free memory we no longer need. */ - if (parse->timeline != NULL) - { - pfree(parse->timeline); - parse->timeline = NULL; - } - if (parse->start_lsn != NULL) - { - pfree(parse->start_lsn); - parse->start_lsn = NULL; - } - if (parse->end_lsn != NULL) - { - pfree(parse->end_lsn); - parse->end_lsn = NULL; - } -} - -/* - * Verify that the manifest checksum is correct. - * - * The last line of the manifest file is excluded from the manifest checksum, - * because the last line is expected to contain the checksum that covers - * the rest of the file. - */ -static void -verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, - size_t size) -{ - JsonManifestParseContext *context = parse->context; - size_t i; - size_t number_of_newlines = 0; - size_t ultimate_newline = 0; - size_t penultimate_newline = 0; - pg_sha256_ctx manifest_ctx; - uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH]; - uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH]; - - /* Find the last two newlines in the file. */ - for (i = 0; i < size; ++i) - { - if (buffer[i] == '\n') - { - ++number_of_newlines; - penultimate_newline = ultimate_newline; - ultimate_newline = i; - } - } - - /* - * Make sure that the last newline is right at the end, and that there are - * at least two lines total. We need this to be true in order for the - * following code, which computes the manifest checksum, to work properly. 
- */ - if (number_of_newlines < 2) - json_manifest_parse_failure(parse->context, - "expected at least 2 lines"); - if (ultimate_newline != size - 1) - json_manifest_parse_failure(parse->context, - "last line not newline-terminated"); - - /* Checksum the rest. */ - pg_sha256_init(&manifest_ctx); - pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1); - pg_sha256_final(&manifest_ctx, manifest_checksum_actual); - - /* Now verify it. */ - if (parse->manifest_checksum == NULL) - context->error_cb(parse->context, "manifest has no checksum"); - if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 || - !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum, - PG_SHA256_DIGEST_LENGTH)) - context->error_cb(context, "invalid manifest checksum: \"%s\"", - parse->manifest_checksum); - if (memcmp(manifest_checksum_actual, manifest_checksum_expected, - PG_SHA256_DIGEST_LENGTH) != 0) - context->error_cb(context, "manifest checksum mismatch"); -} - -/* - * Report a parse error. - * - * This is intended to be used for fairly low-level failures that probably - * shouldn't occur unless somebody has deliberately constructed a bad manifest, - * or unless the server is generating bad manifests due to some bug. msg should - * be a short string giving some hint as to what the problem is. - */ -static void -json_manifest_parse_failure(JsonManifestParseContext *context, char *msg) -{ - context->error_cb(context, "could not parse backup manifest: %s", msg); -} - -/* - * Convert a character which represents a hexadecimal digit to an integer. - * - * Returns -1 if the character is not a hexadecimal digit. - */ -static int -hexdecode_char(char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - - return -1; -} - -/* - * Decode a hex string into a byte string, 2 hex chars per byte. 
- * - * Returns false if invalid characters are encountered; otherwise true. - */ -static bool -hexdecode_string(uint8 *result, char *input, int nbytes) -{ - int i; - - for (i = 0; i < nbytes; ++i) - { - int n1 = hexdecode_char(input[i * 2]); - int n2 = hexdecode_char(input[i * 2 + 1]); - - if (n1 < 0 || n2 < 0) - return false; - result[i] = n1 * 16 + n2; - } - - return true; -} - -/* - * Parse an XLogRecPtr expressed using the usual string format. - */ -static bool -parse_xlogrecptr(XLogRecPtr *result, char *input) -{ - uint32 hi; - uint32 lo; - - if (sscanf(input, "%X/%X", &hi, &lo) != 2) - return false; - *result = ((uint64) hi) << 32 | lo; - return true; -} diff --git a/src/bin/pg_validatebackup/parse_manifest.h b/src/bin/pg_validatebackup/parse_manifest.h deleted file mode 100644 index f0a4fac36bc..00000000000 --- a/src/bin/pg_validatebackup/parse_manifest.h +++ /dev/null @@ -1,45 +0,0 @@ -/*------------------------------------------------------------------------- - * - * parse_manifest.h - * Parse a backup manifest in JSON format. 
- * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/bin/pg_validatebackup/parse_manifest.h - * - *------------------------------------------------------------------------- - */ - -#ifndef PARSE_MANIFEST_H -#define PARSE_MANIFEST_H - -#include "access/xlogdefs.h" -#include "common/checksum_helper.h" -#include "mb/pg_wchar.h" - -struct JsonManifestParseContext; -typedef struct JsonManifestParseContext JsonManifestParseContext; - -typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *, - char *pathname, - size_t size, pg_checksum_type checksum_type, - int checksum_length, uint8 *checksum_payload); -typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *, - TimeLineID tli, - XLogRecPtr start_lsn, XLogRecPtr end_lsn); -typedef void (*json_manifest_error_callback)(JsonManifestParseContext *, - char *fmt, ...) pg_attribute_printf(2, 3); - -struct JsonManifestParseContext -{ - void *private_data; - json_manifest_perfile_callback perfile_cb; - json_manifest_perwalrange_callback perwalrange_cb; - json_manifest_error_callback error_cb; -}; - -extern void json_parse_manifest(JsonManifestParseContext *context, - char *buffer, size_t size); - -#endif diff --git a/src/bin/pg_validatebackup/pg_validatebackup.c b/src/bin/pg_validatebackup/pg_validatebackup.c deleted file mode 100644 index f2746385005..00000000000 --- a/src/bin/pg_validatebackup/pg_validatebackup.c +++ /dev/null @@ -1,905 +0,0 @@ -/*------------------------------------------------------------------------- - * - * pg_validatebackup.c - * Validate a backup against a backup manifest. 
- * - * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group - * Portions Copyright (c) 1994, Regents of the University of California - * - * src/bin/pg_validatebackup/pg_validatebackup.c - * - *------------------------------------------------------------------------- - */ - -#include "postgres_fe.h" - -#include -#include -#include - -#include "common/hashfn.h" -#include "common/logging.h" -#include "fe_utils/simple_list.h" -#include "getopt_long.h" -#include "parse_manifest.h" - -/* - * For efficiency, we'd like our hash table containing information about the - * manifest to start out with approximately the correct number of entries. - * There's no way to know the exact number of entries without reading the whole - * file, but we can get an estimate by dividing the file size by the estimated - * number of bytes per line. - * - * This could be off by about a factor of two in either direction, because the - * checksum algorithm has a big impact on the line lengths; e.g. a SHA512 - * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there - * might be no checksum at all. - */ -#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100 - -/* - * How many bytes should we try to read from a file at once? - */ -#define READ_CHUNK_SIZE 4096 - -/* - * Each file described by the manifest file is parsed to produce an object - * like this. - */ -typedef struct manifest_file -{ - uint32 status; /* hash status */ - char *pathname; - size_t size; - pg_checksum_type checksum_type; - int checksum_length; - uint8 *checksum_payload; - bool matched; - bool bad; -} manifest_file; - -/* - * Define a hash table which we can use to store information about the files - * mentioned in the backup manifest. 
- */ -static uint32 hash_string_pointer(char *s); -#define SH_PREFIX manifest_files -#define SH_ELEMENT_TYPE manifest_file -#define SH_KEY_TYPE char * -#define SH_KEY pathname -#define SH_HASH_KEY(tb, key) hash_string_pointer(key) -#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) -#define SH_SCOPE static inline -#define SH_RAW_ALLOCATOR pg_malloc0 -#define SH_DECLARE -#define SH_DEFINE -#include "lib/simplehash.h" - -/* - * Each WAL range described by the manifest file is parsed to produce an - * object like this. - */ -typedef struct manifest_wal_range -{ - TimeLineID tli; - XLogRecPtr start_lsn; - XLogRecPtr end_lsn; - struct manifest_wal_range *next; - struct manifest_wal_range *prev; -} manifest_wal_range; - -/* - * Details we need in callbacks that occur while parsing a backup manifest. - */ -typedef struct parser_context -{ - manifest_files_hash *ht; - manifest_wal_range *first_wal_range; - manifest_wal_range *last_wal_range; -} parser_context; - -/* - * All of the context information we need while checking a backup manifest. - */ -typedef struct validator_context -{ - manifest_files_hash *ht; - char *backup_directory; - SimpleStringList ignore_list; - bool exit_on_error; - bool saw_any_error; -} validator_context; - -static void parse_manifest_file(char *manifest_path, - manifest_files_hash **ht_p, - manifest_wal_range **first_wal_range_p); - -static void record_manifest_details_for_file(JsonManifestParseContext *context, - char *pathname, size_t size, - pg_checksum_type checksum_type, - int checksum_length, - uint8 *checksum_payload); -static void record_manifest_details_for_wal_range(JsonManifestParseContext *context, - TimeLineID tli, - XLogRecPtr start_lsn, - XLogRecPtr end_lsn); -static void report_manifest_error(JsonManifestParseContext *context, - char *fmt,...) 
- pg_attribute_printf(2, 3) pg_attribute_noreturn(); - -static void validate_backup_directory(validator_context *context, - char *relpath, char *fullpath); -static void validate_backup_file(validator_context *context, - char *relpath, char *fullpath); -static void report_extra_backup_files(validator_context *context); -static void validate_backup_checksums(validator_context *context); -static void validate_file_checksum(validator_context *context, - manifest_file *m, char *pathname); -static void parse_required_wal(validator_context *context, - char *pg_waldump_path, - char *wal_directory, - manifest_wal_range *first_wal_range); - -static void report_backup_error(validator_context *context, - const char *pg_restrict fmt,...) - pg_attribute_printf(2, 3); -static void report_fatal_error(const char *pg_restrict fmt,...) - pg_attribute_printf(1, 2) pg_attribute_noreturn(); -static bool should_ignore_relpath(validator_context *context, char *relpath); - -static void usage(void); - -static const char *progname; - -/* - * Main entry point. 
- */ -int -main(int argc, char **argv) -{ - static struct option long_options[] = { - {"exit-on-error", no_argument, NULL, 'e'}, - {"ignore", required_argument, NULL, 'i'}, - {"manifest-path", required_argument, NULL, 'm'}, - {"no-parse-wal", no_argument, NULL, 'n'}, - {"print-parse-wal", no_argument, NULL, 'p'}, - {"quiet", no_argument, NULL, 'q'}, - {"skip-checksums", no_argument, NULL, 's'}, - {"wal-directory", required_argument, NULL, 'w'}, - {NULL, 0, NULL, 0} - }; - - int c; - validator_context context; - manifest_wal_range *first_wal_range; - char *manifest_path = NULL; - bool no_parse_wal = false; - bool quiet = false; - bool skip_checksums = false; - char *wal_directory = NULL; - char *pg_waldump_path = NULL; - - pg_logging_init(argv[0]); - set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_validatebackup")); - progname = get_progname(argv[0]); - - memset(&context, 0, sizeof(context)); - - if (argc > 1) - { - if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) - { - usage(); - exit(0); - } - if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) - { - puts("pg_validatebackup (PostgreSQL) " PG_VERSION); - exit(0); - } - } - - /* - * Skip certain files in the toplevel directory. - * - * Ignore the backup_manifest file, because it's not included in the - * backup manifest. - * - * Ignore the pg_wal directory, because those files are not included in - * the backup manifest either, since they are fetched separately from the - * backup itself, and validated via a separate mechanism. - * - * Ignore postgresql.auto.conf, recovery.signal, and standby.signal, - * because we expect that those files may sometimes be created or changed - * as part of the backup process. For example, pg_basebackup -R will - * modify postgresql.auto.conf and create standby.signal. 
- */ - simple_string_list_append(&context.ignore_list, "backup_manifest"); - simple_string_list_append(&context.ignore_list, "pg_wal"); - simple_string_list_append(&context.ignore_list, "postgresql.auto.conf"); - simple_string_list_append(&context.ignore_list, "recovery.signal"); - simple_string_list_append(&context.ignore_list, "standby.signal"); - - while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1) - { - switch (c) - { - case 'e': - context.exit_on_error = true; - break; - case 'i': - { - char *arg = pstrdup(optarg); - - canonicalize_path(arg); - simple_string_list_append(&context.ignore_list, arg); - break; - } - case 'm': - manifest_path = pstrdup(optarg); - canonicalize_path(manifest_path); - break; - case 'n': - no_parse_wal = true; - break; - case 'q': - quiet = true; - break; - case 's': - skip_checksums = true; - break; - case 'w': - wal_directory = pstrdup(optarg); - canonicalize_path(wal_directory); - break; - default: - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), - progname); - exit(1); - } - } - - /* Get backup directory name */ - if (optind >= argc) - { - pg_log_fatal("no backup directory specified"); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), - progname); - exit(1); - } - context.backup_directory = pstrdup(argv[optind++]); - canonicalize_path(context.backup_directory); - - /* Complain if any arguments remain */ - if (optind < argc) - { - pg_log_fatal("too many command-line arguments (first is \"%s\")", - argv[optind]); - fprintf(stderr, _("Try \"%s --help\" for more information.\n"), - progname); - exit(1); - } - - /* Unless --no-parse-wal was specified, we will need pg_waldump. 
*/ - if (!no_parse_wal) - { - int ret; - - pg_waldump_path = pg_malloc(MAXPGPATH); - ret = find_other_exec(argv[0], "pg_waldump", - "pg_waldump (PostgreSQL) " PG_VERSION "\n", - pg_waldump_path); - if (ret < 0) - { - char full_path[MAXPGPATH]; - - if (find_my_exec(argv[0], full_path) < 0) - strlcpy(full_path, progname, sizeof(full_path)); - if (ret == -1) - pg_log_fatal("The program \"%s\" is needed by %s but was\n" - "not found in the same directory as \"%s\".\n" - "Check your installation.", - "pg_waldump", "pg_validatebackup", full_path); - else - pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n" - "not the same version as %s.\n" - "Check your installation.", - "pg_waldump", full_path, "pg_validatebackup"); - } - } - - /* By default, look for the manifest in the backup directory. */ - if (manifest_path == NULL) - manifest_path = psprintf("%s/backup_manifest", - context.backup_directory); - - /* By default, look for the WAL in the backup directory, too. */ - if (wal_directory == NULL) - wal_directory = psprintf("%s/pg_wal", context.backup_directory); - - /* - * Try to read the manifest. We treat any errors encountered while parsing - * the manifest as fatal; there doesn't seem to be much point in trying to - * validate the backup directory against a corrupted manifest. - */ - parse_manifest_file(manifest_path, &context.ht, &first_wal_range); - - /* - * Now scan the files in the backup directory. At this stage, we verify - * that every file on disk is present in the manifest and that the sizes - * match. We also set the "matched" flag on every manifest entry that - * corresponds to a file on disk. - */ - validate_backup_directory(&context, NULL, context.backup_directory); - - /* - * The "matched" flag should now be set on every entry in the hash table. - * Any entries for which the bit is not set are files mentioned in the - * manifest that don't exist on disk. 
- */ - report_extra_backup_files(&context); - - /* - * Now do the expensive work of verifying file checksums, unless we were - * told to skip it. - */ - if (!skip_checksums) - validate_backup_checksums(&context); - - /* - * Try to parse the required ranges of WAL records, unless we were told - * not to do so. - */ - if (!no_parse_wal) - parse_required_wal(&context, pg_waldump_path, - wal_directory, first_wal_range); - - /* - * If everything looks OK, tell the user this, unless we were asked to - * work quietly. - */ - if (!context.saw_any_error && !quiet) - printf("backup successfully verified\n"); - - return context.saw_any_error ? 1 : 0; -} - -/* - * Parse a manifest file. Construct a hash table with information about - * all the files it mentions, and a linked list of all the WAL ranges it - * mentions. - */ -static void -parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p, - manifest_wal_range **first_wal_range_p) -{ - int fd; - struct stat statbuf; - off_t estimate; - uint32 initial_size; - manifest_files_hash *ht; - char *buffer; - int rc; - parser_context private_context; - JsonManifestParseContext context; - - /* Open the manifest file. */ - if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0) - report_fatal_error("could not open file \"%s\": %m", manifest_path); - - /* Figure out how big the manifest is. */ - if (fstat(fd, &statbuf) != 0) - report_fatal_error("could not stat file \"%s\": %m", manifest_path); - - /* Guess how large to make the hash table based on the manifest size. */ - estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE; - initial_size = Min(PG_UINT32_MAX, Max(estimate, 256)); - - /* Create the hash table. */ - ht = manifest_files_create(initial_size, NULL); - - /* - * Slurp in the whole file. - * - * This is not ideal, but there's currently no easy way to get - * pg_parse_json() to perform incremental parsing. 
- */ - buffer = pg_malloc(statbuf.st_size); - rc = read(fd, buffer, statbuf.st_size); - if (rc != statbuf.st_size) - { - if (rc < 0) - report_fatal_error("could not read file \"%s\": %m", - manifest_path); - else - report_fatal_error("could not read file \"%s\": read %d of %zu", - manifest_path, rc, (size_t) statbuf.st_size); - } - - /* Close the manifest file. */ - close(fd); - - /* Parse the manifest. */ - private_context.ht = ht; - private_context.first_wal_range = NULL; - private_context.last_wal_range = NULL; - context.private_data = &private_context; - context.perfile_cb = record_manifest_details_for_file; - context.perwalrange_cb = record_manifest_details_for_wal_range; - context.error_cb = report_manifest_error; - json_parse_manifest(&context, buffer, statbuf.st_size); - - /* Done with the buffer. */ - pfree(buffer); - - /* Return the file hash table and WAL range list we constructed. */ - *ht_p = ht; - *first_wal_range_p = private_context.first_wal_range; -} - -/* - * Report an error while parsing the manifest. - * - * We consider all such errors to be fatal errors. The manifest parser - * expects this function not to return. - */ -static void -report_manifest_error(JsonManifestParseContext *context, char *fmt,...) -{ - va_list ap; - - va_start(ap, fmt); - pg_log_generic_v(PG_LOG_FATAL, fmt, ap); - va_end(ap); - - exit(1); -} - -/* - * Record details extracted from the backup manifest for one file. - */ -static void -record_manifest_details_for_file(JsonManifestParseContext *context, - char *pathname, size_t size, - pg_checksum_type checksum_type, - int checksum_length, uint8 *checksum_payload) -{ - parser_context *pcxt = context->private_data; - manifest_files_hash *ht = pcxt->ht; - manifest_file *m; - bool found; - - /* Make a new entry in the hash table for this file. */ - m = manifest_files_insert(ht, pathname, &found); - if (found) - report_fatal_error("duplicate pathname in backup manifest: \"%s\"", - pathname); - - /* Initialize the entry. 
*/ - m->size = size; - m->checksum_type = checksum_type; - m->checksum_length = checksum_length; - m->checksum_payload = checksum_payload; - m->matched = false; - m->bad = false; -} - -/* - * Record details extracted from the backup manifest for one WAL range. - */ -static void -record_manifest_details_for_wal_range(JsonManifestParseContext *context, - TimeLineID tli, - XLogRecPtr start_lsn, XLogRecPtr end_lsn) -{ - parser_context *pcxt = context->private_data; - manifest_wal_range *range; - - /* Allocate and initialize a struct describing this WAL range. */ - range = palloc(sizeof(manifest_wal_range)); - range->tli = tli; - range->start_lsn = start_lsn; - range->end_lsn = end_lsn; - range->prev = pcxt->last_wal_range; - range->next = NULL; - - /* Add it to the end of the list. */ - if (pcxt->first_wal_range == NULL) - pcxt->first_wal_range = range; - else - pcxt->last_wal_range->next = range; - pcxt->last_wal_range = range; -} - -/* - * Validate one directory. - * - * 'relpath' is NULL if we are to validate the top-level backup directory, - * and otherwise the relative path to the directory that is to be validated. - * - * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual - * filesystem path at which it can be found. - */ -static void -validate_backup_directory(validator_context *context, char *relpath, - char *fullpath) -{ - DIR *dir; - struct dirent *dirent; - - dir = opendir(fullpath); - if (dir == NULL) - { - /* - * If even the toplevel backup directory cannot be found, treat this - * as a fatal error. - */ - if (relpath == NULL) - report_fatal_error("could not open directory \"%s\": %m", fullpath); - - /* - * Otherwise, treat this as a non-fatal error, but ignore any further - * errors related to this path and anything beneath it. 
- */ - report_backup_error(context, - "could not open directory \"%s\": %m", fullpath); - simple_string_list_append(&context->ignore_list, relpath); - - return; - } - - while (errno = 0, (dirent = readdir(dir)) != NULL) - { - char *filename = dirent->d_name; - char *newfullpath = psprintf("%s/%s", fullpath, filename); - char *newrelpath; - - /* Skip "." and ".." */ - if (filename[0] == '.' && (filename[1] == '\0' - || strcmp(filename, "..") == 0)) - continue; - - if (relpath == NULL) - newrelpath = pstrdup(filename); - else - newrelpath = psprintf("%s/%s", relpath, filename); - - if (!should_ignore_relpath(context, newrelpath)) - validate_backup_file(context, newrelpath, newfullpath); - - pfree(newfullpath); - pfree(newrelpath); - } - - if (closedir(dir)) - { - report_backup_error(context, - "could not close directory \"%s\": %m", fullpath); - return; - } -} - -/* - * Validate one file (which might actually be a directory or a symlink). - * - * The arguments to this function have the same meaning as the arguments to - * validate_backup_directory. - */ -static void -validate_backup_file(validator_context *context, char *relpath, char *fullpath) -{ - struct stat sb; - manifest_file *m; - - if (stat(fullpath, &sb) != 0) - { - report_backup_error(context, - "could not stat file or directory \"%s\": %m", - relpath); - - /* - * Suppress further errors related to this path name and, if it's a - * directory, anything underneath it. - */ - simple_string_list_append(&context->ignore_list, relpath); - - return; - } - - /* If it's a directory, just recurse. */ - if (S_ISDIR(sb.st_mode)) - { - validate_backup_directory(context, relpath, fullpath); - return; - } - - /* If it's not a directory, it should be a plain file. */ - if (!S_ISREG(sb.st_mode)) - { - report_backup_error(context, - "\"%s\" is not a file or directory", - relpath); - return; - } - - /* Check whether there's an entry in the manifest hash. 
*/ - m = manifest_files_lookup(context->ht, relpath); - if (m == NULL) - { - report_backup_error(context, - "\"%s\" is present on disk but not in the manifest", - relpath); - return; - } - - /* Flag this entry as having been encountered in the filesystem. */ - m->matched = true; - - /* Check that the size matches. */ - if (m->size != sb.st_size) - { - report_backup_error(context, - "\"%s\" has size %zu on disk but size %zu in the manifest", - relpath, (size_t) sb.st_size, m->size); - m->bad = true; - } - - /* - * We don't validate checksums at this stage. We first finish validating - * that we have the expected set of files with the expected sizes, and - * only afterwards verify the checksums. That's because computing - * checksums may take a while, and we'd like to report more obvious - * problems quickly. - */ -} - -/* - * Scan the hash table for entries where the 'matched' flag is not set; report - * that such files are present in the manifest but not on disk. - */ -static void -report_extra_backup_files(validator_context *context) -{ - manifest_files_iterator it; - manifest_file *m; - - manifest_files_start_iterate(context->ht, &it); - while ((m = manifest_files_iterate(context->ht, &it)) != NULL) - if (!m->matched && !should_ignore_relpath(context, m->pathname)) - report_backup_error(context, - "\"%s\" is present in the manifest but not on disk", - m->pathname); -} - -/* - * Validate checksums for hash table entries that are otherwise unproblematic. - * If we've already reported some problem related to a hash table entry, or - * if it has no checksum, just skip it. 
- */ -static void -validate_backup_checksums(validator_context *context) -{ - manifest_files_iterator it; - manifest_file *m; - - manifest_files_start_iterate(context->ht, &it); - while ((m = manifest_files_iterate(context->ht, &it)) != NULL) - { - if (m->matched && !m->bad && m->checksum_type != CHECKSUM_TYPE_NONE && - !should_ignore_relpath(context, m->pathname)) - { - char *fullpath; - - /* Compute the full pathname to the target file. */ - fullpath = psprintf("%s/%s", context->backup_directory, - m->pathname); - - /* Do the actual checksum validation. */ - validate_file_checksum(context, m, fullpath); - - /* Avoid leaking memory. */ - pfree(fullpath); - } - } -} - -/* - * Validate the checksum of a single file. - */ -static void -validate_file_checksum(validator_context *context, manifest_file *m, - char *fullpath) -{ - pg_checksum_context checksum_ctx; - char *relpath = m->pathname; - int fd; - int rc; - size_t bytes_read = 0; - uint8 buffer[READ_CHUNK_SIZE]; - uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; - int checksumlen; - - /* Open the target file. */ - if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) - { - report_backup_error(context, "could not open file \"%s\": %m", - relpath); - return; - } - - /* Initialize checksum context. */ - pg_checksum_init(&checksum_ctx, m->checksum_type); - - /* Read the file chunk by chunk, updating the checksum as we go. */ - while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0) - { - bytes_read += rc; - pg_checksum_update(&checksum_ctx, buffer, rc); - } - if (rc < 0) - report_backup_error(context, "could not read file \"%s\": %m", - relpath); - - /* Close the file. */ - if (close(fd) != 0) - { - report_backup_error(context, "could not close file \"%s\": %m", - relpath); - return; - } - - /* If we didn't manage to read the whole file, bail out now. */ - if (rc < 0) - return; - - /* - * Double-check that we read the expected number of bytes from the file. 
- * Normally, a file size mismatch would be caught in validate_backup_file - * and this check would never be reached, but this provides additional - * safety and clarity in the event of concurrent modifications or - * filesystem misbehavior. - */ - if (bytes_read != m->size) - { - report_backup_error(context, - "file \"%s\" should contain %zu bytes, but read %zu bytes", - relpath, m->size, bytes_read); - return; - } - - /* Get the final checksum. */ - checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf); - - /* And check it against the manifest. */ - if (checksumlen != m->checksum_length) - report_backup_error(context, - "file \"%s\" has checksum of length %d, but expected %d", - relpath, m->checksum_length, checksumlen); - else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0) - report_backup_error(context, - "checksum mismatch for file \"%s\"", - relpath); -} - -/* - * Attempt to parse the WAL files required to restore from backup using - * pg_waldump. - */ -static void -parse_required_wal(validator_context *context, char *pg_waldump_path, - char *wal_directory, manifest_wal_range *first_wal_range) -{ - manifest_wal_range *this_wal_range = first_wal_range; - - while (this_wal_range != NULL) - { - char *pg_waldump_cmd; - - pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n", - pg_waldump_path, wal_directory, this_wal_range->tli, - (uint32) (this_wal_range->start_lsn >> 32), - (uint32) this_wal_range->start_lsn, - (uint32) (this_wal_range->end_lsn >> 32), - (uint32) this_wal_range->end_lsn); - if (system(pg_waldump_cmd) != 0) - report_backup_error(context, - "WAL parsing failed for timeline %u", - this_wal_range->tli); - - this_wal_range = this_wal_range->next; - } -} - -/* - * Report a problem with the backup. - * - * Update the context to indicate that we saw an error, and exit if the - * context says we should. 
- */ -static void -report_backup_error(validator_context *context, const char *pg_restrict fmt,...) -{ - va_list ap; - - va_start(ap, fmt); - pg_log_generic_v(PG_LOG_ERROR, fmt, ap); - va_end(ap); - - context->saw_any_error = true; - if (context->exit_on_error) - exit(1); -} - -/* - * Report a fatal error and exit - */ -static void -report_fatal_error(const char *pg_restrict fmt,...) -{ - va_list ap; - - va_start(ap, fmt); - pg_log_generic_v(PG_LOG_FATAL, fmt, ap); - va_end(ap); - - exit(1); -} - -/* - * Is the specified relative path, or some prefix of it, listed in the set - * of paths to ignore? - * - * Note that by "prefix" we mean a parent directory; for this purpose, - * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc". - */ -static bool -should_ignore_relpath(validator_context *context, char *relpath) -{ - SimpleStringListCell *cell; - - for (cell = context->ignore_list.head; cell != NULL; cell = cell->next) - { - char *r = relpath; - char *v = cell->val; - - while (*v != '\0' && *r == *v) - ++r, ++v; - - if (*v == '\0' && (*r == '\0' || *r == '/')) - return true; - } - - return false; -} - -/* - * Helper function for manifest_files hash table. - */ -static uint32 -hash_string_pointer(char *s) -{ - unsigned char *ss = (unsigned char *) s; - - return hash_bytes(ss, strlen(s)); -} - -/* - * Print out usage information and exit. - */ -static void -usage(void) -{ - printf(_("%s validates a backup against the backup manifest.\n\n"), progname); - printf(_("Usage:\n %s [OPTION]... 
BACKUPDIR\n\n"), progname); - printf(_("Options:\n")); - printf(_(" -e, --exit-on-error exit immediately on error\n")); - printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n")); - printf(_(" -m, --manifest=PATH use specified path for manifest\n")); - printf(_(" -n, --no-parse-wal do not try to parse WAL files\n")); - printf(_(" -s, --skip-checksums skip checksum verification\n")); - printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n")); - printf(_(" -V, --version output version information, then exit\n")); - printf(_(" -?, --help show this help, then exit\n")); - printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); - printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); -} diff --git a/src/bin/pg_validatebackup/t/001_basic.pl b/src/bin/pg_validatebackup/t/001_basic.pl deleted file mode 100644 index 6d4b8ea01a6..00000000000 --- a/src/bin/pg_validatebackup/t/001_basic.pl +++ /dev/null @@ -1,30 +0,0 @@ -use strict; -use warnings; -use TestLib; -use Test::More tests => 16; - -my $tempdir = TestLib::tempdir; - -program_help_ok('pg_validatebackup'); -program_version_ok('pg_validatebackup'); -program_options_handling_ok('pg_validatebackup'); - -command_fails_like(['pg_validatebackup'], - qr/no backup directory specified/, - 'target directory must be specified'); -command_fails_like(['pg_validatebackup', $tempdir], - qr/could not open file.*\/backup_manifest\"/, - 'pg_validatebackup requires a manifest'); -command_fails_like(['pg_validatebackup', $tempdir, $tempdir], - qr/too many command-line arguments/, - 'multiple target directories not allowed'); - -# create fake manifest file -open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!"; -close($fh); - -# but then try to use an alternate, nonexisting manifest -command_fails_like(['pg_validatebackup', '-m', "$tempdir/not_the_manifest", - $tempdir], - qr/could not open file.*\/not_the_manifest\"/, - 'pg_validatebackup respects -m flag'); diff --git 
a/src/bin/pg_validatebackup/t/002_algorithm.pl b/src/bin/pg_validatebackup/t/002_algorithm.pl deleted file mode 100644 index 98871e12a5e..00000000000 --- a/src/bin/pg_validatebackup/t/002_algorithm.pl +++ /dev/null @@ -1,58 +0,0 @@ -# Verify that we can take and validate backups with various checksum types. - -use strict; -use warnings; -use Cwd; -use Config; -use File::Path qw(rmtree); -use PostgresNode; -use TestLib; -use Test::More tests => 19; - -my $master = get_new_node('master'); -$master->init(allows_streaming => 1); -$master->start; - -for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512)) -{ - my $backup_path = $master->backup_dir . '/' . $algorithm; - my @backup = ('pg_basebackup', '-D', $backup_path, - '--manifest-checksums', $algorithm, - '--no-sync'); - my @validate = ('pg_validatebackup', '-e', $backup_path); - - # A backup with a bogus algorithm should fail. - if ($algorithm eq 'bogus') - { - $master->command_fails(\@backup, - "backup fails with algorithm \"$algorithm\""); - next; - } - - # A backup with a valid algorithm should work. - $master->command_ok(\@backup, "backup ok with algorithm \"$algorithm\""); - - # We expect each real checksum algorithm to be mentioned on every line of - # the backup manifest file except the first and last; for simplicity, we - # just check that it shows up lots of times. When the checksum algorithm - # is none, we just check that the manifest exists. - if ($algorithm eq 'none') - { - ok(-f "$backup_path/backup_manifest", "backup manifest exists"); - } - else - { - my $manifest = slurp_file("$backup_path/backup_manifest"); - my $count_of_algorithm_in_manifest = - (() = $manifest =~ /$algorithm/mig); - cmp_ok($count_of_algorithm_in_manifest, '>', 100, - "$algorithm is mentioned many times in the manifest"); - } - - # Make sure that it validates OK. - $master->command_ok(\@validate, - "validate backup with algorithm \"$algorithm\""); - - # Remove backup immediately to save disk space. 
- rmtree($backup_path); -} diff --git a/src/bin/pg_validatebackup/t/003_corruption.pl b/src/bin/pg_validatebackup/t/003_corruption.pl deleted file mode 100644 index 09f8b982504..00000000000 --- a/src/bin/pg_validatebackup/t/003_corruption.pl +++ /dev/null @@ -1,288 +0,0 @@ -# Verify that various forms of corruption are detected by pg_validatebackup. - -use strict; -use warnings; -use Cwd; -use Config; -use File::Path qw(rmtree); -use PostgresNode; -use TestLib; -use Test::More tests => 44; - -my $master = get_new_node('master'); -$master->init(allows_streaming => 1); -$master->start; - -# Include a user-defined tablespace in the hopes of detecting problems in that -# area. -my $source_ts_path = TestLib::perl2host(TestLib::tempdir_short()); -my $source_ts_prefix = $source_ts_path; -$source_ts_prefix =~ s!(^[A-Z]:/[^/]*)/.*!$1!; - -$master->safe_psql('postgres', < 'extra_file', - 'mutilate' => \&mutilate_extra_file, - 'fails_like' => - qr/extra_file.*present on disk but not in the manifest/ - }, - { - 'name' => 'extra_tablespace_file', - 'mutilate' => \&mutilate_extra_tablespace_file, - 'fails_like' => - qr/extra_ts_file.*present on disk but not in the manifest/ - }, - { - 'name' => 'missing_file', - 'mutilate' => \&mutilate_missing_file, - 'fails_like' => - qr/pg_xact\/0000.*present in the manifest but not on disk/ - }, - { - 'name' => 'missing_tablespace', - 'mutilate' => \&mutilate_missing_tablespace, - 'fails_like' => - qr/pg_tblspc.*present in the manifest but not on disk/ - }, - { - 'name' => 'append_to_file', - 'mutilate' => \&mutilate_append_to_file, - 'fails_like' => - qr/has size \d+ on disk but size \d+ in the manifest/ - }, - { - 'name' => 'truncate_file', - 'mutilate' => \&mutilate_truncate_file, - 'fails_like' => - qr/has size 0 on disk but size \d+ in the manifest/ - }, - { - 'name' => 'replace_file', - 'mutilate' => \&mutilate_replace_file, - 'fails_like' => qr/checksum mismatch for file/ - }, - { - 'name' => 'bad_manifest', - 'mutilate' => 
\&mutilate_bad_manifest, - 'fails_like' => qr/manifest checksum mismatch/ - }, - { - 'name' => 'open_file_fails', - 'mutilate' => \&mutilate_open_file_fails, - 'fails_like' => qr/could not open file/, - 'skip_on_windows' => 1 - }, - { - 'name' => 'open_directory_fails', - 'mutilate' => \&mutilate_open_directory_fails, - 'cleanup' => \&cleanup_open_directory_fails, - 'fails_like' => qr/could not open directory/, - 'skip_on_windows' => 1 - }, - { - 'name' => 'search_directory_fails', - 'mutilate' => \&mutilate_search_directory_fails, - 'cleanup' => \&cleanup_search_directory_fails, - 'fails_like' => qr/could not stat file or directory/, - 'skip_on_windows' => 1 - } -); - -for my $scenario (@scenario) -{ - my $name = $scenario->{'name'}; - - SKIP: - { - skip "unix-style permissions not supported on Windows", 4 - if $scenario->{'skip_on_windows'} && $windows_os; - - # Take a backup and check that it validates OK. - my $backup_path = $master->backup_dir . '/' . $name; - my $backup_ts_path = TestLib::perl2host(TestLib::tempdir_short()); - # The tablespace map parameter confuses Msys2, which tries to mangle - # it. Tell it not to. - # See https://www.msys2.org/wiki/Porting/#filesystem-namespaces - local $ENV{MSYS2_ARG_CONV_EXCL} = $source_ts_prefix; - $master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync', - '-T', "${source_ts_path}=${backup_ts_path}"], - "base backup ok"); - command_ok(['pg_validatebackup', $backup_path ], - "intact backup validated"); - - # Mutilate the backup in some way. - $scenario->{'mutilate'}->($backup_path); - - # Now check that the backup no longer validates. - command_fails_like(['pg_validatebackup', $backup_path ], - $scenario->{'fails_like'}, - "corrupt backup fails validation: $name"); - - # Run cleanup hook, if provided. - $scenario->{'cleanup'}->($backup_path) - if exists $scenario->{'cleanup'}; - - # Finally, use rmtree to reclaim space. 
- rmtree($backup_path); - } -} - -sub create_extra_file -{ - my ($backup_path, $relative_path) = @_; - my $pathname = "$backup_path/$relative_path"; - open(my $fh, '>', $pathname) || die "open $pathname: $!"; - print $fh "This is an extra file.\n"; - close($fh); - return; -} - -# Add a file into the root directory of the backup. -sub mutilate_extra_file -{ - my ($backup_path) = @_; - create_extra_file($backup_path, "extra_file"); - return; -} - -# Add a file inside the user-defined tablespace. -sub mutilate_extra_tablespace_file -{ - my ($backup_path) = @_; - my ($tsoid) = grep { $_ ne '.' && $_ ne '..' } - slurp_dir("$backup_path/pg_tblspc"); - my ($catvdir) = grep { $_ ne '.' && $_ ne '..' } - slurp_dir("$backup_path/pg_tblspc/$tsoid"); - my ($tsdboid) = grep { $_ ne '.' && $_ ne '..' } - slurp_dir("$backup_path/pg_tblspc/$tsoid/$catvdir"); - create_extra_file($backup_path, - "pg_tblspc/$tsoid/$catvdir/$tsdboid/extra_ts_file"); - return; -} - -# Remove a file. -sub mutilate_missing_file -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/pg_xact/0000"; - unlink($pathname) || die "$pathname: $!"; - return; -} - -# Remove the symlink to the user-defined tablespace. -sub mutilate_missing_tablespace -{ - my ($backup_path) = @_; - my ($tsoid) = grep { $_ ne '.' && $_ ne '..' } - slurp_dir("$backup_path/pg_tblspc"); - my $pathname = "$backup_path/pg_tblspc/$tsoid"; - if ($windows_os) - { - # rmdir works on some windows setups, unlink on others. - # Instead of trying to implement precise rules, just try one and then - # the other. - unless (rmdir($pathname)) - { - my $err = $!; - unlink($pathname) || die "$pathname: rmdir: $err, unlink: $!"; - } - } - else - { - unlink($pathname) || die "$pathname: $!"; - } - return; -} - -# Append an additional bytes to a file. -sub mutilate_append_to_file -{ - my ($backup_path) = @_; - append_to_file "$backup_path/global/pg_control", 'x'; - return; -} - -# Truncate a file to zero length. 
-sub mutilate_truncate_file -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/global/pg_control"; - open(my $fh, '>', $pathname) || die "open $pathname: $!"; - close($fh); - return; -} - -# Replace a file's contents without changing the length of the file. This is -# not a particularly efficient way to do this, so we pick a file that's -# expected to be short. -sub mutilate_replace_file -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/PG_VERSION"; - my $contents = slurp_file($pathname); - open(my $fh, '>', $pathname) || die "open $pathname: $!"; - print $fh 'q' x length($contents); - close($fh); - return; -} - -# Corrupt the backup manifest. -sub mutilate_bad_manifest -{ - my ($backup_path) = @_; - append_to_file "$backup_path/backup_manifest", "\n"; - return; -} - -# Create a file that can't be opened. (This is skipped on Windows.) -sub mutilate_open_file_fails -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/PG_VERSION"; - chmod(0, $pathname) || die "chmod $pathname: $!"; - return; -} - -# Create a directory that can't be opened. (This is skipped on Windows.) -sub mutilate_open_directory_fails -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/pg_subtrans"; - chmod(0, $pathname) || die "chmod $pathname: $!"; - return; -} - -# restore permissions on the unreadable directory we created. -sub cleanup_open_directory_fails -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/pg_subtrans"; - chmod(0700, $pathname) || die "chmod $pathname: $!"; - return; -} - -# Create a directory that can't be searched. (This is skipped on Windows.) -sub mutilate_search_directory_fails -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/base"; - chmod(0400, $pathname) || die "chmod $pathname: $!"; - return; -} - -# rmtree can't cope with a mode 400 directory, so change back to 700. 
-sub cleanup_search_directory_fails -{ - my ($backup_path) = @_; - my $pathname = "$backup_path/base"; - chmod(0700, $pathname) || die "chmod $pathname: $!"; - return; -} diff --git a/src/bin/pg_validatebackup/t/004_options.pl b/src/bin/pg_validatebackup/t/004_options.pl deleted file mode 100644 index 8f185626ed6..00000000000 --- a/src/bin/pg_validatebackup/t/004_options.pl +++ /dev/null @@ -1,89 +0,0 @@ -# Verify the behavior of assorted pg_validatebackup options. - -use strict; -use warnings; -use Cwd; -use Config; -use File::Path qw(rmtree); -use PostgresNode; -use TestLib; -use Test::More tests => 25; - -# Start up the server and take a backup. -my $master = get_new_node('master'); -$master->init(allows_streaming => 1); -$master->start; -my $backup_path = $master->backup_dir . '/test_options'; -$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ], - "base backup ok"); - -# Verify that pg_validatebackup -q succeeds and produces no output. -my $stdout; -my $stderr; -my $result = IPC::Run::run ['pg_validatebackup', '-q', $backup_path ], - '>', \$stdout, '2>', \$stderr; -ok($result, "-q succeeds: exit code 0"); -is($stdout, '', "-q succeeds: no stdout"); -is($stderr, '', "-q succeeds: no stderr"); - -# Corrupt the PG_VERSION file. -my $version_pathname = "$backup_path/PG_VERSION"; -my $version_contents = slurp_file($version_pathname); -open(my $fh, '>', $version_pathname) || die "open $version_pathname: $!"; -print $fh 'q' x length($version_contents); -close($fh); - -# Verify that pg_validatebackup -q now fails. -command_fails_like(['pg_validatebackup', '-q', $backup_path ], - qr/checksum mismatch for file \"PG_VERSION\"/, - '-q checksum mismatch'); - -# Since we didn't change the length of the file, validation should succeed -# if we ignore checksums. Check that we get the right message, too. 
-command_like(['pg_validatebackup', '-s', $backup_path ], - qr/backup successfully verified/, - '-s skips checksumming'); - -# Validation should succeed if we ignore the problem file. -command_like(['pg_validatebackup', '-i', 'PG_VERSION', $backup_path ], - qr/backup successfully verified/, - '-i ignores problem file'); - -# PG_VERSION is already corrupt; let's try also removing all of pg_xact. -rmtree($backup_path . "/pg_xact"); - -# We're ignoring the problem with PG_VERSION, but not the problem with -# pg_xact, so validation should fail here. -command_fails_like(['pg_validatebackup', '-i', 'PG_VERSION', $backup_path ], - qr/pg_xact.*is present in the manifest but not on disk/, - '-i does not ignore all problems'); - -# If we use -i twice, we should be able to ignore all of the problems. -command_like(['pg_validatebackup', '-i', 'PG_VERSION', '-i', 'pg_xact', - $backup_path ], - qr/backup successfully verified/, - 'multiple -i options work'); - -# Verify that when -i is not used, both problems are reported. -$result = IPC::Run::run ['pg_validatebackup', $backup_path ], - '>', \$stdout, '2>', \$stderr; -ok(!$result, "multiple problems: fails"); -like($stderr, qr/pg_xact.*is present in the manifest but not on disk/, - "multiple problems: missing files reported"); -like($stderr, qr/checksum mismatch for file \"PG_VERSION\"/, - "multiple problems: checksum mismatch reported"); - -# Verify that when -e is used, only the problem detected first is reported. -$result = IPC::Run::run ['pg_validatebackup', '-e', $backup_path ], - '>', \$stdout, '2>', \$stderr; -ok(!$result, "-e reports 1 error: fails"); -like($stderr, qr/pg_xact.*is present in the manifest but not on disk/, - "-e reports 1 error: missing files reported"); -unlike($stderr, qr/checksum mismatch for file \"PG_VERSION\"/, - "-e reports 1 error: checksum mismatch not reported"); - -# Test valid manifest with nonexistent backup directory. 
-command_fails_like(['pg_validatebackup', '-m', "$backup_path/backup_manifest", - "$backup_path/fake" ], - qr/could not open directory/, - 'nonexistent backup directory'); diff --git a/src/bin/pg_validatebackup/t/005_bad_manifest.pl b/src/bin/pg_validatebackup/t/005_bad_manifest.pl deleted file mode 100644 index f52a8b71ea9..00000000000 --- a/src/bin/pg_validatebackup/t/005_bad_manifest.pl +++ /dev/null @@ -1,204 +0,0 @@ -# Test the behavior of pg_validatebackup when the backup manifest has -# problems. - -use strict; -use warnings; -use Cwd; -use Config; -use PostgresNode; -use TestLib; -use Test::More tests => 58; - -my $tempdir = TestLib::tempdir; - -test_bad_manifest('input string ended unexpectedly', - qr/could not parse backup manifest: The input string ended unexpectedly/, - <', "$tempdir/backup_manifest") || die "open: $!"; - print $fh $manifest_contents; - close($fh); - - command_fails_like(['pg_validatebackup', $tempdir], $regexp, - $test_name); - return; -} diff --git a/src/bin/pg_validatebackup/t/006_encoding.pl b/src/bin/pg_validatebackup/t/006_encoding.pl deleted file mode 100644 index 5e3e7152a52..00000000000 --- a/src/bin/pg_validatebackup/t/006_encoding.pl +++ /dev/null @@ -1,27 +0,0 @@ -# Verify that pg_validatebackup handles hex-encoded filenames correctly. - -use strict; -use warnings; -use Cwd; -use Config; -use PostgresNode; -use TestLib; -use Test::More tests => 5; - -my $master = get_new_node('master'); -$master->init(allows_streaming => 1); -$master->start; -my $backup_path = $master->backup_dir . 
'/test_encoding'; -$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync', - '--manifest-force-encode' ], - "backup ok with forced hex encoding"); - -my $manifest = slurp_file("$backup_path/backup_manifest"); -my $count_of_encoded_path_in_manifest = - (() = $manifest =~ /Encoded-Path/mig); -cmp_ok($count_of_encoded_path_in_manifest, '>', 100, - "many paths are encoded in the manifest"); - -command_like(['pg_validatebackup', '-s', $backup_path ], - qr/backup successfully verified/, - 'backup with forced encoding validated'); diff --git a/src/bin/pg_validatebackup/t/007_wal.pl b/src/bin/pg_validatebackup/t/007_wal.pl deleted file mode 100644 index b755e0f5e7f..00000000000 --- a/src/bin/pg_validatebackup/t/007_wal.pl +++ /dev/null @@ -1,55 +0,0 @@ -# Test pg_validatebackup's WAL validation. - -use strict; -use warnings; -use Cwd; -use Config; -use File::Path qw(rmtree); -use PostgresNode; -use TestLib; -use Test::More tests => 7; - -# Start up the server and take a backup. -my $master = get_new_node('master'); -$master->init(allows_streaming => 1); -$master->start; -my $backup_path = $master->backup_dir . '/test_wal'; -$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ], - "base backup ok"); - -# Rename pg_wal. -my $original_pg_wal = $backup_path . '/pg_wal'; -my $relocated_pg_wal = $master->backup_dir . '/relocated_pg_wal'; -rename($original_pg_wal, $relocated_pg_wal) || die "rename pg_wal: $!"; - -# WAL validation should fail. -command_fails_like(['pg_validatebackup', $backup_path ], - qr/WAL parsing failed for timeline 1/, - 'missing pg_wal causes failure'); - -# Should work if we skip WAL verification. -command_ok(['pg_validatebackup', '-n', $backup_path ], - 'missing pg_wal OK if not verifying WAL'); - -# Should also work if we specify the correct WAL location. -command_ok(['pg_validatebackup', '-w', $relocated_pg_wal, $backup_path ], - '-w can be used to specify WAL directory'); - -# Move directory back to original location. 
-rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!"; - -# Get a list of files in that directory that look like WAL files. -my @walfiles = grep { /^[0-9A-F]{24}$/ } slurp_dir($original_pg_wal); - -# Replace the contents of one of the files with garbage of equal length. -my $wal_corruption_target = $original_pg_wal . '/' . $walfiles[0]; -my $wal_size = -s $wal_corruption_target; -open(my $fh, '>', $wal_corruption_target) - || die "open $wal_corruption_target: $!"; -print $fh 'w' x $wal_size; -close($fh); - -# WAL validation should fail. -command_fails_like(['pg_validatebackup', $backup_path ], - qr/WAL parsing failed for timeline 1/, - 'corrupt WAL file causes failure'); diff --git a/src/bin/pg_verifybackup/.gitignore b/src/bin/pg_verifybackup/.gitignore new file mode 100644 index 00000000000..910b227ce99 --- /dev/null +++ b/src/bin/pg_verifybackup/.gitignore @@ -0,0 +1,2 @@ +/pg_verifybackup +/tmp_check/ diff --git a/src/bin/pg_verifybackup/Makefile b/src/bin/pg_verifybackup/Makefile new file mode 100644 index 00000000000..c07643b1297 --- /dev/null +++ b/src/bin/pg_verifybackup/Makefile @@ -0,0 +1,40 @@ +# src/bin/pg_verifybackup/Makefile + +PGFILEDESC = "pg_verifybackup - verify a backup against using a backup manifest" +PGAPPICON = win32 + +subdir = src/bin/pg_verifybackup +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +# We need libpq only because fe_utils does. 
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) + +OBJS = \ + $(WIN32RES) \ + parse_manifest.o \ + pg_verifybackup.o + +all: pg_verifybackup + +pg_verifybackup: $(OBJS) | submake-libpq submake-libpgport submake-libpgfeutils + $(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) + +install: all installdirs + $(INSTALL_PROGRAM) pg_verifybackup$(X) '$(DESTDIR)$(bindir)/pg_verifybackup$(X)' + +installdirs: + $(MKDIR_P) '$(DESTDIR)$(bindir)' + +uninstall: + rm -f '$(DESTDIR)$(bindir)/pg_verifybackup$(X)' + +clean distclean maintainer-clean: + rm -f pg_verifybackup$(X) $(OBJS) + rm -rf tmp_check + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) diff --git a/src/bin/pg_verifybackup/parse_manifest.c b/src/bin/pg_verifybackup/parse_manifest.c new file mode 100644 index 00000000000..faee423c7ec --- /dev/null +++ b/src/bin/pg_verifybackup/parse_manifest.c @@ -0,0 +1,740 @@ +/*------------------------------------------------------------------------- + * + * parse_manifest.c + * Parse a backup manifest in JSON format. + * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/bin/pg_verifybackup/parse_manifest.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include "parse_manifest.h" +#include "common/jsonapi.h" + +/* + * Semantic states for JSON manifest parsing. 
+ */ +typedef enum +{ + JM_EXPECT_TOPLEVEL_START, + JM_EXPECT_TOPLEVEL_END, + JM_EXPECT_TOPLEVEL_FIELD, + JM_EXPECT_VERSION_VALUE, + JM_EXPECT_FILES_START, + JM_EXPECT_FILES_NEXT, + JM_EXPECT_THIS_FILE_FIELD, + JM_EXPECT_THIS_FILE_VALUE, + JM_EXPECT_WAL_RANGES_START, + JM_EXPECT_WAL_RANGES_NEXT, + JM_EXPECT_THIS_WAL_RANGE_FIELD, + JM_EXPECT_THIS_WAL_RANGE_VALUE, + JM_EXPECT_MANIFEST_CHECKSUM_VALUE, + JM_EXPECT_EOF +} JsonManifestSemanticState; + +/* + * Possible fields for one file as described by the manifest. + */ +typedef enum +{ + JMFF_PATH, + JMFF_ENCODED_PATH, + JMFF_SIZE, + JMFF_LAST_MODIFIED, + JMFF_CHECKSUM_ALGORITHM, + JMFF_CHECKSUM +} JsonManifestFileField; + +/* + * Possible fields for one file as described by the manifest. + */ +typedef enum +{ + JMWRF_TIMELINE, + JMWRF_START_LSN, + JMWRF_END_LSN +} JsonManifestWALRangeField; + +/* + * Internal state used while decoding the JSON-format backup manifest. + */ +typedef struct +{ + JsonManifestParseContext *context; + JsonManifestSemanticState state; + + /* These fields are used for parsing objects in the list of files. */ + JsonManifestFileField file_field; + char *pathname; + char *encoded_pathname; + char *size; + char *algorithm; + pg_checksum_type checksum_algorithm; + char *checksum; + + /* These fields are used for parsing objects in the list of WAL ranges. */ + JsonManifestWALRangeField wal_range_field; + char *timeline; + char *start_lsn; + char *end_lsn; + + /* Miscellaneous other stuff. 
*/ + bool saw_version_field; + char *manifest_checksum; +} JsonManifestParseState; + +static void json_manifest_object_start(void *state); +static void json_manifest_object_end(void *state); +static void json_manifest_array_start(void *state); +static void json_manifest_array_end(void *state); +static void json_manifest_object_field_start(void *state, char *fname, + bool isnull); +static void json_manifest_scalar(void *state, char *token, + JsonTokenType tokentype); +static void json_manifest_finalize_file(JsonManifestParseState *parse); +static void json_manifest_finalize_wal_range(JsonManifestParseState *parse); +static void verify_manifest_checksum(JsonManifestParseState *parse, + char *buffer, size_t size); +static void json_manifest_parse_failure(JsonManifestParseContext *context, + char *msg); + +static int hexdecode_char(char c); +static bool hexdecode_string(uint8 *result, char *input, int nbytes); +static bool parse_xlogrecptr(XLogRecPtr *result, char *input); + +/* + * Main entrypoint to parse a JSON-format backup manifest. + * + * Caller should set up the parsing context and then invoke this function. + * For each file whose information is extracted from the manifest, + * context->perfile_cb is invoked. In case of trouble, context->error_cb is + * invoked and is expected not to return. + */ +void +json_parse_manifest(JsonManifestParseContext *context, char *buffer, + size_t size) +{ + JsonLexContext *lex; + JsonParseErrorType json_error; + JsonSemAction sem; + JsonManifestParseState parse; + + /* Set up our private parsing context. */ + parse.context = context; + parse.state = JM_EXPECT_TOPLEVEL_START; + parse.saw_version_field = false; + + /* Create a JSON lexing context. */ + lex = makeJsonLexContextCstringLen(buffer, size, PG_UTF8, true); + + /* Set up semantic actions. 
*/ + sem.semstate = &parse; + sem.object_start = json_manifest_object_start; + sem.object_end = json_manifest_object_end; + sem.array_start = json_manifest_array_start; + sem.array_end = json_manifest_array_end; + sem.object_field_start = json_manifest_object_field_start; + sem.object_field_end = NULL; + sem.array_element_start = NULL; + sem.array_element_end = NULL; + sem.scalar = json_manifest_scalar; + + /* Run the actual JSON parser. */ + json_error = pg_parse_json(lex, &sem); + if (json_error != JSON_SUCCESS) + json_manifest_parse_failure(context, json_errdetail(json_error, lex)); + if (parse.state != JM_EXPECT_EOF) + json_manifest_parse_failure(context, "manifest ended unexpectedly"); + + /* Verify the manifest checksum. */ + verify_manifest_checksum(&parse, buffer, size); +} + +/* + * Invoked at the start of each object in the JSON document. + * + * The document as a whole is expected to be an object; each file and each + * WAL range is also expected to be an object. If we're anywhere else in the + * document, it's an error. + */ +static void +json_manifest_object_start(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_START: + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + case JM_EXPECT_FILES_NEXT: + parse->state = JM_EXPECT_THIS_FILE_FIELD; + parse->pathname = NULL; + parse->encoded_pathname = NULL; + parse->size = NULL; + parse->algorithm = NULL; + parse->checksum = NULL; + break; + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + parse->timeline = NULL; + parse->start_lsn = NULL; + parse->end_lsn = NULL; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected object start"); + break; + } +} + +/* + * Invoked at the end of each object in the JSON document. + * + * The possible cases here are the same as for json_manifest_object_start. 
+ * There's nothing special to do at the end of the document, but when we + * reach the end of an object representing a particular file or WAL range, + * we must call json_manifest_finalize_file() to save the associated details. + */ +static void +json_manifest_object_end(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_END: + parse->state = JM_EXPECT_EOF; + break; + case JM_EXPECT_THIS_FILE_FIELD: + json_manifest_finalize_file(parse); + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + json_manifest_finalize_wal_range(parse); + parse->state = JM_EXPECT_WAL_RANGES_NEXT; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected object end"); + break; + } +} + +/* + * Invoked at the start of each array in the JSON document. + * + * Within the toplevel object, the value associated with the "Files" key + * should be an array. Similarly for the "WAL-Ranges" key. No other arrays + * are expected. + */ +static void +json_manifest_array_start(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_FILES_START: + parse->state = JM_EXPECT_FILES_NEXT; + break; + case JM_EXPECT_WAL_RANGES_START: + parse->state = JM_EXPECT_WAL_RANGES_NEXT; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected array start"); + break; + } +} + +/* + * Invoked at the end of each array in the JSON document. + * + * The cases here are analogous to those in json_manifest_array_start. + */ +static void +json_manifest_array_end(void *state) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_FILES_NEXT: + case JM_EXPECT_WAL_RANGES_NEXT: + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + default: + json_manifest_parse_failure(parse->context, + "unexpected array end"); + break; + } +} + +/* + * Invoked at the start of each object field in the JSON document. 
+ */ +static void +json_manifest_object_field_start(void *state, char *fname, bool isnull) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_TOPLEVEL_FIELD: + + /* + * Inside toplevel object. The version indicator should always be + * the first field. + */ + if (!parse->saw_version_field) + { + if (strcmp(fname, "PostgreSQL-Backup-Manifest-Version") != 0) + json_manifest_parse_failure(parse->context, + "expected version indicator"); + parse->state = JM_EXPECT_VERSION_VALUE; + parse->saw_version_field = true; + break; + } + + /* Is this the list of files? */ + if (strcmp(fname, "Files") == 0) + { + parse->state = JM_EXPECT_FILES_START; + break; + } + + /* Is this the list of WAL ranges? */ + if (strcmp(fname, "WAL-Ranges") == 0) + { + parse->state = JM_EXPECT_WAL_RANGES_START; + break; + } + + /* Is this the manifest checksum? */ + if (strcmp(fname, "Manifest-Checksum") == 0) + { + parse->state = JM_EXPECT_MANIFEST_CHECKSUM_VALUE; + break; + } + + /* It's not a field we recognize. */ + json_manifest_parse_failure(parse->context, + "unknown toplevel field"); + break; + + case JM_EXPECT_THIS_FILE_FIELD: + /* Inside object for one file; which key have we got? */ + if (strcmp(fname, "Path") == 0) + parse->file_field = JMFF_PATH; + else if (strcmp(fname, "Encoded-Path") == 0) + parse->file_field = JMFF_ENCODED_PATH; + else if (strcmp(fname, "Size") == 0) + parse->file_field = JMFF_SIZE; + else if (strcmp(fname, "Last-Modified") == 0) + parse->file_field = JMFF_LAST_MODIFIED; + else if (strcmp(fname, "Checksum-Algorithm") == 0) + parse->file_field = JMFF_CHECKSUM_ALGORITHM; + else if (strcmp(fname, "Checksum") == 0) + parse->file_field = JMFF_CHECKSUM; + else + json_manifest_parse_failure(parse->context, + "unexpected file field"); + parse->state = JM_EXPECT_THIS_FILE_VALUE; + break; + + case JM_EXPECT_THIS_WAL_RANGE_FIELD: + /* Inside object for one file; which key have we got? 
*/ + if (strcmp(fname, "Timeline") == 0) + parse->wal_range_field = JMWRF_TIMELINE; + else if (strcmp(fname, "Start-LSN") == 0) + parse->wal_range_field = JMWRF_START_LSN; + else if (strcmp(fname, "End-LSN") == 0) + parse->wal_range_field = JMWRF_END_LSN; + else + json_manifest_parse_failure(parse->context, + "unexpected wal range field"); + parse->state = JM_EXPECT_THIS_WAL_RANGE_VALUE; + break; + + default: + json_manifest_parse_failure(parse->context, + "unexpected object field"); + break; + } +} + +/* + * Invoked at the start of each scalar in the JSON document. + * + * Object field names don't reach this code; those are handled by + * json_manifest_object_field_start. When we're inside of the object for + * a particular file or WAL range, that function will have noticed the name + * of the field, and we'll get the corresponding value here. When we're in + * the toplevel object, the parse state itself tells us which field this is. + * + * In all cases except for PostgreSQL-Backup-Manifest-Version, which we + * can just check on the spot, the goal here is just to save the value in + * the parse state for later use. We don't actually do anything until we + * reach either the end of the object representing this file, or the end + * of the manifest, as the case may be. 
+ */ +static void +json_manifest_scalar(void *state, char *token, JsonTokenType tokentype) +{ + JsonManifestParseState *parse = state; + + switch (parse->state) + { + case JM_EXPECT_VERSION_VALUE: + if (strcmp(token, "1") != 0) + json_manifest_parse_failure(parse->context, + "unexpected manifest version"); + parse->state = JM_EXPECT_TOPLEVEL_FIELD; + break; + + case JM_EXPECT_THIS_FILE_VALUE: + switch (parse->file_field) + { + case JMFF_PATH: + parse->pathname = token; + break; + case JMFF_ENCODED_PATH: + parse->encoded_pathname = token; + break; + case JMFF_SIZE: + parse->size = token; + break; + case JMFF_LAST_MODIFIED: + pfree(token); /* unused */ + break; + case JMFF_CHECKSUM_ALGORITHM: + parse->algorithm = token; + break; + case JMFF_CHECKSUM: + parse->checksum = token; + break; + } + parse->state = JM_EXPECT_THIS_FILE_FIELD; + break; + + case JM_EXPECT_THIS_WAL_RANGE_VALUE: + switch (parse->wal_range_field) + { + case JMWRF_TIMELINE: + parse->timeline = token; + break; + case JMWRF_START_LSN: + parse->start_lsn = token; + break; + case JMWRF_END_LSN: + parse->end_lsn = token; + break; + } + parse->state = JM_EXPECT_THIS_WAL_RANGE_FIELD; + break; + + case JM_EXPECT_MANIFEST_CHECKSUM_VALUE: + parse->state = JM_EXPECT_TOPLEVEL_END; + parse->manifest_checksum = token; + break; + + default: + json_manifest_parse_failure(parse->context, "unexpected scalar"); + break; + } +} + +/* + * Do additional parsing and sanity-checking of the details gathered for one + * file, and invoke the per-file callback so that the caller gets those + * details. This happens for each file when the corresponding JSON object is + * completely parsed. + */ +static void +json_manifest_finalize_file(JsonManifestParseState *parse) +{ + JsonManifestParseContext *context = parse->context; + size_t size; + char *ep; + int checksum_string_length; + pg_checksum_type checksum_type; + int checksum_length; + uint8 *checksum_payload; + + /* Pathname and size are required. 
*/ + if (parse->pathname == NULL && parse->encoded_pathname == NULL) + json_manifest_parse_failure(parse->context, "missing pathname"); + if (parse->pathname != NULL && parse->encoded_pathname != NULL) + json_manifest_parse_failure(parse->context, + "both pathname and encoded pathname"); + if (parse->size == NULL) + json_manifest_parse_failure(parse->context, "missing size"); + if (parse->algorithm == NULL && parse->checksum != NULL) + json_manifest_parse_failure(parse->context, + "checksum without algorithm"); + + /* Decode encoded pathname, if that's what we have. */ + if (parse->encoded_pathname != NULL) + { + int encoded_length = strlen(parse->encoded_pathname); + int raw_length = encoded_length / 2; + + parse->pathname = palloc(raw_length + 1); + if (encoded_length % 2 != 0 || + !hexdecode_string((uint8 *) parse->pathname, + parse->encoded_pathname, + raw_length)) + json_manifest_parse_failure(parse->context, + "unable to decode filename"); + parse->pathname[raw_length] = '\0'; + pfree(parse->encoded_pathname); + parse->encoded_pathname = NULL; + } + + /* Parse size. */ + size = strtoul(parse->size, &ep, 10); + if (*ep) + json_manifest_parse_failure(parse->context, + "file size is not an integer"); + + /* Parse the checksum algorithm, if it's present. */ + if (parse->algorithm == NULL) + checksum_type = CHECKSUM_TYPE_NONE; + else if (!pg_checksum_parse_type(parse->algorithm, &checksum_type)) + context->error_cb(context, "unrecognized checksum algorithm: \"%s\"", + parse->algorithm); + + /* Parse the checksum payload, if it's present. */ + checksum_string_length = parse->checksum == NULL ? 
0 + : strlen(parse->checksum); + if (checksum_string_length == 0) + { + checksum_length = 0; + checksum_payload = NULL; + } + else + { + checksum_length = checksum_string_length / 2; + checksum_payload = palloc(checksum_length); + if (checksum_string_length % 2 != 0 || + !hexdecode_string(checksum_payload, parse->checksum, + checksum_length)) + context->error_cb(context, + "invalid checksum for file \"%s\": \"%s\"", + parse->pathname, parse->checksum); + } + + /* Invoke the callback with the details we've gathered. */ + context->perfile_cb(context, parse->pathname, size, + checksum_type, checksum_length, checksum_payload); + + /* Free memory we no longer need. */ + if (parse->size != NULL) + { + pfree(parse->size); + parse->size = NULL; + } + if (parse->algorithm != NULL) + { + pfree(parse->algorithm); + parse->algorithm = NULL; + } + if (parse->checksum != NULL) + { + pfree(parse->checksum); + parse->checksum = NULL; + } +} + +/* + * Do additional parsing and sanity-checking of the details gathered for one + * WAL range, and invoke the per-WAL-range callback so that the caller gets + * those details. This happens for each WAL range when the corresponding JSON + * object is completely parsed. + */ +static void +json_manifest_finalize_wal_range(JsonManifestParseState *parse) +{ + JsonManifestParseContext *context = parse->context; + TimeLineID tli; + XLogRecPtr start_lsn, + end_lsn; + char *ep; + + /* Make sure all fields are present. */ + if (parse->timeline == NULL) + json_manifest_parse_failure(parse->context, "missing timeline"); + if (parse->start_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing start LSN"); + if (parse->end_lsn == NULL) + json_manifest_parse_failure(parse->context, "missing end LSN"); + + /* Parse timeline. 
*/ + tli = strtoul(parse->timeline, &ep, 10); + if (*ep) + json_manifest_parse_failure(parse->context, + "timeline is not an integer"); + if (!parse_xlogrecptr(&start_lsn, parse->start_lsn)) + json_manifest_parse_failure(parse->context, + "unable to parse start LSN"); + if (!parse_xlogrecptr(&end_lsn, parse->end_lsn)) + json_manifest_parse_failure(parse->context, + "unable to parse end LSN"); + + /* Invoke the callback with the details we've gathered. */ + context->perwalrange_cb(context, tli, start_lsn, end_lsn); + + /* Free memory we no longer need. */ + if (parse->timeline != NULL) + { + pfree(parse->timeline); + parse->timeline = NULL; + } + if (parse->start_lsn != NULL) + { + pfree(parse->start_lsn); + parse->start_lsn = NULL; + } + if (parse->end_lsn != NULL) + { + pfree(parse->end_lsn); + parse->end_lsn = NULL; + } +} + +/* + * Verify that the manifest checksum is correct. + * + * The last line of the manifest file is excluded from the manifest checksum, + * because the last line is expected to contain the checksum that covers + * the rest of the file. + */ +static void +verify_manifest_checksum(JsonManifestParseState *parse, char *buffer, + size_t size) +{ + JsonManifestParseContext *context = parse->context; + size_t i; + size_t number_of_newlines = 0; + size_t ultimate_newline = 0; + size_t penultimate_newline = 0; + pg_sha256_ctx manifest_ctx; + uint8 manifest_checksum_actual[PG_SHA256_DIGEST_LENGTH]; + uint8 manifest_checksum_expected[PG_SHA256_DIGEST_LENGTH]; + + /* Find the last two newlines in the file. */ + for (i = 0; i < size; ++i) + { + if (buffer[i] == '\n') + { + ++number_of_newlines; + penultimate_newline = ultimate_newline; + ultimate_newline = i; + } + } + + /* + * Make sure that the last newline is right at the end, and that there are + * at least two lines total. We need this to be true in order for the + * following code, which computes the manifest checksum, to work properly. 
+ */ + if (number_of_newlines < 2) + json_manifest_parse_failure(parse->context, + "expected at least 2 lines"); + if (ultimate_newline != size - 1) + json_manifest_parse_failure(parse->context, + "last line not newline-terminated"); + + /* Checksum the rest. */ + pg_sha256_init(&manifest_ctx); + pg_sha256_update(&manifest_ctx, (uint8 *) buffer, penultimate_newline + 1); + pg_sha256_final(&manifest_ctx, manifest_checksum_actual); + + /* Now verify it. */ + if (parse->manifest_checksum == NULL) + context->error_cb(parse->context, "manifest has no checksum"); + if (strlen(parse->manifest_checksum) != PG_SHA256_DIGEST_LENGTH * 2 || + !hexdecode_string(manifest_checksum_expected, parse->manifest_checksum, + PG_SHA256_DIGEST_LENGTH)) + context->error_cb(context, "invalid manifest checksum: \"%s\"", + parse->manifest_checksum); + if (memcmp(manifest_checksum_actual, manifest_checksum_expected, + PG_SHA256_DIGEST_LENGTH) != 0) + context->error_cb(context, "manifest checksum mismatch"); +} + +/* + * Report a parse error. + * + * This is intended to be used for fairly low-level failures that probably + * shouldn't occur unless somebody has deliberately constructed a bad manifest, + * or unless the server is generating bad manifests due to some bug. msg should + * be a short string giving some hint as to what the problem is. + */ +static void +json_manifest_parse_failure(JsonManifestParseContext *context, char *msg) +{ + context->error_cb(context, "could not parse backup manifest: %s", msg); +} + +/* + * Convert a character which represents a hexadecimal digit to an integer. + * + * Returns -1 if the character is not a hexadecimal digit. + */ +static int +hexdecode_char(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + + return -1; +} + +/* + * Decode a hex string into a byte string, 2 hex chars per byte. 
+ * + * Returns false if invalid characters are encountered; otherwise true. + */ +static bool +hexdecode_string(uint8 *result, char *input, int nbytes) +{ + int i; + + for (i = 0; i < nbytes; ++i) + { + int n1 = hexdecode_char(input[i * 2]); + int n2 = hexdecode_char(input[i * 2 + 1]); + + if (n1 < 0 || n2 < 0) + return false; + result[i] = n1 * 16 + n2; + } + + return true; +} + +/* + * Parse an XLogRecPtr expressed using the usual string format. + */ +static bool +parse_xlogrecptr(XLogRecPtr *result, char *input) +{ + uint32 hi; + uint32 lo; + + if (sscanf(input, "%X/%X", &hi, &lo) != 2) + return false; + *result = ((uint64) hi) << 32 | lo; + return true; +} diff --git a/src/bin/pg_verifybackup/parse_manifest.h b/src/bin/pg_verifybackup/parse_manifest.h new file mode 100644 index 00000000000..49254bfb324 --- /dev/null +++ b/src/bin/pg_verifybackup/parse_manifest.h @@ -0,0 +1,45 @@ +/*------------------------------------------------------------------------- + * + * parse_manifest.h + * Parse a backup manifest in JSON format. 
+ * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/bin/pg_verifybackup/parse_manifest.h + * + *------------------------------------------------------------------------- + */ + +#ifndef PARSE_MANIFEST_H +#define PARSE_MANIFEST_H + +#include "access/xlogdefs.h" +#include "common/checksum_helper.h" +#include "mb/pg_wchar.h" + +struct JsonManifestParseContext; +typedef struct JsonManifestParseContext JsonManifestParseContext; + +typedef void (*json_manifest_perfile_callback)(JsonManifestParseContext *, + char *pathname, + size_t size, pg_checksum_type checksum_type, + int checksum_length, uint8 *checksum_payload); +typedef void (*json_manifest_perwalrange_callback)(JsonManifestParseContext *, + TimeLineID tli, + XLogRecPtr start_lsn, XLogRecPtr end_lsn); +typedef void (*json_manifest_error_callback)(JsonManifestParseContext *, + char *fmt, ...) pg_attribute_printf(2, 3); + +struct JsonManifestParseContext +{ + void *private_data; + json_manifest_perfile_callback perfile_cb; + json_manifest_perwalrange_callback perwalrange_cb; + json_manifest_error_callback error_cb; +}; + +extern void json_parse_manifest(JsonManifestParseContext *context, + char *buffer, size_t size); + +#endif diff --git a/src/bin/pg_verifybackup/pg_verifybackup.c b/src/bin/pg_verifybackup/pg_verifybackup.c new file mode 100644 index 00000000000..9c0a8c55507 --- /dev/null +++ b/src/bin/pg_verifybackup/pg_verifybackup.c @@ -0,0 +1,905 @@ +/*------------------------------------------------------------------------- + * + * pg_verifybackup.c + * Verify a backup against a backup manifest. 
+ * + * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/bin/pg_verifybackup/pg_verifybackup.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres_fe.h" + +#include +#include +#include + +#include "common/hashfn.h" +#include "common/logging.h" +#include "fe_utils/simple_list.h" +#include "getopt_long.h" +#include "parse_manifest.h" + +/* + * For efficiency, we'd like our hash table containing information about the + * manifest to start out with approximately the correct number of entries. + * There's no way to know the exact number of entries without reading the whole + * file, but we can get an estimate by dividing the file size by the estimated + * number of bytes per line. + * + * This could be off by about a factor of two in either direction, because the + * checksum algorithm has a big impact on the line lengths; e.g. a SHA512 + * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there + * might be no checksum at all. + */ +#define ESTIMATED_BYTES_PER_MANIFEST_LINE 100 + +/* + * How many bytes should we try to read from a file at once? + */ +#define READ_CHUNK_SIZE 4096 + +/* + * Each file described by the manifest file is parsed to produce an object + * like this. + */ +typedef struct manifest_file +{ + uint32 status; /* hash status */ + char *pathname; + size_t size; + pg_checksum_type checksum_type; + int checksum_length; + uint8 *checksum_payload; + bool matched; + bool bad; +} manifest_file; + +/* + * Define a hash table which we can use to store information about the files + * mentioned in the backup manifest. 
+ */ +static uint32 hash_string_pointer(char *s); +#define SH_PREFIX manifest_files +#define SH_ELEMENT_TYPE manifest_file +#define SH_KEY_TYPE char * +#define SH_KEY pathname +#define SH_HASH_KEY(tb, key) hash_string_pointer(key) +#define SH_EQUAL(tb, a, b) (strcmp(a, b) == 0) +#define SH_SCOPE static inline +#define SH_RAW_ALLOCATOR pg_malloc0 +#define SH_DECLARE +#define SH_DEFINE +#include "lib/simplehash.h" + +/* + * Each WAL range described by the manifest file is parsed to produce an + * object like this. + */ +typedef struct manifest_wal_range +{ + TimeLineID tli; + XLogRecPtr start_lsn; + XLogRecPtr end_lsn; + struct manifest_wal_range *next; + struct manifest_wal_range *prev; +} manifest_wal_range; + +/* + * Details we need in callbacks that occur while parsing a backup manifest. + */ +typedef struct parser_context +{ + manifest_files_hash *ht; + manifest_wal_range *first_wal_range; + manifest_wal_range *last_wal_range; +} parser_context; + +/* + * All of the context information we need while checking a backup manifest. + */ +typedef struct verifier_context +{ + manifest_files_hash *ht; + char *backup_directory; + SimpleStringList ignore_list; + bool exit_on_error; + bool saw_any_error; +} verifier_context; + +static void parse_manifest_file(char *manifest_path, + manifest_files_hash **ht_p, + manifest_wal_range **first_wal_range_p); + +static void record_manifest_details_for_file(JsonManifestParseContext *context, + char *pathname, size_t size, + pg_checksum_type checksum_type, + int checksum_length, + uint8 *checksum_payload); +static void record_manifest_details_for_wal_range(JsonManifestParseContext *context, + TimeLineID tli, + XLogRecPtr start_lsn, + XLogRecPtr end_lsn); +static void report_manifest_error(JsonManifestParseContext *context, + char *fmt,...) 
+ pg_attribute_printf(2, 3) pg_attribute_noreturn(); + +static void verify_backup_directory(verifier_context *context, + char *relpath, char *fullpath); +static void verify_backup_file(verifier_context *context, + char *relpath, char *fullpath); +static void report_extra_backup_files(verifier_context *context); +static void verify_backup_checksums(verifier_context *context); +static void verify_file_checksum(verifier_context *context, + manifest_file *m, char *pathname); +static void parse_required_wal(verifier_context *context, + char *pg_waldump_path, + char *wal_directory, + manifest_wal_range *first_wal_range); + +static void report_backup_error(verifier_context *context, + const char *pg_restrict fmt,...) + pg_attribute_printf(2, 3); +static void report_fatal_error(const char *pg_restrict fmt,...) + pg_attribute_printf(1, 2) pg_attribute_noreturn(); +static bool should_ignore_relpath(verifier_context *context, char *relpath); + +static void usage(void); + +static const char *progname; + +/* + * Main entry point. 
+ */ +int +main(int argc, char **argv) +{ + static struct option long_options[] = { + {"exit-on-error", no_argument, NULL, 'e'}, + {"ignore", required_argument, NULL, 'i'}, + {"manifest-path", required_argument, NULL, 'm'}, + {"no-parse-wal", no_argument, NULL, 'n'}, + {"print-parse-wal", no_argument, NULL, 'p'}, + {"quiet", no_argument, NULL, 'q'}, + {"skip-checksums", no_argument, NULL, 's'}, + {"wal-directory", required_argument, NULL, 'w'}, + {NULL, 0, NULL, 0} + }; + + int c; + verifier_context context; + manifest_wal_range *first_wal_range; + char *manifest_path = NULL; + bool no_parse_wal = false; + bool quiet = false; + bool skip_checksums = false; + char *wal_directory = NULL; + char *pg_waldump_path = NULL; + + pg_logging_init(argv[0]); + set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup")); + progname = get_progname(argv[0]); + + memset(&context, 0, sizeof(context)); + + if (argc > 1) + { + if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) + { + usage(); + exit(0); + } + if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) + { + puts("pg_verifybackup (PostgreSQL) " PG_VERSION); + exit(0); + } + } + + /* + * Skip certain files in the toplevel directory. + * + * Ignore the backup_manifest file, because it's not included in the + * backup manifest. + * + * Ignore the pg_wal directory, because those files are not included in + * the backup manifest either, since they are fetched separately from the + * backup itself, and verified via a separate mechanism. + * + * Ignore postgresql.auto.conf, recovery.signal, and standby.signal, + * because we expect that those files may sometimes be created or changed + * as part of the backup process. For example, pg_basebackup -R will + * modify postgresql.auto.conf and create standby.signal. 
+ */ + simple_string_list_append(&context.ignore_list, "backup_manifest"); + simple_string_list_append(&context.ignore_list, "pg_wal"); + simple_string_list_append(&context.ignore_list, "postgresql.auto.conf"); + simple_string_list_append(&context.ignore_list, "recovery.signal"); + simple_string_list_append(&context.ignore_list, "standby.signal"); + + while ((c = getopt_long(argc, argv, "ei:m:nqsw:", long_options, NULL)) != -1) + { + switch (c) + { + case 'e': + context.exit_on_error = true; + break; + case 'i': + { + char *arg = pstrdup(optarg); + + canonicalize_path(arg); + simple_string_list_append(&context.ignore_list, arg); + break; + } + case 'm': + manifest_path = pstrdup(optarg); + canonicalize_path(manifest_path); + break; + case 'n': + no_parse_wal = true; + break; + case 'q': + quiet = true; + break; + case 's': + skip_checksums = true; + break; + case 'w': + wal_directory = pstrdup(optarg); + canonicalize_path(wal_directory); + break; + default: + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + } + + /* Get backup directory name */ + if (optind >= argc) + { + pg_log_fatal("no backup directory specified"); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + context.backup_directory = pstrdup(argv[optind++]); + canonicalize_path(context.backup_directory); + + /* Complain if any arguments remain */ + if (optind < argc) + { + pg_log_fatal("too many command-line arguments (first is \"%s\")", + argv[optind]); + fprintf(stderr, _("Try \"%s --help\" for more information.\n"), + progname); + exit(1); + } + + /* Unless --no-parse-wal was specified, we will need pg_waldump. 
*/ + if (!no_parse_wal) + { + int ret; + + pg_waldump_path = pg_malloc(MAXPGPATH); + ret = find_other_exec(argv[0], "pg_waldump", + "pg_waldump (PostgreSQL) " PG_VERSION "\n", + pg_waldump_path); + if (ret < 0) + { + char full_path[MAXPGPATH]; + + if (find_my_exec(argv[0], full_path) < 0) + strlcpy(full_path, progname, sizeof(full_path)); + if (ret == -1) + pg_log_fatal("The program \"%s\" is needed by %s but was\n" + "not found in the same directory as \"%s\".\n" + "Check your installation.", + "pg_waldump", "pg_verifybackup", full_path); + else + pg_log_fatal("The program \"%s\" was found by \"%s\" but was\n" + "not the same version as %s.\n" + "Check your installation.", + "pg_waldump", full_path, "pg_verifybackup"); + } + } + + /* By default, look for the manifest in the backup directory. */ + if (manifest_path == NULL) + manifest_path = psprintf("%s/backup_manifest", + context.backup_directory); + + /* By default, look for the WAL in the backup directory, too. */ + if (wal_directory == NULL) + wal_directory = psprintf("%s/pg_wal", context.backup_directory); + + /* + * Try to read the manifest. We treat any errors encountered while parsing + * the manifest as fatal; there doesn't seem to be much point in trying to + * verify the backup directory against a corrupted manifest. + */ + parse_manifest_file(manifest_path, &context.ht, &first_wal_range); + + /* + * Now scan the files in the backup directory. At this stage, we verify + * that every file on disk is present in the manifest and that the sizes + * match. We also set the "matched" flag on every manifest entry that + * corresponds to a file on disk. + */ + verify_backup_directory(&context, NULL, context.backup_directory); + + /* + * The "matched" flag should now be set on every entry in the hash table. + * Any entries for which the bit is not set are files mentioned in the + * manifest that don't exist on disk. 
+ */ + report_extra_backup_files(&context); + + /* + * Now do the expensive work of verifying file checksums, unless we were + * told to skip it. + */ + if (!skip_checksums) + verify_backup_checksums(&context); + + /* + * Try to parse the required ranges of WAL records, unless we were told + * not to do so. + */ + if (!no_parse_wal) + parse_required_wal(&context, pg_waldump_path, + wal_directory, first_wal_range); + + /* + * If everything looks OK, tell the user this, unless we were asked to + * work quietly. + */ + if (!context.saw_any_error && !quiet) + printf("backup successfully verified\n"); + + return context.saw_any_error ? 1 : 0; +} + +/* + * Parse a manifest file. Construct a hash table with information about + * all the files it mentions, and a linked list of all the WAL ranges it + * mentions. + */ +static void +parse_manifest_file(char *manifest_path, manifest_files_hash **ht_p, + manifest_wal_range **first_wal_range_p) +{ + int fd; + struct stat statbuf; + off_t estimate; + uint32 initial_size; + manifest_files_hash *ht; + char *buffer; + int rc; + parser_context private_context; + JsonManifestParseContext context; + + /* Open the manifest file. */ + if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0) + report_fatal_error("could not open file \"%s\": %m", manifest_path); + + /* Figure out how big the manifest is. */ + if (fstat(fd, &statbuf) != 0) + report_fatal_error("could not stat file \"%s\": %m", manifest_path); + + /* Guess how large to make the hash table based on the manifest size. */ + estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE; + initial_size = Min(PG_UINT32_MAX, Max(estimate, 256)); + + /* Create the hash table. */ + ht = manifest_files_create(initial_size, NULL); + + /* + * Slurp in the whole file. + * + * This is not ideal, but there's currently no easy way to get + * pg_parse_json() to perform incremental parsing. 
+ */ + buffer = pg_malloc(statbuf.st_size); + rc = read(fd, buffer, statbuf.st_size); + if (rc != statbuf.st_size) + { + if (rc < 0) + report_fatal_error("could not read file \"%s\": %m", + manifest_path); + else + report_fatal_error("could not read file \"%s\": read %d of %zu", + manifest_path, rc, (size_t) statbuf.st_size); + } + + /* Close the manifest file. */ + close(fd); + + /* Parse the manifest. */ + private_context.ht = ht; + private_context.first_wal_range = NULL; + private_context.last_wal_range = NULL; + context.private_data = &private_context; + context.perfile_cb = record_manifest_details_for_file; + context.perwalrange_cb = record_manifest_details_for_wal_range; + context.error_cb = report_manifest_error; + json_parse_manifest(&context, buffer, statbuf.st_size); + + /* Done with the buffer. */ + pfree(buffer); + + /* Return the file hash table and WAL range list we constructed. */ + *ht_p = ht; + *first_wal_range_p = private_context.first_wal_range; +} + +/* + * Report an error while parsing the manifest. + * + * We consider all such errors to be fatal errors. The manifest parser + * expects this function not to return. + */ +static void +report_manifest_error(JsonManifestParseContext *context, char *fmt,...) +{ + va_list ap; + + va_start(ap, fmt); + pg_log_generic_v(PG_LOG_FATAL, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * Record details extracted from the backup manifest for one file. + */ +static void +record_manifest_details_for_file(JsonManifestParseContext *context, + char *pathname, size_t size, + pg_checksum_type checksum_type, + int checksum_length, uint8 *checksum_payload) +{ + parser_context *pcxt = context->private_data; + manifest_files_hash *ht = pcxt->ht; + manifest_file *m; + bool found; + + /* Make a new entry in the hash table for this file. */ + m = manifest_files_insert(ht, pathname, &found); + if (found) + report_fatal_error("duplicate pathname in backup manifest: \"%s\"", + pathname); + + /* Initialize the entry. 
*/ + m->size = size; + m->checksum_type = checksum_type; + m->checksum_length = checksum_length; + m->checksum_payload = checksum_payload; + m->matched = false; + m->bad = false; +} + +/* + * Record details extracted from the backup manifest for one WAL range. + */ +static void +record_manifest_details_for_wal_range(JsonManifestParseContext *context, + TimeLineID tli, + XLogRecPtr start_lsn, XLogRecPtr end_lsn) +{ + parser_context *pcxt = context->private_data; + manifest_wal_range *range; + + /* Allocate and initialize a struct describing this WAL range. */ + range = palloc(sizeof(manifest_wal_range)); + range->tli = tli; + range->start_lsn = start_lsn; + range->end_lsn = end_lsn; + range->prev = pcxt->last_wal_range; + range->next = NULL; + + /* Add it to the end of the list. */ + if (pcxt->first_wal_range == NULL) + pcxt->first_wal_range = range; + else + pcxt->last_wal_range->next = range; + pcxt->last_wal_range = range; +} + +/* + * Verify one directory. + * + * 'relpath' is NULL if we are to verify the top-level backup directory, + * and otherwise the relative path to the directory that is to be verified. + * + * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual + * filesystem path at which it can be found. + */ +static void +verify_backup_directory(verifier_context *context, char *relpath, + char *fullpath) +{ + DIR *dir; + struct dirent *dirent; + + dir = opendir(fullpath); + if (dir == NULL) + { + /* + * If even the toplevel backup directory cannot be found, treat this + * as a fatal error. + */ + if (relpath == NULL) + report_fatal_error("could not open directory \"%s\": %m", fullpath); + + /* + * Otherwise, treat this as a non-fatal error, but ignore any further + * errors related to this path and anything beneath it. 
+ */ + report_backup_error(context, + "could not open directory \"%s\": %m", fullpath); + simple_string_list_append(&context->ignore_list, relpath); + + return; + } + + while (errno = 0, (dirent = readdir(dir)) != NULL) + { + char *filename = dirent->d_name; + char *newfullpath = psprintf("%s/%s", fullpath, filename); + char *newrelpath; + + /* Skip "." and ".." */ + if (filename[0] == '.' && (filename[1] == '\0' + || strcmp(filename, "..") == 0)) + continue; + + if (relpath == NULL) + newrelpath = pstrdup(filename); + else + newrelpath = psprintf("%s/%s", relpath, filename); + + if (!should_ignore_relpath(context, newrelpath)) + verify_backup_file(context, newrelpath, newfullpath); + + pfree(newfullpath); + pfree(newrelpath); + } + + if (closedir(dir)) + { + report_backup_error(context, + "could not close directory \"%s\": %m", fullpath); + return; + } +} + +/* + * Verify one file (which might actually be a directory or a symlink). + * + * The arguments to this function have the same meaning as the arguments to + * verify_backup_directory. + */ +static void +verify_backup_file(verifier_context *context, char *relpath, char *fullpath) +{ + struct stat sb; + manifest_file *m; + + if (stat(fullpath, &sb) != 0) + { + report_backup_error(context, + "could not stat file or directory \"%s\": %m", + relpath); + + /* + * Suppress further errors related to this path name and, if it's a + * directory, anything underneath it. + */ + simple_string_list_append(&context->ignore_list, relpath); + + return; + } + + /* If it's a directory, just recurse. */ + if (S_ISDIR(sb.st_mode)) + { + verify_backup_directory(context, relpath, fullpath); + return; + } + + /* If it's not a directory, it should be a plain file. */ + if (!S_ISREG(sb.st_mode)) + { + report_backup_error(context, + "\"%s\" is not a file or directory", + relpath); + return; + } + + /* Check whether there's an entry in the manifest hash. 
*/ + m = manifest_files_lookup(context->ht, relpath); + if (m == NULL) + { + report_backup_error(context, + "\"%s\" is present on disk but not in the manifest", + relpath); + return; + } + + /* Flag this entry as having been encountered in the filesystem. */ + m->matched = true; + + /* Check that the size matches. */ + if (m->size != sb.st_size) + { + report_backup_error(context, + "\"%s\" has size %zu on disk but size %zu in the manifest", + relpath, (size_t) sb.st_size, m->size); + m->bad = true; + } + + /* + * We don't verify checksums at this stage. We first finish verifying + * that we have the expected set of files with the expected sizes, and + * only afterwards verify the checksums. That's because computing + * checksums may take a while, and we'd like to report more obvious + * problems quickly. + */ +} + +/* + * Scan the hash table for entries where the 'matched' flag is not set; report + * that such files are present in the manifest but not on disk. + */ +static void +report_extra_backup_files(verifier_context *context) +{ + manifest_files_iterator it; + manifest_file *m; + + manifest_files_start_iterate(context->ht, &it); + while ((m = manifest_files_iterate(context->ht, &it)) != NULL) + if (!m->matched && !should_ignore_relpath(context, m->pathname)) + report_backup_error(context, + "\"%s\" is present in the manifest but not on disk", + m->pathname); +} + +/* + * Verify checksums for hash table entries that are otherwise unproblematic. + * If we've already reported some problem related to a hash table entry, or + * if it has no checksum, just skip it. 
+ */ +static void +verify_backup_checksums(verifier_context *context) +{ + manifest_files_iterator it; + manifest_file *m; + + manifest_files_start_iterate(context->ht, &it); + while ((m = manifest_files_iterate(context->ht, &it)) != NULL) + { + if (m->matched && !m->bad && m->checksum_type != CHECKSUM_TYPE_NONE && + !should_ignore_relpath(context, m->pathname)) + { + char *fullpath; + + /* Compute the full pathname to the target file. */ + fullpath = psprintf("%s/%s", context->backup_directory, + m->pathname); + + /* Do the actual checksum verification. */ + verify_file_checksum(context, m, fullpath); + + /* Avoid leaking memory. */ + pfree(fullpath); + } + } +} + +/* + * Verify the checksum of a single file. + */ +static void +verify_file_checksum(verifier_context *context, manifest_file *m, + char *fullpath) +{ + pg_checksum_context checksum_ctx; + char *relpath = m->pathname; + int fd; + int rc; + size_t bytes_read = 0; + uint8 buffer[READ_CHUNK_SIZE]; + uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; + int checksumlen; + + /* Open the target file. */ + if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) + { + report_backup_error(context, "could not open file \"%s\": %m", + relpath); + return; + } + + /* Initialize checksum context. */ + pg_checksum_init(&checksum_ctx, m->checksum_type); + + /* Read the file chunk by chunk, updating the checksum as we go. */ + while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0) + { + bytes_read += rc; + pg_checksum_update(&checksum_ctx, buffer, rc); + } + if (rc < 0) + report_backup_error(context, "could not read file \"%s\": %m", + relpath); + + /* Close the file. */ + if (close(fd) != 0) + { + report_backup_error(context, "could not close file \"%s\": %m", + relpath); + return; + } + + /* If we didn't manage to read the whole file, bail out now. */ + if (rc < 0) + return; + + /* + * Double-check that we read the expected number of bytes from the file. 
+ * Normally, a file size mismatch would be caught in verify_backup_file + * and this check would never be reached, but this provides additional + * safety and clarity in the event of concurrent modifications or + * filesystem misbehavior. + */ + if (bytes_read != m->size) + { + report_backup_error(context, + "file \"%s\" should contain %zu bytes, but read %zu bytes", + relpath, m->size, bytes_read); + return; + } + + /* Get the final checksum. */ + checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf); + + /* And check it against the manifest. */ + if (checksumlen != m->checksum_length) + report_backup_error(context, + "file \"%s\" has checksum of length %d, but expected %d", + relpath, m->checksum_length, checksumlen); + else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0) + report_backup_error(context, + "checksum mismatch for file \"%s\"", + relpath); +} + +/* + * Attempt to parse the WAL files required to restore from backup using + * pg_waldump. + */ +static void +parse_required_wal(verifier_context *context, char *pg_waldump_path, + char *wal_directory, manifest_wal_range *first_wal_range) +{ + manifest_wal_range *this_wal_range = first_wal_range; + + while (this_wal_range != NULL) + { + char *pg_waldump_cmd; + + pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n", + pg_waldump_path, wal_directory, this_wal_range->tli, + (uint32) (this_wal_range->start_lsn >> 32), + (uint32) this_wal_range->start_lsn, + (uint32) (this_wal_range->end_lsn >> 32), + (uint32) this_wal_range->end_lsn); + if (system(pg_waldump_cmd) != 0) + report_backup_error(context, + "WAL parsing failed for timeline %u", + this_wal_range->tli); + + this_wal_range = this_wal_range->next; + } +} + +/* + * Report a problem with the backup. + * + * Update the context to indicate that we saw an error, and exit if the + * context says we should. 
+ */ +static void +report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) +{ + va_list ap; + + va_start(ap, fmt); + pg_log_generic_v(PG_LOG_ERROR, fmt, ap); + va_end(ap); + + context->saw_any_error = true; + if (context->exit_on_error) + exit(1); +} + +/* + * Report a fatal error and exit + */ +static void +report_fatal_error(const char *pg_restrict fmt,...) +{ + va_list ap; + + va_start(ap, fmt); + pg_log_generic_v(PG_LOG_FATAL, fmt, ap); + va_end(ap); + + exit(1); +} + +/* + * Is the specified relative path, or some prefix of it, listed in the set + * of paths to ignore? + * + * Note that by "prefix" we mean a parent directory; for this purpose, + * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc". + */ +static bool +should_ignore_relpath(verifier_context *context, char *relpath) +{ + SimpleStringListCell *cell; + + for (cell = context->ignore_list.head; cell != NULL; cell = cell->next) + { + char *r = relpath; + char *v = cell->val; + + while (*v != '\0' && *r == *v) + ++r, ++v; + + if (*v == '\0' && (*r == '\0' || *r == '/')) + return true; + } + + return false; +} + +/* + * Helper function for manifest_files hash table. + */ +static uint32 +hash_string_pointer(char *s) +{ + unsigned char *ss = (unsigned char *) s; + + return hash_bytes(ss, strlen(s)); +} + +/* + * Print out usage information and exit. + */ +static void +usage(void) +{ + printf(_("%s verifies a backup against the backup manifest.\n\n"), progname); + printf(_("Usage:\n %s [OPTION]... 
BACKUPDIR\n\n"), progname); + printf(_("Options:\n")); + printf(_(" -e, --exit-on-error exit immediately on error\n")); + printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n")); + printf(_(" -m, --manifest=PATH use specified path for manifest\n")); + printf(_(" -n, --no-parse-wal do not try to parse WAL files\n")); + printf(_(" -s, --skip-checksums skip checksum verification\n")); + printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n")); + printf(_(" -V, --version output version information, then exit\n")); + printf(_(" -?, --help show this help, then exit\n")); + printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); + printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); +} diff --git a/src/bin/pg_verifybackup/t/001_basic.pl b/src/bin/pg_verifybackup/t/001_basic.pl new file mode 100644 index 00000000000..0d4d71aaa10 --- /dev/null +++ b/src/bin/pg_verifybackup/t/001_basic.pl @@ -0,0 +1,30 @@ +use strict; +use warnings; +use TestLib; +use Test::More tests => 16; + +my $tempdir = TestLib::tempdir; + +program_help_ok('pg_verifybackup'); +program_version_ok('pg_verifybackup'); +program_options_handling_ok('pg_verifybackup'); + +command_fails_like(['pg_verifybackup'], + qr/no backup directory specified/, + 'target directory must be specified'); +command_fails_like(['pg_verifybackup', $tempdir], + qr/could not open file.*\/backup_manifest\"/, + 'pg_verifybackup requires a manifest'); +command_fails_like(['pg_verifybackup', $tempdir, $tempdir], + qr/too many command-line arguments/, + 'multiple target directories not allowed'); + +# create fake manifest file +open(my $fh, '>', "$tempdir/backup_manifest") || die "open: $!"; +close($fh); + +# but then try to use an alternate, nonexisting manifest +command_fails_like(['pg_verifybackup', '-m', "$tempdir/not_the_manifest", + $tempdir], + qr/could not open file.*\/not_the_manifest\"/, + 'pg_verifybackup respects -m flag'); diff --git a/src/bin/pg_verifybackup/t/002_algorithm.pl 
b/src/bin/pg_verifybackup/t/002_algorithm.pl new file mode 100644 index 00000000000..ee82dcee376 --- /dev/null +++ b/src/bin/pg_verifybackup/t/002_algorithm.pl @@ -0,0 +1,58 @@ +# Verify that we can take and verify backups with various checksum types. + +use strict; +use warnings; +use Cwd; +use Config; +use File::Path qw(rmtree); +use PostgresNode; +use TestLib; +use Test::More tests => 19; + +my $master = get_new_node('master'); +$master->init(allows_streaming => 1); +$master->start; + +for my $algorithm (qw(bogus none crc32c sha224 sha256 sha384 sha512)) +{ + my $backup_path = $master->backup_dir . '/' . $algorithm; + my @backup = ('pg_basebackup', '-D', $backup_path, + '--manifest-checksums', $algorithm, + '--no-sync'); + my @verify = ('pg_verifybackup', '-e', $backup_path); + + # A backup with a bogus algorithm should fail. + if ($algorithm eq 'bogus') + { + $master->command_fails(\@backup, + "backup fails with algorithm \"$algorithm\""); + next; + } + + # A backup with a valid algorithm should work. + $master->command_ok(\@backup, "backup ok with algorithm \"$algorithm\""); + + # We expect each real checksum algorithm to be mentioned on every line of + # the backup manifest file except the first and last; for simplicity, we + # just check that it shows up lots of times. When the checksum algorithm + # is none, we just check that the manifest exists. + if ($algorithm eq 'none') + { + ok(-f "$backup_path/backup_manifest", "backup manifest exists"); + } + else + { + my $manifest = slurp_file("$backup_path/backup_manifest"); + my $count_of_algorithm_in_manifest = + (() = $manifest =~ /$algorithm/mig); + cmp_ok($count_of_algorithm_in_manifest, '>', 100, + "$algorithm is mentioned many times in the manifest"); + } + + # Make sure that it verifies OK. + $master->command_ok(\@verify, + "verify backup with algorithm \"$algorithm\""); + + # Remove backup immediately to save disk space. 
+ rmtree($backup_path); +} diff --git a/src/bin/pg_verifybackup/t/003_corruption.pl b/src/bin/pg_verifybackup/t/003_corruption.pl new file mode 100644 index 00000000000..113959420ee --- /dev/null +++ b/src/bin/pg_verifybackup/t/003_corruption.pl @@ -0,0 +1,288 @@ +# Verify that various forms of corruption are detected by pg_verifybackup. + +use strict; +use warnings; +use Cwd; +use Config; +use File::Path qw(rmtree); +use PostgresNode; +use TestLib; +use Test::More tests => 44; + +my $master = get_new_node('master'); +$master->init(allows_streaming => 1); +$master->start; + +# Include a user-defined tablespace in the hopes of detecting problems in that +# area. +my $source_ts_path = TestLib::perl2host(TestLib::tempdir_short()); +my $source_ts_prefix = $source_ts_path; +$source_ts_prefix =~ s!(^[A-Z]:/[^/]*)/.*!$1!; + +$master->safe_psql('postgres', < 'extra_file', + 'mutilate' => \&mutilate_extra_file, + 'fails_like' => + qr/extra_file.*present on disk but not in the manifest/ + }, + { + 'name' => 'extra_tablespace_file', + 'mutilate' => \&mutilate_extra_tablespace_file, + 'fails_like' => + qr/extra_ts_file.*present on disk but not in the manifest/ + }, + { + 'name' => 'missing_file', + 'mutilate' => \&mutilate_missing_file, + 'fails_like' => + qr/pg_xact\/0000.*present in the manifest but not on disk/ + }, + { + 'name' => 'missing_tablespace', + 'mutilate' => \&mutilate_missing_tablespace, + 'fails_like' => + qr/pg_tblspc.*present in the manifest but not on disk/ + }, + { + 'name' => 'append_to_file', + 'mutilate' => \&mutilate_append_to_file, + 'fails_like' => + qr/has size \d+ on disk but size \d+ in the manifest/ + }, + { + 'name' => 'truncate_file', + 'mutilate' => \&mutilate_truncate_file, + 'fails_like' => + qr/has size 0 on disk but size \d+ in the manifest/ + }, + { + 'name' => 'replace_file', + 'mutilate' => \&mutilate_replace_file, + 'fails_like' => qr/checksum mismatch for file/ + }, + { + 'name' => 'bad_manifest', + 'mutilate' => 
\&mutilate_bad_manifest, + 'fails_like' => qr/manifest checksum mismatch/ + }, + { + 'name' => 'open_file_fails', + 'mutilate' => \&mutilate_open_file_fails, + 'fails_like' => qr/could not open file/, + 'skip_on_windows' => 1 + }, + { + 'name' => 'open_directory_fails', + 'mutilate' => \&mutilate_open_directory_fails, + 'cleanup' => \&cleanup_open_directory_fails, + 'fails_like' => qr/could not open directory/, + 'skip_on_windows' => 1 + }, + { + 'name' => 'search_directory_fails', + 'mutilate' => \&mutilate_search_directory_fails, + 'cleanup' => \&cleanup_search_directory_fails, + 'fails_like' => qr/could not stat file or directory/, + 'skip_on_windows' => 1 + } +); + +for my $scenario (@scenario) +{ + my $name = $scenario->{'name'}; + + SKIP: + { + skip "unix-style permissions not supported on Windows", 4 + if $scenario->{'skip_on_windows'} && $windows_os; + + # Take a backup and check that it verifies OK. + my $backup_path = $master->backup_dir . '/' . $name; + my $backup_ts_path = TestLib::perl2host(TestLib::tempdir_short()); + # The tablespace map parameter confuses Msys2, which tries to mangle + # it. Tell it not to. + # See https://www.msys2.org/wiki/Porting/#filesystem-namespaces + local $ENV{MSYS2_ARG_CONV_EXCL} = $source_ts_prefix; + $master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync', + '-T', "${source_ts_path}=${backup_ts_path}"], + "base backup ok"); + command_ok(['pg_verifybackup', $backup_path ], + "intact backup verified"); + + # Mutilate the backup in some way. + $scenario->{'mutilate'}->($backup_path); + + # Now check that the backup no longer verifies. + command_fails_like(['pg_verifybackup', $backup_path ], + $scenario->{'fails_like'}, + "corrupt backup fails verification: $name"); + + # Run cleanup hook, if provided. + $scenario->{'cleanup'}->($backup_path) + if exists $scenario->{'cleanup'}; + + # Finally, use rmtree to reclaim space. 
+ rmtree($backup_path); + } +} + +sub create_extra_file +{ + my ($backup_path, $relative_path) = @_; + my $pathname = "$backup_path/$relative_path"; + open(my $fh, '>', $pathname) || die "open $pathname: $!"; + print $fh "This is an extra file.\n"; + close($fh); + return; +} + +# Add a file into the root directory of the backup. +sub mutilate_extra_file +{ + my ($backup_path) = @_; + create_extra_file($backup_path, "extra_file"); + return; +} + +# Add a file inside the user-defined tablespace. +sub mutilate_extra_tablespace_file +{ + my ($backup_path) = @_; + my ($tsoid) = grep { $_ ne '.' && $_ ne '..' } + slurp_dir("$backup_path/pg_tblspc"); + my ($catvdir) = grep { $_ ne '.' && $_ ne '..' } + slurp_dir("$backup_path/pg_tblspc/$tsoid"); + my ($tsdboid) = grep { $_ ne '.' && $_ ne '..' } + slurp_dir("$backup_path/pg_tblspc/$tsoid/$catvdir"); + create_extra_file($backup_path, + "pg_tblspc/$tsoid/$catvdir/$tsdboid/extra_ts_file"); + return; +} + +# Remove a file. +sub mutilate_missing_file +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/pg_xact/0000"; + unlink($pathname) || die "$pathname: $!"; + return; +} + +# Remove the symlink to the user-defined tablespace. +sub mutilate_missing_tablespace +{ + my ($backup_path) = @_; + my ($tsoid) = grep { $_ ne '.' && $_ ne '..' } + slurp_dir("$backup_path/pg_tblspc"); + my $pathname = "$backup_path/pg_tblspc/$tsoid"; + if ($windows_os) + { + # rmdir works on some windows setups, unlink on others. + # Instead of trying to implement precise rules, just try one and then + # the other. + unless (rmdir($pathname)) + { + my $err = $!; + unlink($pathname) || die "$pathname: rmdir: $err, unlink: $!"; + } + } + else + { + unlink($pathname) || die "$pathname: $!"; + } + return; +} + +# Append an additional bytes to a file. +sub mutilate_append_to_file +{ + my ($backup_path) = @_; + append_to_file "$backup_path/global/pg_control", 'x'; + return; +} + +# Truncate a file to zero length. 
+sub mutilate_truncate_file +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/global/pg_control"; + open(my $fh, '>', $pathname) || die "open $pathname: $!"; + close($fh); + return; +} + +# Replace a file's contents without changing the length of the file. This is +# not a particularly efficient way to do this, so we pick a file that's +# expected to be short. +sub mutilate_replace_file +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/PG_VERSION"; + my $contents = slurp_file($pathname); + open(my $fh, '>', $pathname) || die "open $pathname: $!"; + print $fh 'q' x length($contents); + close($fh); + return; +} + +# Corrupt the backup manifest. +sub mutilate_bad_manifest +{ + my ($backup_path) = @_; + append_to_file "$backup_path/backup_manifest", "\n"; + return; +} + +# Create a file that can't be opened. (This is skipped on Windows.) +sub mutilate_open_file_fails +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/PG_VERSION"; + chmod(0, $pathname) || die "chmod $pathname: $!"; + return; +} + +# Create a directory that can't be opened. (This is skipped on Windows.) +sub mutilate_open_directory_fails +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/pg_subtrans"; + chmod(0, $pathname) || die "chmod $pathname: $!"; + return; +} + +# restore permissions on the unreadable directory we created. +sub cleanup_open_directory_fails +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/pg_subtrans"; + chmod(0700, $pathname) || die "chmod $pathname: $!"; + return; +} + +# Create a directory that can't be searched. (This is skipped on Windows.) +sub mutilate_search_directory_fails +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/base"; + chmod(0400, $pathname) || die "chmod $pathname: $!"; + return; +} + +# rmtree can't cope with a mode 400 directory, so change back to 700. 
+sub cleanup_search_directory_fails +{ + my ($backup_path) = @_; + my $pathname = "$backup_path/base"; + chmod(0700, $pathname) || die "chmod $pathname: $!"; + return; +} diff --git a/src/bin/pg_verifybackup/t/004_options.pl b/src/bin/pg_verifybackup/t/004_options.pl new file mode 100644 index 00000000000..9bae8eb565b --- /dev/null +++ b/src/bin/pg_verifybackup/t/004_options.pl @@ -0,0 +1,89 @@ +# Verify the behavior of assorted pg_verifybackup options. + +use strict; +use warnings; +use Cwd; +use Config; +use File::Path qw(rmtree); +use PostgresNode; +use TestLib; +use Test::More tests => 25; + +# Start up the server and take a backup. +my $master = get_new_node('master'); +$master->init(allows_streaming => 1); +$master->start; +my $backup_path = $master->backup_dir . '/test_options'; +$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ], + "base backup ok"); + +# Verify that pg_verifybackup -q succeeds and produces no output. +my $stdout; +my $stderr; +my $result = IPC::Run::run ['pg_verifybackup', '-q', $backup_path ], + '>', \$stdout, '2>', \$stderr; +ok($result, "-q succeeds: exit code 0"); +is($stdout, '', "-q succeeds: no stdout"); +is($stderr, '', "-q succeeds: no stderr"); + +# Corrupt the PG_VERSION file. +my $version_pathname = "$backup_path/PG_VERSION"; +my $version_contents = slurp_file($version_pathname); +open(my $fh, '>', $version_pathname) || die "open $version_pathname: $!"; +print $fh 'q' x length($version_contents); +close($fh); + +# Verify that pg_verifybackup -q now fails. +command_fails_like(['pg_verifybackup', '-q', $backup_path ], + qr/checksum mismatch for file \"PG_VERSION\"/, + '-q checksum mismatch'); + +# Since we didn't change the length of the file, verification should succeed +# if we ignore checksums. Check that we get the right message, too. 
+command_like(['pg_verifybackup', '-s', $backup_path ],
+	qr/backup successfully verified/,
+	'-s skips checksumming');
+
+# Verification should succeed if we ignore the problem file.
+command_like(['pg_verifybackup', '-i', 'PG_VERSION', $backup_path ],
+	qr/backup successfully verified/,
+	'-i ignores problem file');
+
+# PG_VERSION is already corrupt; let's try also removing all of pg_xact.
+rmtree($backup_path . "/pg_xact");
+
+# We're ignoring the problem with PG_VERSION, but not the problem with
+# pg_xact, so verification should fail here.
+command_fails_like(['pg_verifybackup', '-i', 'PG_VERSION', $backup_path ],
+	qr/pg_xact.*is present in the manifest but not on disk/,
+	'-i does not ignore all problems');
+
+# If we use -i twice, we should be able to ignore all of the problems.
+command_like(['pg_verifybackup', '-i', 'PG_VERSION', '-i', 'pg_xact',
+	$backup_path ],
+	qr/backup successfully verified/,
+	'multiple -i options work');
+
+# Verify that when -i is not used, both problems are reported.
+$result = IPC::Run::run ['pg_verifybackup', $backup_path ],
+	'>', \$stdout, '2>', \$stderr;
+ok(!$result, "multiple problems: fails");
+like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
+	"multiple problems: missing files reported");
+like($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
+	"multiple problems: checksum mismatch reported");
+
+# Verify that when -e is used, only the problem detected first is reported.
+$result = IPC::Run::run ['pg_verifybackup', '-e', $backup_path ],
+	'>', \$stdout, '2>', \$stderr;
+ok(!$result, "-e reports 1 error: fails");
+like($stderr, qr/pg_xact.*is present in the manifest but not on disk/,
+	"-e reports 1 error: missing files reported");
+unlike($stderr, qr/checksum mismatch for file \"PG_VERSION\"/,
+	"-e reports 1 error: checksum mismatch not reported");
+
+# Test valid manifest with nonexistent backup directory.
+command_fails_like(['pg_verifybackup', '-m', "$backup_path/backup_manifest", + "$backup_path/fake" ], + qr/could not open directory/, + 'nonexistent backup directory'); diff --git a/src/bin/pg_verifybackup/t/005_bad_manifest.pl b/src/bin/pg_verifybackup/t/005_bad_manifest.pl new file mode 100644 index 00000000000..3dd2b5a20df --- /dev/null +++ b/src/bin/pg_verifybackup/t/005_bad_manifest.pl @@ -0,0 +1,204 @@ +# Test the behavior of pg_verifybackup when the backup manifest has +# problems. + +use strict; +use warnings; +use Cwd; +use Config; +use PostgresNode; +use TestLib; +use Test::More tests => 58; + +my $tempdir = TestLib::tempdir; + +test_bad_manifest('input string ended unexpectedly', + qr/could not parse backup manifest: The input string ended unexpectedly/, + <', "$tempdir/backup_manifest") || die "open: $!"; + print $fh $manifest_contents; + close($fh); + + command_fails_like(['pg_verifybackup', $tempdir], $regexp, + $test_name); + return; +} diff --git a/src/bin/pg_verifybackup/t/006_encoding.pl b/src/bin/pg_verifybackup/t/006_encoding.pl new file mode 100644 index 00000000000..3c6b57adcd4 --- /dev/null +++ b/src/bin/pg_verifybackup/t/006_encoding.pl @@ -0,0 +1,27 @@ +# Verify that pg_verifybackup handles hex-encoded filenames correctly. + +use strict; +use warnings; +use Cwd; +use Config; +use PostgresNode; +use TestLib; +use Test::More tests => 5; + +my $master = get_new_node('master'); +$master->init(allows_streaming => 1); +$master->start; +my $backup_path = $master->backup_dir . 
'/test_encoding'; +$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync', + '--manifest-force-encode' ], + "backup ok with forced hex encoding"); + +my $manifest = slurp_file("$backup_path/backup_manifest"); +my $count_of_encoded_path_in_manifest = + (() = $manifest =~ /Encoded-Path/mig); +cmp_ok($count_of_encoded_path_in_manifest, '>', 100, + "many paths are encoded in the manifest"); + +command_like(['pg_verifybackup', '-s', $backup_path ], + qr/backup successfully verified/, + 'backup with forced encoding verified'); diff --git a/src/bin/pg_verifybackup/t/007_wal.pl b/src/bin/pg_verifybackup/t/007_wal.pl new file mode 100644 index 00000000000..5e891d1b6f4 --- /dev/null +++ b/src/bin/pg_verifybackup/t/007_wal.pl @@ -0,0 +1,55 @@ +# Test pg_verifybackup's WAL verification. + +use strict; +use warnings; +use Cwd; +use Config; +use File::Path qw(rmtree); +use PostgresNode; +use TestLib; +use Test::More tests => 7; + +# Start up the server and take a backup. +my $master = get_new_node('master'); +$master->init(allows_streaming => 1); +$master->start; +my $backup_path = $master->backup_dir . '/test_wal'; +$master->command_ok(['pg_basebackup', '-D', $backup_path, '--no-sync' ], + "base backup ok"); + +# Rename pg_wal. +my $original_pg_wal = $backup_path . '/pg_wal'; +my $relocated_pg_wal = $master->backup_dir . '/relocated_pg_wal'; +rename($original_pg_wal, $relocated_pg_wal) || die "rename pg_wal: $!"; + +# WAL verification should fail. +command_fails_like(['pg_verifybackup', $backup_path ], + qr/WAL parsing failed for timeline 1/, + 'missing pg_wal causes failure'); + +# Should work if we skip WAL verification. +command_ok(['pg_verifybackup', '-n', $backup_path ], + 'missing pg_wal OK if not verifying WAL'); + +# Should also work if we specify the correct WAL location. +command_ok(['pg_verifybackup', '-w', $relocated_pg_wal, $backup_path ], + '-w can be used to specify WAL directory'); + +# Move directory back to original location. 
+rename($relocated_pg_wal, $original_pg_wal) || die "rename pg_wal back: $!"; + +# Get a list of files in that directory that look like WAL files. +my @walfiles = grep { /^[0-9A-F]{24}$/ } slurp_dir($original_pg_wal); + +# Replace the contents of one of the files with garbage of equal length. +my $wal_corruption_target = $original_pg_wal . '/' . $walfiles[0]; +my $wal_size = -s $wal_corruption_target; +open(my $fh, '>', $wal_corruption_target) + || die "open $wal_corruption_target: $!"; +print $fh 'w' x $wal_size; +close($fh); + +# WAL verification should fail. +command_fails_like(['pg_verifybackup', $backup_path ], + qr/WAL parsing failed for timeline 1/, + 'corrupt WAL file causes failure'); -- cgit v1.2.3