/*-------------------------------------------------------------------------
 *
 * pg_verifybackup.c
 *		Verify a backup against a backup manifest.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/bin/pg_verifybackup/pg_verifybackup.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include <dirent.h>
#include <fcntl.h>
#include <limits.h>
#include <sys/stat.h>
#include <time.h>

#include "access/xlog_internal.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "fe_utils/simple_list.h"
#include "getopt_long.h"
#include "pg_verifybackup.h"
#include "pgtime.h"

/*
 * For efficiency, we'd like our hash table containing information about the
 * manifest to start out with approximately the correct number of entries.
 * There's no way to know the exact number of entries without reading the whole
 * file, but we can get an estimate by dividing the file size by the estimated
 * number of bytes per line.
 *
 * This could be off by about a factor of two in either direction, because the
 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
 * might be no checksum at all.
 */
#define ESTIMATED_BYTES_PER_MANIFEST_LINE	100

/*
 * How many bytes should we try to read from a file at once?
 */
#define READ_CHUNK_SIZE		(128 * 1024)

/*
 * Tar file information needed for content verification.
 */
typedef struct tar_file
{
	char	   *relpath;
	Oid			tblspc_oid;
	pg_compress_algorithm compress_algorithm;
} tar_file;

static manifest_data *parse_manifest_file(char *manifest_path);
static void verifybackup_version_cb(JsonManifestParseContext *context,
									int manifest_version);
static void verifybackup_system_identifier(JsonManifestParseContext *context,
										   uint64 manifest_system_identifier);
static void verifybackup_per_file_cb(JsonManifestParseContext *context,
									 const char *pathname, uint64 size,
									 pg_checksum_type checksum_type,
									 int checksum_length,
									 uint8 *checksum_payload);
static void verifybackup_per_wal_range_cb(JsonManifestParseContext *context,
										  TimeLineID tli,
										  XLogRecPtr start_lsn,
										  XLogRecPtr end_lsn);
pg_noreturn static void report_manifest_error(JsonManifestParseContext *context,
											  const char *fmt,...)
			pg_attribute_printf(2, 3);
static void verify_tar_backup(verifier_context *context, DIR *dir);
static void verify_plain_backup_directory(verifier_context *context,
										  char *relpath, char *fullpath,
										  DIR *dir);
static void verify_plain_backup_file(verifier_context *context,
									 char *relpath, char *fullpath);
static void verify_control_file(const char *controlpath,
								uint64 manifest_system_identifier);
static void precheck_tar_backup_file(verifier_context *context,
									 char *relpath, char *fullpath,
									 SimplePtrList *tarfiles);
static void verify_tar_file(verifier_context *context, char *relpath,
							char *fullpath, astreamer *streamer);
static void report_extra_backup_files(verifier_context *context);
static void verify_backup_checksums(verifier_context *context);
static void verify_file_checksum(verifier_context *context,
								 manifest_file *m, char *fullpath,
								 uint8 *buffer);
static void parse_required_wal(verifier_context *context,
							   char *pg_waldump_path,
							   char *wal_directory);
static astreamer *create_archive_verifier(verifier_context *context,
										  char *archive_name,
										  Oid tblspc_oid,
										  pg_compress_algorithm compress_algo);
static void progress_report(bool finished);
static void usage(void);

static const char *progname;

/* is progress reporting enabled?
*/ static bool show_progress = false; /* Progress indicators */ static uint64 total_size = 0; static uint64 done_size = 0; /* * Main entry point. */ int main(int argc, char **argv) { static struct option long_options[] = { {"exit-on-error", no_argument, NULL, 'e'}, {"ignore", required_argument, NULL, 'i'}, {"manifest-path", required_argument, NULL, 'm'}, {"format", required_argument, NULL, 'F'}, {"no-parse-wal", no_argument, NULL, 'n'}, {"progress", no_argument, NULL, 'P'}, {"quiet", no_argument, NULL, 'q'}, {"skip-checksums", no_argument, NULL, 's'}, {"wal-directory", required_argument, NULL, 'w'}, {NULL, 0, NULL, 0} }; int c; verifier_context context; char *manifest_path = NULL; bool no_parse_wal = false; bool quiet = false; char *wal_directory = NULL; char *pg_waldump_path = NULL; DIR *dir; pg_logging_init(argv[0]); set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verifybackup")); progname = get_progname(argv[0]); memset(&context, 0, sizeof(context)); if (argc > 1) { if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { usage(); exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) { puts("pg_verifybackup (PostgreSQL) " PG_VERSION); exit(0); } } /* * Skip certain files in the toplevel directory. * * Ignore the backup_manifest file, because it's not included in the * backup manifest. * * Ignore the pg_wal directory, because those files are not included in * the backup manifest either, since they are fetched separately from the * backup itself, and verified via a separate mechanism. * * Ignore postgresql.auto.conf, recovery.signal, and standby.signal, * because we expect that those files may sometimes be created or changed * as part of the backup process. For example, pg_basebackup -R will * modify postgresql.auto.conf and create standby.signal. 
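	 *
	 * Note that the -i/--ignore option below appends user-specified paths to
	 * this same ignore list, so additional files can be excluded in exactly
	 * the same way.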
*/ simple_string_list_append(&context.ignore_list, "backup_manifest"); simple_string_list_append(&context.ignore_list, "pg_wal"); simple_string_list_append(&context.ignore_list, "postgresql.auto.conf"); simple_string_list_append(&context.ignore_list, "recovery.signal"); simple_string_list_append(&context.ignore_list, "standby.signal"); while ((c = getopt_long(argc, argv, "eF:i:m:nPqsw:", long_options, NULL)) != -1) { switch (c) { case 'e': context.exit_on_error = true; break; case 'i': { char *arg = pstrdup(optarg); canonicalize_path(arg); simple_string_list_append(&context.ignore_list, arg); break; } case 'm': manifest_path = pstrdup(optarg); canonicalize_path(manifest_path); break; case 'F': if (strcmp(optarg, "p") == 0 || strcmp(optarg, "plain") == 0) context.format = 'p'; else if (strcmp(optarg, "t") == 0 || strcmp(optarg, "tar") == 0) context.format = 't'; else pg_fatal("invalid backup format \"%s\", must be \"plain\" or \"tar\"", optarg); break; case 'n': no_parse_wal = true; break; case 'P': show_progress = true; break; case 'q': quiet = true; break; case 's': context.skip_checksums = true; break; case 'w': wal_directory = pstrdup(optarg); canonicalize_path(wal_directory); break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } } /* Get backup directory name */ if (optind >= argc) { pg_log_error("no backup directory specified"); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } context.backup_directory = pstrdup(argv[optind++]); canonicalize_path(context.backup_directory); /* Complain if any arguments remain */ if (optind < argc) { pg_log_error("too many command-line arguments (first is \"%s\")", argv[optind]); pg_log_error_hint("Try \"%s --help\" for more information.", progname); exit(1); } /* Complain if the specified arguments conflict */ if (show_progress && quiet) pg_fatal("cannot specify both %s and %s", "-P/--progress", "-q/--quiet"); /* Unless --no-parse-wal was specified, we will need pg_waldump. */ if (!no_parse_wal) { int ret; pg_waldump_path = pg_malloc(MAXPGPATH); ret = find_other_exec(argv[0], "pg_waldump", "pg_waldump (PostgreSQL) " PG_VERSION "\n", pg_waldump_path); if (ret < 0) { char full_path[MAXPGPATH]; if (find_my_exec(argv[0], full_path) < 0) strlcpy(full_path, progname, sizeof(full_path)); if (ret == -1) pg_fatal("program \"%s\" is needed by %s but was not found in the same directory as \"%s\"", "pg_waldump", "pg_verifybackup", full_path); else pg_fatal("program \"%s\" was found by \"%s\" but was not the same version as %s", "pg_waldump", full_path, "pg_verifybackup"); } } /* By default, look for the manifest in the backup directory. */ if (manifest_path == NULL) manifest_path = psprintf("%s/backup_manifest", context.backup_directory); /* By default, look for the WAL in the backup directory, too. */ if (wal_directory == NULL) wal_directory = psprintf("%s/pg_wal", context.backup_directory); /* * Try to read the manifest. We treat any errors encountered while parsing * the manifest as fatal; there doesn't seem to be much point in trying to * verify the backup directory against a corrupted manifest. */ context.manifest = parse_manifest_file(manifest_path); /* * If the backup directory cannot be found, treat this as a fatal error. 
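	 * Most other problems are merely reported, and we keep going unless
	 * -e/--exit-on-error was given; but with no backup directory there is
	 * nothing left to verify.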
 */
	dir = opendir(context.backup_directory);
	if (dir == NULL)
		report_fatal_error("could not open directory \"%s\": %m",
						   context.backup_directory);

	/*
	 * At this point, we know that the backup directory exists, so it's now
	 * reasonable to check for files immediately inside it. Thus, before going
	 * further, if the user did not specify the backup format, check for
	 * PG_VERSION to distinguish between tar and plain format.
	 */
	if (context.format == '\0')
	{
		struct stat sb;
		char	   *path;

		path = psprintf("%s/%s", context.backup_directory, "PG_VERSION");
		if (stat(path, &sb) == 0)
			context.format = 'p';
		else if (errno != ENOENT)
		{
			pg_log_error("could not stat file \"%s\": %m", path);
			exit(1);
		}
		else
		{
			/* No PG_VERSION, so assume tar format. */
			context.format = 't';
		}
		pfree(path);
	}

	/*
	 * XXX: In the future, we should consider enhancing pg_waldump to read
	 * WAL files from an archive.
	 */
	if (!no_parse_wal && context.format == 't')
	{
		pg_log_error("pg_waldump cannot read tar files");
		pg_log_error_hint("You must use -n/--no-parse-wal when verifying a tar-format backup.");
		exit(1);
	}

	/*
	 * Perform the type of verification appropriate to the backup format.
	 * This will close 'dir'.
	 */
	if (context.format == 'p')
		verify_plain_backup_directory(&context, NULL, context.backup_directory,
									  dir);
	else
		verify_tar_backup(&context, dir);

	/*
	 * The "matched" flag should now be set on every entry in the hash table.
	 * Any entries for which the bit is not set are files mentioned in the
	 * manifest that don't exist on disk (or in the relevant tar files).
	 */
	report_extra_backup_files(&context);

	/*
	 * If this is a tar-format backup, checksums were already verified above;
	 * but if it's a plain-format backup, we postpone it until this point,
	 * since the earlier checks can be performed just by knowing which files
	 * are present, without needing to read all of them.
	 */
	if (context.format == 'p' && !context.skip_checksums)
		verify_backup_checksums(&context);

	/*
	 * Try to parse the required ranges of WAL records, unless we were told
	 * not to do so.
	 */
	if (!no_parse_wal)
		parse_required_wal(&context, pg_waldump_path, wal_directory);

	/*
	 * If everything looks OK, tell the user this, unless we were asked to
	 * work quietly.
	 */
	if (!context.saw_any_error && !quiet)
		printf(_("backup successfully verified\n"));

	return context.saw_any_error ? 1 : 0;
}

/*
 * Parse a manifest file and return a data structure describing the contents.
 */
static manifest_data *
parse_manifest_file(char *manifest_path)
{
	int			fd;
	struct stat statbuf;
	off_t		estimate;
	uint32		initial_size;
	manifest_files_hash *ht;
	char	   *buffer;
	int			rc;
	JsonManifestParseContext context;
	manifest_data *result;
	int			chunk_size = READ_CHUNK_SIZE;

	/* Open the manifest file. */
	if ((fd = open(manifest_path, O_RDONLY | PG_BINARY, 0)) < 0)
		report_fatal_error("could not open file \"%s\": %m", manifest_path);

	/* Figure out how big the manifest is. */
	if (fstat(fd, &statbuf) != 0)
		report_fatal_error("could not stat file \"%s\": %m", manifest_path);

	/* Guess how large to make the hash table based on the manifest size. */
	estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
	initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));

	/* Create the hash table.
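	 * The estimate above only seeds the initial allocation; the hash table
	 * grows automatically if the manifest turns out to contain more entries.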
 */
	ht = manifest_files_create(initial_size, NULL);

	result = pg_malloc0(sizeof(manifest_data));
	result->files = ht;
	context.private_data = result;
	context.version_cb = verifybackup_version_cb;
	context.system_identifier_cb = verifybackup_system_identifier;
	context.per_file_cb = verifybackup_per_file_cb;
	context.per_wal_range_cb = verifybackup_per_wal_range_cb;
	context.error_cb = report_manifest_error;

	/*
	 * Parse the file, in chunks if necessary.
	 */
	if (statbuf.st_size <= chunk_size)
	{
		buffer = pg_malloc(statbuf.st_size);
		rc = read(fd, buffer, statbuf.st_size);
		if (rc != statbuf.st_size)
		{
			if (rc < 0)
				pg_fatal("could not read file \"%s\": %m", manifest_path);
			else
				pg_fatal("could not read file \"%s\": read %d of %lld",
						 manifest_path, rc, (long long int) statbuf.st_size);
		}

		/* Close the manifest file. */
		close(fd);

		/* Parse the manifest. */
		json_parse_manifest(&context, buffer, statbuf.st_size);
	}
	else
	{
		int			bytes_left = statbuf.st_size;
		JsonManifestParseIncrementalState *inc_state;

		inc_state = json_parse_manifest_incremental_init(&context);

		buffer = pg_malloc(chunk_size + 1);

		while (bytes_left > 0)
		{
			int			bytes_to_read = chunk_size;

			/*
			 * Make sure that the last chunk is sufficiently large (i.e. at
			 * least half the chunk size), so that it will fully contain the
			 * piece at the end with the checksum.
			 */
			if (bytes_left < chunk_size)
				bytes_to_read = bytes_left;
			else if (bytes_left < 2 * chunk_size)
				bytes_to_read = bytes_left / 2;
			rc = read(fd, buffer, bytes_to_read);
			if (rc != bytes_to_read)
			{
				if (rc < 0)
					pg_fatal("could not read file \"%s\": %m", manifest_path);
				else
					pg_fatal("could not read file \"%s\": read %lld of %lld",
							 manifest_path,
							 (long long int) (statbuf.st_size + rc - bytes_left),
							 (long long int) statbuf.st_size);
			}
			bytes_left -= rc;
			json_parse_manifest_incremental_chunk(inc_state, buffer, rc,
												  bytes_left == 0);
		}

		/* Release the incremental state memory */
		json_parse_manifest_incremental_shutdown(inc_state);

		close(fd);
	}

	/* Done with the buffer. */
	pfree(buffer);

	return result;
}

/*
 * Report an error while parsing the manifest.
 *
 * We consider all such errors to be fatal errors. The manifest parser
 * expects this function not to return.
 */
static void
report_manifest_error(JsonManifestParseContext *context, const char *fmt,...)
{
	va_list		ap;

	va_start(ap, fmt);
	pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), ap);
	va_end(ap);

	exit(1);
}

/*
 * Record details extracted from the backup manifest.
 */
static void
verifybackup_version_cb(JsonManifestParseContext *context,
						int manifest_version)
{
	manifest_data *manifest = context->private_data;

	/* Validation happens at a later stage. */
	manifest->version = manifest_version;
}

/*
 * Record details extracted from the backup manifest.
 */
static void
verifybackup_system_identifier(JsonManifestParseContext *context,
							   uint64 manifest_system_identifier)
{
	manifest_data *manifest = context->private_data;

	/* Validation happens at a later stage. */
	manifest->system_identifier = manifest_system_identifier;
}

/*
 * Record details extracted from the backup manifest for one file.
 */
static void
verifybackup_per_file_cb(JsonManifestParseContext *context,
						 const char *pathname, uint64 size,
						 pg_checksum_type checksum_type,
						 int checksum_length, uint8 *checksum_payload)
{
	manifest_data *manifest = context->private_data;
	manifest_files_hash *ht = manifest->files;
	manifest_file *m;
	bool		found;

	/* Make a new entry in the hash table for this file.
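	 * The insert also tells us whether the pathname was already present,
	 * which is how duplicate entries in the manifest are detected below.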
*/ m = manifest_files_insert(ht, pathname, &found); if (found) report_fatal_error("duplicate path name in backup manifest: \"%s\"", pathname); /* Initialize the entry. */ m->size = size; m->checksum_type = checksum_type; m->checksum_length = checksum_length; m->checksum_payload = checksum_payload; m->matched = false; m->bad = false; } /* * Record details extracted from the backup manifest for one WAL range. */ static void verifybackup_per_wal_range_cb(JsonManifestParseContext *context, TimeLineID tli, XLogRecPtr start_lsn, XLogRecPtr end_lsn) { manifest_data *manifest = context->private_data; manifest_wal_range *range; /* Allocate and initialize a struct describing this WAL range. */ range = palloc(sizeof(manifest_wal_range)); range->tli = tli; range->start_lsn = start_lsn; range->end_lsn = end_lsn; range->prev = manifest->last_wal_range; range->next = NULL; /* Add it to the end of the list. */ if (manifest->first_wal_range == NULL) manifest->first_wal_range = range; else manifest->last_wal_range->next = range; manifest->last_wal_range = range; } /* * Verify one directory of a plain-format backup. * * 'relpath' is NULL if we are to verify the top-level backup directory, * and otherwise the relative path to the directory that is to be verified. * * 'fullpath' is the backup directory with 'relpath' appended; i.e. the actual * filesystem path at which it can be found. * * 'dir' is an open directory handle, or NULL if the caller wants us to * open it. If the caller chooses to pass a handle, we'll close it when * we're done with it. */ static void verify_plain_backup_directory(verifier_context *context, char *relpath, char *fullpath, DIR *dir) { struct dirent *dirent; /* Open the directory unless the caller did it. */ if (dir == NULL && ((dir = opendir(fullpath)) == NULL)) { report_backup_error(context, "could not open directory \"%s\": %m", fullpath); simple_string_list_append(&context->ignore_list, relpath); return; } while (errno = 0, (dirent = readdir(dir)) != NULL) { char *filename = dirent->d_name; char *newfullpath = psprintf("%s/%s", fullpath, filename); char *newrelpath; /* Skip "." and ".." */ if (filename[0] == '.' && (filename[1] == '\0' || strcmp(filename, "..") == 0)) continue; if (relpath == NULL) newrelpath = pstrdup(filename); else newrelpath = psprintf("%s/%s", relpath, filename); if (!should_ignore_relpath(context, newrelpath)) verify_plain_backup_file(context, newrelpath, newfullpath); pfree(newfullpath); pfree(newrelpath); } if (closedir(dir)) { report_backup_error(context, "could not close directory \"%s\": %m", fullpath); return; } } /* * Verify one file (which might actually be a directory or a symlink). * * The arguments to this function have the same meaning as the similarly named * arguments to verify_plain_backup_directory. */ static void verify_plain_backup_file(verifier_context *context, char *relpath, char *fullpath) { struct stat sb; manifest_file *m; if (stat(fullpath, &sb) != 0) { report_backup_error(context, "could not stat file or directory \"%s\": %m", relpath); /* * Suppress further errors related to this path name and, if it's a * directory, anything underneath it. */ simple_string_list_append(&context->ignore_list, relpath); return; } /* If it's a directory, just recurse. */ if (S_ISDIR(sb.st_mode)) { verify_plain_backup_directory(context, relpath, fullpath, NULL); return; } /* If it's not a directory, it should be a regular file. 
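	 * (Note that stat() follows symbolic links, so a symlink is classified
	 * here according to whatever it points to.)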
*/ if (!S_ISREG(sb.st_mode)) { report_backup_error(context, "\"%s\" is not a regular file or directory", relpath); return; } /* Check whether there's an entry in the manifest hash. */ m = manifest_files_lookup(context->manifest->files, relpath); if (m == NULL) { report_backup_error(context, "\"%s\" is present on disk but not in the manifest", relpath); return; } /* Flag this entry as having been encountered in the filesystem. */ m->matched = true; /* Check that the size matches. */ if (m->size != sb.st_size) { report_backup_error(context, "\"%s\" has size %llu on disk but size %llu in the manifest", relpath, (unsigned long long) sb.st_size, (unsigned long long) m->size); m->bad = true; } /* * Validate the manifest system identifier, not available in manifest * version 1. */ if (context->manifest->version != 1 && strcmp(relpath, XLOG_CONTROL_FILE) == 0) verify_control_file(fullpath, context->manifest->system_identifier); /* Update statistics for progress report, if necessary */ if (show_progress && !context->skip_checksums && should_verify_checksum(m)) total_size += m->size; /* * We don't verify checksums at this stage. We first finish verifying that * we have the expected set of files with the expected sizes, and only * afterwards verify the checksums. That's because computing checksums may * take a while, and we'd like to report more obvious problems quickly. */ } /* * Sanity check control file and validate system identifier against manifest * system identifier. */ static void verify_control_file(const char *controlpath, uint64 manifest_system_identifier) { ControlFileData *control_file; bool crc_ok; pg_log_debug("reading \"%s\"", controlpath); control_file = get_controlfile_by_exact_path(controlpath, &crc_ok); /* Control file contents not meaningful if CRC is bad. */ if (!crc_ok) report_fatal_error("%s: CRC is incorrect", controlpath); /* Can't interpret control file if not current version. */ if (control_file->pg_control_version != PG_CONTROL_VERSION) report_fatal_error("%s: unexpected control file version", controlpath); /* System identifiers should match. */ if (manifest_system_identifier != control_file->system_identifier) report_fatal_error("%s: manifest system identifier is %" PRIu64 ", but control file has %" PRIu64, controlpath, manifest_system_identifier, control_file->system_identifier); /* Release memory. */ pfree(control_file); } /* * Verify tar backup. * * The caller should pass a handle to the target directory, which we will * close when we're done with it. */ static void verify_tar_backup(verifier_context *context, DIR *dir) { struct dirent *dirent; SimplePtrList tarfiles = {NULL, NULL}; SimplePtrListCell *cell; Assert(context->format != 'p'); progress_report(false); /* First pass: scan the directory for tar files. */ while (errno = 0, (dirent = readdir(dir)) != NULL) { char *filename = dirent->d_name; /* Skip "." and ".." */ if (filename[0] == '.' && (filename[1] == '\0' || strcmp(filename, "..") == 0)) continue; /* * Unless it's something we should ignore, perform prechecks and add * it to the list. */ if (!should_ignore_relpath(context, filename)) { char *fullpath; fullpath = psprintf("%s/%s", context->backup_directory, filename); precheck_tar_backup_file(context, filename, fullpath, &tarfiles); pfree(fullpath); } } if (closedir(dir)) { report_backup_error(context, "could not close directory \"%s\": %m", context->backup_directory); return; } /* Second pass: Perform the final verification of the tar contents. 
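	 * Each archive is streamed through a decompressor (if it is compressed)
	 * and a tar parser, and its members are then checked against the
	 * manifest.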
 */
	for (cell = tarfiles.head; cell != NULL; cell = cell->next)
	{
		tar_file   *tar = (tar_file *) cell->ptr;
		astreamer  *streamer;
		char	   *fullpath;

		/*
		 * Prepare the archive streamer stack according to the tar
		 * compression format.
		 */
		streamer = create_archive_verifier(context,
										   tar->relpath,
										   tar->tblspc_oid,
										   tar->compress_algorithm);

		/* Compute the full pathname to the target file. */
		fullpath = psprintf("%s/%s", context->backup_directory,
							tar->relpath);

		/* Invoke the streamer for reading, decompressing, and verifying. */
		verify_tar_file(context, tar->relpath, fullpath, streamer);

		/* Cleanup. */
		pfree(tar->relpath);
		pfree(tar);
		pfree(fullpath);

		astreamer_finalize(streamer);
		astreamer_free(streamer);
	}
	simple_ptr_list_destroy(&tarfiles);

	progress_report(true);
}

/*
 * Preparatory steps for verifying files in tar format backups.
 *
 * Carries out basic validation of the tar format backup file, detects the
 * compression type, and appends that information to the tarfiles list. An
 * error will be reported if the tar file is inaccessible, or if the file
 * type, name, or compression type is not as expected.
 *
 * The arguments to this function are mostly the same as those of
 * verify_plain_backup_file. The additional argument is used to return the
 * list of valid tar files.
 */
static void
precheck_tar_backup_file(verifier_context *context, char *relpath,
						 char *fullpath, SimplePtrList *tarfiles)
{
	struct stat sb;
	Oid			tblspc_oid = InvalidOid;
	pg_compress_algorithm compress_algorithm;
	tar_file   *tar;
	char	   *suffix = NULL;

	/* Should be tar format backup */
	Assert(context->format == 't');

	/* Get file information */
	if (stat(fullpath, &sb) != 0)
	{
		report_backup_error(context,
							"could not stat file or directory \"%s\": %m",
							relpath);
		return;
	}

	/* In a tar format backup, we expect only regular files. */
	if (!S_ISREG(sb.st_mode))
	{
		report_backup_error(context,
							"file \"%s\" is not a regular file",
							relpath);
		return;
	}

	/*
	 * We expect tar files for backing up the main directory, tablespace, and
	 * pg_wal directory.
	 *
	 * pg_basebackup writes the main data directory to an archive file named
	 * base.tar, the pg_wal directory to pg_wal.tar, and the tablespace
	 * directory to <tablespaceoid>.tar, each followed by a compression type
	 * extension such as .gz, .lz4, or .zst.
	 */
	if (strncmp("base", relpath, 4) == 0)
		suffix = relpath + 4;
	else if (strncmp("pg_wal", relpath, 6) == 0)
		suffix = relpath + 6;
	else
	{
		/* Expected a <tablespaceoid>.tar file here. */
		uint64		num = strtoul(relpath, &suffix, 10);

		/*
		 * Report an error if we didn't consume at least one character, if
		 * the result is 0, or if the value is too large to be a valid OID.
		 */
		if (suffix == relpath || num <= 0 || num > OID_MAX)
		{
			report_backup_error(context,
								"file \"%s\" is not expected in a tar format backup",
								relpath);
			return;
		}
		tblspc_oid = (Oid) num;
	}

	/* Now, check the compression type of the tar */
	if (strcmp(suffix, ".tar") == 0)
		compress_algorithm = PG_COMPRESSION_NONE;
	else if (strcmp(suffix, ".tgz") == 0)
		compress_algorithm = PG_COMPRESSION_GZIP;
	else if (strcmp(suffix, ".tar.gz") == 0)
		compress_algorithm = PG_COMPRESSION_GZIP;
	else if (strcmp(suffix, ".tar.lz4") == 0)
		compress_algorithm = PG_COMPRESSION_LZ4;
	else if (strcmp(suffix, ".tar.zst") == 0)
		compress_algorithm = PG_COMPRESSION_ZSTD;
	else
	{
		report_backup_error(context,
							"file \"%s\" is not expected in a tar format backup",
							relpath);
		return;
	}

	/*
	 * Ignore WALs, as reading and verification will be handled through
	 * pg_waldump.
	 */
	if (strncmp("pg_wal", relpath, 6) == 0)
		return;

	/*
	 * Append the information to the list for complete verification at a
	 * later stage.
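	 * Deferring the expensive work also lets this first pass total up the
	 * archive sizes, so that the progress report can show a meaningful
	 * percentage while the contents are verified.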
*/ tar = pg_malloc(sizeof(tar_file)); tar->relpath = pstrdup(relpath); tar->tblspc_oid = tblspc_oid; tar->compress_algorithm = compress_algorithm; simple_ptr_list_append(tarfiles, tar); /* Update statistics for progress report, if necessary */ if (show_progress) total_size += sb.st_size; } /* * Verification of a single tar file content. * * It reads a given tar archive in predefined chunks and passes it to the * streamer, which initiates routines for decompression (if necessary) and then * verifies each member within the tar file. */ static void verify_tar_file(verifier_context *context, char *relpath, char *fullpath, astreamer *streamer) { int fd; int rc; char *buffer; pg_log_debug("reading \"%s\"", fullpath); /* Open the target file. */ if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) { report_backup_error(context, "could not open file \"%s\": %m", relpath); return; } buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8)); /* Perform the reads */ while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0) { astreamer_content(streamer, NULL, buffer, rc, ASTREAMER_UNKNOWN); /* Report progress */ done_size += rc; progress_report(false); } pg_free(buffer); if (rc < 0) report_backup_error(context, "could not read file \"%s\": %m", relpath); /* Close the file. */ if (close(fd) != 0) report_backup_error(context, "could not close file \"%s\": %m", relpath); } /* * Scan the hash table for entries where the 'matched' flag is not set; report * that such files are present in the manifest but not on disk. */ static void report_extra_backup_files(verifier_context *context) { manifest_data *manifest = context->manifest; manifest_files_iterator it; manifest_file *m; manifest_files_start_iterate(manifest->files, &it); while ((m = manifest_files_iterate(manifest->files, &it)) != NULL) if (!m->matched && !should_ignore_relpath(context, m->pathname)) report_backup_error(context, "\"%s\" is present in the manifest but not on disk", m->pathname); } /* * Verify checksums for hash table entries that are otherwise unproblematic. * If we've already reported some problem related to a hash table entry, or * if it has no checksum, just skip it. */ static void verify_backup_checksums(verifier_context *context) { manifest_data *manifest = context->manifest; manifest_files_iterator it; manifest_file *m; uint8 *buffer; progress_report(false); buffer = pg_malloc(READ_CHUNK_SIZE * sizeof(uint8)); manifest_files_start_iterate(manifest->files, &it); while ((m = manifest_files_iterate(manifest->files, &it)) != NULL) { if (should_verify_checksum(m) && !should_ignore_relpath(context, m->pathname)) { char *fullpath; /* Compute the full pathname to the target file. */ fullpath = psprintf("%s/%s", context->backup_directory, m->pathname); /* Do the actual checksum verification. */ verify_file_checksum(context, m, fullpath, buffer); /* Avoid leaking memory. */ pfree(fullpath); } } pfree(buffer); progress_report(true); } /* * Verify the checksum of a single file. */ static void verify_file_checksum(verifier_context *context, manifest_file *m, char *fullpath, uint8 *buffer) { pg_checksum_context checksum_ctx; const char *relpath = m->pathname; int fd; int rc; uint64 bytes_read = 0; uint8 checksumbuf[PG_CHECKSUM_MAX_LENGTH]; int checksumlen; /* Open the target file. */ if ((fd = open(fullpath, O_RDONLY | PG_BINARY, 0)) < 0) { report_backup_error(context, "could not open file \"%s\": %m", relpath); return; } /* Initialize checksum context. 
*/ if (pg_checksum_init(&checksum_ctx, m->checksum_type) < 0) { report_backup_error(context, "could not initialize checksum of file \"%s\"", relpath); close(fd); return; } /* Read the file chunk by chunk, updating the checksum as we go. */ while ((rc = read(fd, buffer, READ_CHUNK_SIZE)) > 0) { bytes_read += rc; if (pg_checksum_update(&checksum_ctx, buffer, rc) < 0) { report_backup_error(context, "could not update checksum of file \"%s\"", relpath); close(fd); return; } /* Report progress */ done_size += rc; progress_report(false); } if (rc < 0) report_backup_error(context, "could not read file \"%s\": %m", relpath); /* Close the file. */ if (close(fd) != 0) { report_backup_error(context, "could not close file \"%s\": %m", relpath); return; } /* If we didn't manage to read the whole file, bail out now. */ if (rc < 0) return; /* * Double-check that we read the expected number of bytes from the file. * Normally, mismatches would be caught in verify_plain_backup_file and * this check would never be reached, but this provides additional safety * and clarity in the event of concurrent modifications or filesystem * misbehavior. */ if (bytes_read != m->size) { report_backup_error(context, "file \"%s\" should contain %" PRIu64 " bytes, but read %" PRIu64, relpath, m->size, bytes_read); return; } /* Get the final checksum. */ checksumlen = pg_checksum_final(&checksum_ctx, checksumbuf); if (checksumlen < 0) { report_backup_error(context, "could not finalize checksum of file \"%s\"", relpath); return; } /* And check it against the manifest. */ if (checksumlen != m->checksum_length) report_backup_error(context, "file \"%s\" has checksum of length %d, but expected %d", relpath, m->checksum_length, checksumlen); else if (memcmp(checksumbuf, m->checksum_payload, checksumlen) != 0) report_backup_error(context, "checksum mismatch for file \"%s\"", relpath); } /* * Attempt to parse the WAL files required to restore from backup using * pg_waldump. */ static void parse_required_wal(verifier_context *context, char *pg_waldump_path, char *wal_directory) { manifest_data *manifest = context->manifest; manifest_wal_range *this_wal_range = manifest->first_wal_range; while (this_wal_range != NULL) { char *pg_waldump_cmd; pg_waldump_cmd = psprintf("\"%s\" --quiet --path=\"%s\" --timeline=%u --start=%X/%X --end=%X/%X\n", pg_waldump_path, wal_directory, this_wal_range->tli, LSN_FORMAT_ARGS(this_wal_range->start_lsn), LSN_FORMAT_ARGS(this_wal_range->end_lsn)); fflush(NULL); if (system(pg_waldump_cmd) != 0) report_backup_error(context, "WAL parsing failed for timeline %u", this_wal_range->tli); this_wal_range = this_wal_range->next; } } /* * Report a problem with the backup. * * Update the context to indicate that we saw an error, and exit if the * context says we should. */ void report_backup_error(verifier_context *context, const char *pg_restrict fmt,...) { va_list ap; va_start(ap, fmt); pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), ap); va_end(ap); context->saw_any_error = true; if (context->exit_on_error) exit(1); } /* * Report a fatal error and exit */ void report_fatal_error(const char *pg_restrict fmt,...) { va_list ap; va_start(ap, fmt); pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), ap); va_end(ap); exit(1); } /* * Is the specified relative path, or some prefix of it, listed in the set * of paths to ignore? * * Note that by "prefix" we mean a parent directory; for this purpose, * "aa/bb" is not a prefix of "aa/bbb", but it is a prefix of "aa/bb/cc". 
*/ bool should_ignore_relpath(verifier_context *context, const char *relpath) { SimpleStringListCell *cell; for (cell = context->ignore_list.head; cell != NULL; cell = cell->next) { const char *r = relpath; char *v = cell->val; while (*v != '\0' && *r == *v) ++r, ++v; if (*v == '\0' && (*r == '\0' || *r == '/')) return true; } return false; } /* * Create a chain of archive streamers appropriate for verifying a given * archive. */ static astreamer * create_archive_verifier(verifier_context *context, char *archive_name, Oid tblspc_oid, pg_compress_algorithm compress_algo) { astreamer *streamer = NULL; /* Should be here only for tar backup */ Assert(context->format == 't'); /* Last step is the actual verification. */ streamer = astreamer_verify_content_new(streamer, context, archive_name, tblspc_oid); /* Before that we must parse the tar file. */ streamer = astreamer_tar_parser_new(streamer); /* Before that we must decompress, if archive is compressed. */ if (compress_algo == PG_COMPRESSION_GZIP) streamer = astreamer_gzip_decompressor_new(streamer); else if (compress_algo == PG_COMPRESSION_LZ4) streamer = astreamer_lz4_decompressor_new(streamer); else if (compress_algo == PG_COMPRESSION_ZSTD) streamer = astreamer_zstd_decompressor_new(streamer); return streamer; } /* * Print a progress report based on the global variables. * * Progress report is written at maximum once per second, unless the finished * parameter is set to true. * * If finished is set to true, this is the last progress report. The cursor * is moved to the next line. */ static void progress_report(bool finished) { static pg_time_t last_progress_report = 0; pg_time_t now; int percent_size = 0; char totalsize_str[32]; char donesize_str[32]; if (!show_progress) return; now = time(NULL); if (now == last_progress_report && !finished) return; /* Max once per second */ last_progress_report = now; percent_size = total_size ? (int) ((done_size * 100 / total_size)) : 0; snprintf(totalsize_str, sizeof(totalsize_str), UINT64_FORMAT, total_size / 1024); snprintf(donesize_str, sizeof(donesize_str), UINT64_FORMAT, done_size / 1024); fprintf(stderr, _("%*s/%s kB (%d%%) verified"), (int) strlen(totalsize_str), donesize_str, totalsize_str, percent_size); /* * Stay on the same line if reporting to a terminal and we're not done * yet. */ fputc((!finished && isatty(fileno(stderr))) ? '\r' : '\n', stderr); } /* * Print out usage information and exit. */ static void usage(void) { printf(_("%s verifies a backup against the backup manifest.\n\n"), progname); printf(_("Usage:\n %s [OPTION]... BACKUPDIR\n\n"), progname); printf(_("Options:\n")); printf(_(" -e, --exit-on-error exit immediately on error\n")); printf(_(" -F, --format=p|t backup format (plain, tar)\n")); printf(_(" -i, --ignore=RELATIVE_PATH ignore indicated path\n")); printf(_(" -m, --manifest-path=PATH use specified path for manifest\n")); printf(_(" -n, --no-parse-wal do not try to parse WAL files\n")); printf(_(" -P, --progress show progress information\n")); printf(_(" -q, --quiet do not print any output, except for errors\n")); printf(_(" -s, --skip-checksums skip checksum verification\n")); printf(_(" -w, --wal-directory=PATH use specified path for WAL files\n")); printf(_(" -V, --version output version information, then exit\n")); printf(_(" -?, --help show this help, then exit\n")); printf(_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); printf(_("%s home page: <%s>\n"), PACKAGE_NAME, PACKAGE_URL); }