aboutsummaryrefslogtreecommitdiff
path: root/src/bin/pg_combinebackup/load_manifest.c
blob: 8e0d04a26a6a73d5b14e773f7260b3711dca8a16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
/*-------------------------------------------------------------------------
 *
 * Load data from a backup manifest into memory.
 *
 * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/bin/pg_combinebackup/load_manifest.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres_fe.h"

#include <sys/stat.h>
#include <unistd.h>

#include "common/hashfn_unstable.h"
#include "common/logging.h"
#include "common/parse_manifest.h"
#include "load_manifest.h"

/*
 * For efficiency, we'd like our hash table containing information about the
 * manifest to start out with approximately the correct number of entries.
 * There's no way to know the exact number of entries without reading the whole
 * file, but we can get an estimate by dividing the file size by the estimated
 * number of bytes per line.
 *
 * This could be off by about a factor of two in either direction, because the
 * checksum algorithm has a big impact on the line lengths; e.g. a SHA512
 * checksum is 128 hex bytes, whereas a CRC-32C value is only 8, and there
 * might be no checksum at all.
 */
#define ESTIMATED_BYTES_PER_MANIFEST_LINE	100

/*
 * Size of the chunks in which the manifest's JSON is read when the file is
 * too large to be read and parsed in a single piece.
 */
#define READ_CHUNK_SIZE (128  * 1024)

/*
 * Define a hash table which we can use to store information about the files
 * mentioned in the backup manifest.
 *
 * Elements are manifest_file structs keyed by their pathname member; keys are
 * hashed with hash_string() and compared with strcmp(). The functions used
 * later in this file (manifest_files_create, manifest_files_insert) carry the
 * manifest_files prefix chosen here, and table memory is obtained through
 * pg_malloc0.
 */
#define SH_PREFIX		manifest_files
#define SH_ELEMENT_TYPE	manifest_file
#define SH_KEY_TYPE		const char *
#define	SH_KEY			pathname
#define SH_HASH_KEY(tb, key)	hash_string(key)
#define SH_EQUAL(tb, a, b)		(strcmp(a, b) == 0)
#define	SH_SCOPE		extern
#define SH_RAW_ALLOCATOR	pg_malloc0
#define SH_DEFINE
#include "lib/simplehash.h"

static void combinebackup_version_cb(JsonManifestParseContext *context,
									 int manifest_version);
static void combinebackup_system_identifier_cb(JsonManifestParseContext *context,
											   uint64 manifest_system_identifier);
static void combinebackup_per_file_cb(JsonManifestParseContext *context,
									  const char *pathname, uint64 size,
									  pg_checksum_type checksum_type,
									  int checksum_length,
									  uint8 *checksum_payload);
static void combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
										   TimeLineID tli,
										   XLogRecPtr start_lsn,
										   XLogRecPtr end_lsn);
pg_noreturn static void report_manifest_error(JsonManifestParseContext *context,
											  const char *fmt,...)
			pg_attribute_printf(2, 3);

/*
 * Load backup_manifest files from an array of backups and produces an array
 * of manifest_data objects.
 *
 * NB: Since load_backup_manifest() can return NULL, the resulting array could
 * contain NULL entries.
 */
manifest_data **
load_backup_manifests(int n_backups, char **backup_directories)
{
	manifest_data **result;
	int			i;

	result = pg_malloc(sizeof(manifest_data *) * n_backups);
	for (i = 0; i < n_backups; ++i)
		result[i] = load_backup_manifest(backup_directories[i]);

	return result;
}

/*
 * Parse the backup_manifest file in the named backup directory. Construct a
 * hash table with information about all the files it mentions, and a linked
 * list of all the WAL ranges it mentions.
 *
 * A manifest no larger than READ_CHUNK_SIZE is read and parsed in one piece;
 * anything bigger is fed to the incremental parser chunk by chunk.
 *
 * If the backup_manifest file simply doesn't exist, logs a warning and returns
 * NULL. Any other error, or any error parsing the contents of the file, is
 * fatal.
 */
manifest_data *
load_backup_manifest(char *backup_directory)
{
	char		pathname[MAXPGPATH];
	int			fd;
	struct stat statbuf;
	off_t		estimate;
	uint32		initial_size;
	manifest_files_hash *ht;
	char	   *buffer;
	int			rc;
	JsonManifestParseContext context;
	manifest_data *result;
	int			chunk_size = READ_CHUNK_SIZE;

	/* Open the manifest file. */
	snprintf(pathname, MAXPGPATH, "%s/backup_manifest", backup_directory);
	if ((fd = open(pathname, O_RDONLY | PG_BINARY, 0)) < 0)
	{
		if (errno == ENOENT)
		{
			pg_log_warning("file \"%s\" does not exist", pathname);
			return NULL;
		}
		pg_fatal("could not open file \"%s\": %m", pathname);
	}

	/* Figure out how big the manifest is. */
	if (fstat(fd, &statbuf) != 0)
		pg_fatal("could not stat file \"%s\": %m", pathname);

	/* Guess how large to make the hash table based on the manifest size. */
	estimate = statbuf.st_size / ESTIMATED_BYTES_PER_MANIFEST_LINE;
	initial_size = Min(PG_UINT32_MAX, Max(estimate, 256));

	/* Create the hash table. */
	ht = manifest_files_create(initial_size, NULL);

	/* Set up the result object and the callbacks that will populate it. */
	result = pg_malloc0(sizeof(manifest_data));
	result->files = ht;
	context.private_data = result;
	context.version_cb = combinebackup_version_cb;
	context.system_identifier_cb = combinebackup_system_identifier_cb;
	context.per_file_cb = combinebackup_per_file_cb;
	context.per_wal_range_cb = combinebackup_per_wal_range_cb;
	context.error_cb = report_manifest_error;

	/*
	 * Parse the file, in chunks if necessary.
	 */
	if (statbuf.st_size <= chunk_size)
	{
		buffer = pg_malloc(statbuf.st_size);
		rc = read(fd, buffer, statbuf.st_size);
		if (rc != statbuf.st_size)
		{
			if (rc < 0)
				pg_fatal("could not read file \"%s\": %m", pathname);
			else
				pg_fatal("could not read file \"%s\": read %d of %lld",
						 pathname, rc, (long long int) statbuf.st_size);
		}

		/* Close the manifest file. */
		close(fd);

		/* Parse the manifest. */
		json_parse_manifest(&context, buffer, statbuf.st_size);
	}
	else
	{
		/*
		 * Track the unread remainder in a 64-bit counter. Storing st_size in
		 * an int would truncate it for a manifest whose size does not fit in
		 * an int, which could leave the loop below reading too little of the
		 * file, or nothing at all.
		 */
		long long	bytes_left = statbuf.st_size;
		JsonManifestParseIncrementalState *inc_state;

		inc_state = json_parse_manifest_incremental_init(&context);

		buffer = pg_malloc(chunk_size + 1);

		while (bytes_left > 0)
		{
			int			bytes_to_read = chunk_size;

			/*
			 * Make sure that the last chunk is sufficiently large. (i.e. at
			 * least half the chunk size) so that it will contain fully the
			 * piece at the end with the checksum.
			 *
			 * Both casts are safe: the value being narrowed is known to be
			 * below 2 * chunk_size at that point.
			 */
			if (bytes_left < chunk_size)
				bytes_to_read = (int) bytes_left;
			else if (bytes_left < 2 * chunk_size)
				bytes_to_read = (int) (bytes_left / 2);
			rc = read(fd, buffer, bytes_to_read);
			if (rc != bytes_to_read)
			{
				if (rc < 0)
					pg_fatal("could not read file \"%s\": %m", pathname);
				else
					pg_fatal("could not read file \"%s\": read %lld of %lld",
							 pathname,
							 (long long int) (statbuf.st_size + rc - bytes_left),
							 (long long int) statbuf.st_size);
			}
			bytes_left -= rc;

			/* The final argument tells the parser this is the last chunk. */
			json_parse_manifest_incremental_chunk(inc_state, buffer, rc, bytes_left == 0);
		}

		/* Release the incremental state memory */
		json_parse_manifest_incremental_shutdown(inc_state);

		close(fd);
	}

	/* All done. */
	pfree(buffer);
	return result;
}

/*
 * Error callback handed to the manifest parser.
 *
 * Every parse error is treated as fatal: the (translated) message is logged
 * at error level and the process exits with status 1. The manifest parser
 * relies on this function never returning.
 */
static void
report_manifest_error(JsonManifestParseContext *context, const char *fmt,...)
{
	va_list		args;

	va_start(args, fmt);
	pg_log_generic_v(PG_LOG_ERROR, PG_LOG_PRIMARY, gettext(fmt), args);
	va_end(args);

	exit(1);
}

/*
 * Callback that validates the manifest version number for incremental
 * backup. A version 1 manifest is rejected with a fatal error; any other
 * version is accepted without further action.
 */
static void
combinebackup_version_cb(JsonManifestParseContext *context,
						 int manifest_version)
{
	/* Incremental backups supported on manifest version 2 or later */
	if (manifest_version != 1)
		return;

	pg_fatal("backup manifest version 1 does not support incremental backup");
}

/*
 * Callback that stores the system identifier found in the backup manifest
 * into the manifest_data object reachable through the parse context.
 */
static void
combinebackup_system_identifier_cb(JsonManifestParseContext *context,
								   uint64 manifest_system_identifier)
{
	manifest_data *data;

	data = context->private_data;

	/* Only record the value here; it is validated at a later stage. */
	data->system_identifier = manifest_system_identifier;
}

/*
 * Callback that records the details of one file listed in the backup
 * manifest.
 *
 * The file is entered into the manifest's hash table under its path name;
 * encountering the same path name twice is a fatal error.
 */
static void
combinebackup_per_file_cb(JsonManifestParseContext *context,
						  const char *pathname, uint64 size,
						  pg_checksum_type checksum_type,
						  int checksum_length, uint8 *checksum_payload)
{
	manifest_data *data = context->private_data;
	manifest_file *entry;
	bool		already_present;

	/* Claim a hash table slot for this path name. */
	entry = manifest_files_insert(data->files, pathname, &already_present);
	if (already_present)
		pg_fatal("duplicate path name in backup manifest: \"%s\"", pathname);

	/* Fill in the new entry from the values the parser handed us. */
	entry->size = size;
	entry->checksum_type = checksum_type;
	entry->checksum_length = checksum_length;
	entry->checksum_payload = checksum_payload;
}

/*
 * Callback that records one WAL range listed in the backup manifest by
 * appending it to the manifest's doubly linked list of WAL ranges.
 */
static void
combinebackup_per_wal_range_cb(JsonManifestParseContext *context,
							   TimeLineID tli,
							   XLogRecPtr start_lsn, XLogRecPtr end_lsn)
{
	manifest_data *data = context->private_data;
	manifest_wal_range *tail = data->last_wal_range;
	manifest_wal_range *node;

	/* Build the new list node; it will become the last one. */
	node = palloc(sizeof(manifest_wal_range));
	node->tli = tli;
	node->start_lsn = start_lsn;
	node->end_lsn = end_lsn;
	node->prev = tail;
	node->next = NULL;

	/* Hook it in after the current tail, or make it the head if empty. */
	if (data->first_wal_range != NULL)
		tail->next = node;
	else
		data->first_wal_range = node;
	data->last_wal_range = node;
}