aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2012-09-28 15:19:15 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2012-09-28 15:19:15 -0400
commit 05b555d12bc2ad0d581f48a12b45174db41dc10d (patch)
tree 4b279376cbd5d9bec6eb28267ab6d353abe09c15
parent edc9109c42299ea8d7d897647967cf65d638617c (diff)
download postgresql-05b555d12bc2ad0d581f48a12b45174db41dc10d.tar.gz
postgresql-05b555d12bc2ad0d581f48a12b45174db41dc10d.zip
Fix tar files emitted by pg_dump and pg_basebackup to be POSIX conformant.

Both programs got the "magic" string wrong, causing standard-conforming tar implementations to believe the output was just legacy tar format without any POSIX extensions. This doesn't actually matter that much, especially since pg_dump failed to fill the POSIX fields anyway, but still there is little point in emitting tar format if we can't be compliant with the standard. In addition, pg_dump failed to write the EOF marker correctly (there should be 2 blocks of zeroes not just one), pg_basebackup put the numeric group ID in the wrong place, and both programs had a pretty brain-dead idea of how to compute the checksum. Fix all that and improve the comments a bit.

pg_restore is modified to accept either the correct POSIX-compliant "magic" string or the previous value. This part of the change will need to be back-patched to avoid an unnecessary compatibility break when a previous version tries to read tar-format output from 9.3 pg_dump.

Brian Weaver and Tom Lane

-rw-r--r-- doc/src/sgml/protocol.sgml 8
-rw-r--r-- src/backend/replication/basebackup.c 63
-rw-r--r-- src/bin/pg_dump/pg_backup_tar.c 87
-rw-r--r-- src/bin/pg_dump/pg_backup_tar.h 21
4 files changed, 110 insertions, 69 deletions
diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml
index e72556303ab..3d72a162ebf 100644
--- a/doc/src/sgml/protocol.sgml
+++ b/doc/src/sgml/protocol.sgml
@@ -1759,9 +1759,11 @@ The commands accepted in walsender mode are:
After the second regular result set, one or more CopyResponse results
will be sent, one for PGDATA and one for each additional tablespace other
than <literal>pg_default</> and <literal>pg_global</>. The data in
- the CopyResponse results will be a tar format (using ustar00
- extensions) dump of the tablespace contents. After the tar data is
- complete, a final ordinary result set will be sent.
+ the CopyResponse results will be a tar format (following the
+ <quote>ustar interchange format</> specified in the POSIX 1003.1-2008
+ standard) dump of the tablespace contents, except that the two trailing
+ blocks of zeroes specified in the standard are omitted.
+ After the tar data is complete, a final ordinary result set will be sent.
</para>
<para>
diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c
index 4aaa9e3d088..4636e8d1c6f 100644
--- a/src/backend/replication/basebackup.c
+++ b/src/backend/replication/basebackup.c
@@ -568,7 +568,7 @@ sendFileWithContent(const char *filename, const char *content)
/*
* Include all files from the given directory in the output tar stream. If
- * 'sizeonly' is true, we just calculate a total length and return ig, without
+ * 'sizeonly' is true, we just calculate a total length and return it, without
* actually sending anything.
*/
static int64
@@ -763,11 +763,16 @@ _tarChecksum(char *header)
int i,
sum;
- sum = 0;
+ /*
+ * Per POSIX, the checksum is the simple sum of all bytes in the header,
+ * treating the bytes as unsigned, and treating the checksum field (at
+ * offset 148) as though it contained 8 spaces.
+ */
+ sum = 8 * ' '; /* presumed value for checksum field */
for (i = 0; i < 512; i++)
if (i < 148 || i >= 156)
sum += 0xFF & header[i];
- return sum + 256; /* Assume 8 blanks in checksum field */
+ return sum;
}
/* Given the member, write the TAR header & send the file */
@@ -846,9 +851,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
struct stat * statbuf)
{
char h[512];
- int lastSum = 0;
- int sum;
+ /*
+ * Note: most of the fields in a tar header are not supposed to be
+ * null-terminated. We use sprintf, which will write a null after the
+ * required bytes; that null goes into the first byte of the next field.
+ * This is okay as long as we fill the fields in order.
+ */
memset(h, 0, sizeof(h));
/* Name 100 */
@@ -860,8 +869,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
* indicated in the tar format by adding a slash at the end of the
* name, the same as for regular directories.
*/
- h[strlen(filename)] = '/';
- h[strlen(filename) + 1] = '\0';
+ int flen = strlen(filename);
+
+ flen = Min(flen, 99);
+ h[flen] = '/';
+ h[flen + 1] = '\0';
}
/* Mode 8 */
@@ -871,9 +883,9 @@ _tarWriteHeader(const char *filename, const char *linktarget,
sprintf(&h[108], "%07o ", statbuf->st_uid);
/* Group 8 */
- sprintf(&h[117], "%07o ", statbuf->st_gid);
+ sprintf(&h[116], "%07o ", statbuf->st_gid);
- /* File size 12 - 11 digits, 1 space, no NUL */
+ /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
if (linktarget != NULL || S_ISDIR(statbuf->st_mode))
/* Symbolic link or directory has size zero */
print_val(&h[124], 0, 8, 11);
@@ -884,13 +896,13 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* Mod Time 12 */
sprintf(&h[136], "%011o ", (int) statbuf->st_mtime);
- /* Checksum 8 */
- sprintf(&h[148], "%06o ", lastSum);
+ /* Checksum 8 cannot be calculated until we've filled all other fields */
if (linktarget != NULL)
{
/* Type - Symbolic link */
sprintf(&h[156], "2");
+ /* Link Name 100 */
sprintf(&h[157], "%.99s", linktarget);
}
else if (S_ISDIR(statbuf->st_mode))
@@ -900,10 +912,11 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* Type - regular file */
sprintf(&h[156], "0");
- /* Link tag 100 (NULL) */
+ /* Magic 6 */
+ sprintf(&h[257], "ustar");
- /* Magic 6 + Version 2 */
- sprintf(&h[257], "ustar00");
+ /* Version 2 */
+ sprintf(&h[263], "00");
/* User 32 */
/* XXX: Do we need to care about setting correct username? */
@@ -913,17 +926,21 @@ _tarWriteHeader(const char *filename, const char *linktarget,
/* XXX: Do we need to care about setting correct group name? */
sprintf(&h[297], "%.31s", "postgres");
- /* Maj Dev 8 */
- sprintf(&h[329], "%6o ", 0);
+ /* Major Dev 8 */
+ sprintf(&h[329], "%07o ", 0);
- /* Min Dev 8 */
- sprintf(&h[337], "%6o ", 0);
+ /* Minor Dev 8 */
+ sprintf(&h[337], "%07o ", 0);
- while ((sum = _tarChecksum(h)) != lastSum)
- {
- sprintf(&h[148], "%06o ", sum);
- lastSum = sum;
- }
+ /* Prefix 155 - not used, leave as nulls */
+
+ /*
+ * We mustn't overwrite the next field while inserting the checksum.
+ * Fortunately, the checksum can't exceed 6 octal digits, so we just write
+ * 6 digits, a space, and a null, which is legal per POSIX.
+ */
+ sprintf(&h[148], "%06o ", _tarChecksum(h));
+ /* Now send the completed header. */
pq_putmessage('d', h, 512);
}
diff --git a/src/bin/pg_dump/pg_backup_tar.c b/src/bin/pg_dump/pg_backup_tar.c
index ced5c13321e..34d2c1a0df5 100644
--- a/src/bin/pg_dump/pg_backup_tar.c
+++ b/src/bin/pg_dump/pg_backup_tar.c
@@ -882,8 +882,10 @@ _CloseArchive(ArchiveHandle *AH)
tarClose(AH, th);
- /* Add a block of NULLs since it's de-rigeur. */
- for (i = 0; i < 512; i++)
+ /*
+ * EOF marker for tar files is two blocks of NULLs.
+ */
+ for (i = 0; i < 512 * 2; i++)
{
if (fputc(0, ctx->tarFH) == EOF)
exit_horribly(modulename,
@@ -1032,11 +1034,16 @@ _tarChecksum(char *header)
int i,
sum;
- sum = 0;
+ /*
+ * Per POSIX, the checksum is the simple sum of all bytes in the header,
+ * treating the bytes as unsigned, and treating the checksum field (at
+ * offset 148) as though it contained 8 spaces.
+ */
+ sum = 8 * ' '; /* presumed value for checksum field */
for (i = 0; i < 512; i++)
if (i < 148 || i >= 156)
sum += 0xFF & header[i];
- return sum + 256; /* Assume 8 blanks in checksum field */
+ return sum;
}
bool
@@ -1050,11 +1057,15 @@ isValidTarHeader(char *header)
if (sum != chk)
return false;
- /* POSIX format */
- if (strncmp(&header[257], "ustar00", 7) == 0)
+ /* POSIX tar format */
+ if (memcmp(&header[257], "ustar\0", 6) == 0 &&
+ memcmp(&header[263], "00", 2) == 0)
return true;
- /* older format */
- if (strncmp(&header[257], "ustar  ", 7) == 0)
+ /* GNU tar format */
+ if (memcmp(&header[257], "ustar  \0", 8) == 0)
+ return true;
+ /* not-quite-POSIX format written by pre-9.3 pg_dump */
+ if (memcmp(&header[257], "ustar00\0", 8) == 0)
return true;
return false;
@@ -1329,63 +1340,71 @@ static void
_tarWriteHeader(TAR_MEMBER *th)
{
char h[512];
- int lastSum = 0;
- int sum;
+ /*
+ * Note: most of the fields in a tar header are not supposed to be
+ * null-terminated. We use sprintf, which will write a null after the
+ * required bytes; that null goes into the first byte of the next field.
+ * This is okay as long as we fill the fields in order.
+ */
memset(h, 0, sizeof(h));
/* Name 100 */
sprintf(&h[0], "%.99s", th->targetFile);
/* Mode 8 */
- sprintf(&h[100], "100600 ");
+ sprintf(&h[100], "0000600 ");
/* User ID 8 */
- sprintf(&h[108], "004000 ");
+ sprintf(&h[108], "0004000 ");
/* Group 8 */
- sprintf(&h[116], "002000 ");
+ sprintf(&h[116], "0002000 ");
- /* File size 12 - 11 digits, 1 space, no NUL */
+ /* File size 12 - 11 digits, 1 space; use print_val for 64 bit support */
print_val(&h[124], th->fileLen, 8, 11);
sprintf(&h[135], " ");
/* Mod Time 12 */
sprintf(&h[136], "%011o ", (int) time(NULL));
- /* Checksum 8 */
- sprintf(&h[148], "%06o ", lastSum);
+ /* Checksum 8 cannot be calculated until we've filled all other fields */
/* Type - regular file */
sprintf(&h[156], "0");
- /* Link tag 100 (NULL) */
+ /* Link Name 100 (leave as nulls) */
- /* Magic 6 + Version 2 */
- sprintf(&h[257], "ustar00");
+ /* Magic 6 */
+ sprintf(&h[257], "ustar");
+
+ /* Version 2 */
+ sprintf(&h[263], "00");
-#if 0
/* User 32 */
- sprintf(&h[265], "%.31s", ""); /* How do I get username reliably? Do
- * I need to? */
+ /* XXX: Do we need to care about setting correct username? */
+ sprintf(&h[265], "%.31s", "postgres");
/* Group 32 */
- sprintf(&h[297], "%.31s", ""); /* How do I get group reliably? Do I
- * need to? */
+ /* XXX: Do we need to care about setting correct group name? */
+ sprintf(&h[297], "%.31s", "postgres");
- /* Maj Dev 8 */
- sprintf(&h[329], "%6o ", 0);
+ /* Major Dev 8 */
+ sprintf(&h[329], "%07o ", 0);
- /* Min Dev 8 */
- sprintf(&h[337], "%6o ", 0);
-#endif
+ /* Minor Dev 8 */
+ sprintf(&h[337], "%07o ", 0);
- while ((sum = _tarChecksum(h)) != lastSum)
- {
- sprintf(&h[148], "%06o ", sum);
- lastSum = sum;
- }
+ /* Prefix 155 - not used, leave as nulls */
+
+ /*
+ * We mustn't overwrite the next field while inserting the checksum.
+ * Fortunately, the checksum can't exceed 6 octal digits, so we just write
+ * 6 digits, a space, and a null, which is legal per POSIX.
+ */
+ sprintf(&h[148], "%06o ", _tarChecksum(h));
+ /* Now write the completed header. */
if (fwrite(h, 1, 512, th->tarFH) != 512)
exit_horribly(modulename, "could not write to output file: %s\n", strerror(errno));
}
diff --git a/src/bin/pg_dump/pg_backup_tar.h b/src/bin/pg_dump/pg_backup_tar.h
index cb9be645af2..0277f08f071 100644
--- a/src/bin/pg_dump/pg_backup_tar.h
+++ b/src/bin/pg_dump/pg_backup_tar.h
@@ -1,28 +1,31 @@
/*
* src/bin/pg_dump/pg_backup_tar.h
*
- * TAR Header
+ * TAR Header (see "ustar interchange format" in POSIX 1003.1)
*
* Offset Length Contents
* 0 100 bytes File name ('\0' terminated, 99 maximum length)
* 100 8 bytes File mode (in octal ascii)
* 108 8 bytes User ID (in octal ascii)
* 116 8 bytes Group ID (in octal ascii)
- * 124 12 bytes File size (s) (in octal ascii)
- * 136 12 bytes Modify time (in octal ascii)
+ * 124 12 bytes File size (in octal ascii)
+ * 136 12 bytes Modify time (Unix timestamp in octal ascii)
* 148 8 bytes Header checksum (in octal ascii)
- * 156 1 bytes Link flag
- * 157 100 bytes Linkname ('\0' terminated, 99 maximum length)
- * 257 8 bytes Magic ("ustar  \0")
+ * 156 1 bytes Type flag (see below)
+ * 157 100 bytes Linkname, if symlink ('\0' terminated, 99 maximum length)
+ * 257 6 bytes Magic ("ustar\0")
+ * 263 2 bytes Version ("00")
* 265 32 bytes User name ('\0' terminated, 31 maximum length)
* 297 32 bytes Group name ('\0' terminated, 31 maximum length)
* 329 8 bytes Major device ID (in octal ascii)
* 337 8 bytes Minor device ID (in octal ascii)
- * 345 167 bytes Padding
- * 512 (s+p)bytes File contents (s+p) := (((s) + 511) & ~511), round up to 512 bytes
+ * 345 155 bytes File name prefix (not used in our implementation)
+ * 500 12 bytes Padding
+ *
+ * 512 (s+p)bytes File contents, padded out to 512-byte boundary
*/
-/* The linkflag defines the type of file */
+/* The type flag defines the type of file */
#define LF_OLDNORMAL '\0' /* Normal disk file, Unix compatible */
#define LF_NORMAL '0' /* Normal disk file */
#define LF_LINK '1' /* Link to previously dumped file */