diff options
author | Andres Freund <andres@anarazel.de> | 2014-11-12 18:52:49 +0100 |
---|---|---|
committer | Andres Freund <andres@anarazel.de> | 2014-11-12 18:52:49 +0100 |
commit | ec5896aed3c01da24c1f335f138817e9890d68b6 (patch) | |
tree | d80e7cb09a8e59d61524cac1b52880259bfb13d8 /src/backend | |
parent | bd4ae0f396b36955c92f26c0058e7dc0452db817 (diff) | |
download | postgresql-ec5896aed3c01da24c1f335f138817e9890d68b6.tar.gz postgresql-ec5896aed3c01da24c1f335f138817e9890d68b6.zip |
Fix several weaknesses in slot and logical replication on-disk serialization.
Heikki noticed in 544E23C0.8090605@vmware.com that slot.c and
snapbuild.c were missing the FIN_CRC32 call when computing/checking
checksums of on disk files. That doesn't lower the the error detection
capabilities of the checksum, but is inconsistent with other usages.
In a followup mail Heikki also noticed that, contrary to a comment,
the 'version' and 'length' struct fields of replication slot's on disk
data where not covered by the checksum. That's not likely to lead to
actually missed corruption as those fields are cross checked with the
expected version and the actual file length. But it's wrong
nonetheless.
As fixing these issues makes existing on disk files unreadable, bump
the expected versions of on disk files for both slots and logical
decoding historic catalog snapshots. This means that loading old
files will fail with
ERROR: "replication slot file ... has unsupported version 1"
and
ERROR: "snapbuild state file ... has unsupported version 1 instead of
2" respectively. Given the low likelihood of anybody already using
these new features in a production setup that seems acceptable.
Fixing these issues made me notice that there's no regression test
covering the loading of historic snapshot from disk - so add one.
Backpatch to 9.4 where these features were introduced.
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/replication/logical/snapbuild.c | 6 | ||||
-rw-r--r-- | src/backend/replication/slot.c | 35 |
2 files changed, 29 insertions, 12 deletions
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 705ee0b61c2..d636ccb63b1 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1406,7 +1406,7 @@ typedef struct SnapBuildOnDisk offsetof(SnapBuildOnDisk, version) #define SNAPBUILD_MAGIC 0x51A1E001 -#define SNAPBUILD_VERSION 1 +#define SNAPBUILD_VERSION 2 /* * Store/Load a snapshot from disk, depending on the snapshot builder's state. @@ -1552,6 +1552,8 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) COMP_CRC32C(ondisk->checksum, ondisk_c, sz); ondisk_c += sz; + FIN_CRC32C(ondisk->checksum); + /* we have valid data now, open tempfile and write it there */ fd = OpenTransientFile(tmppath, O_CREAT | O_EXCL | O_WRONLY | PG_BINARY, @@ -1724,6 +1726,8 @@ SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn) CloseTransientFile(fd); + FIN_CRC32C(checksum); + /* verify checksum of what we've read */ if (!EQ_CRC32C(checksum, ondisk.checksum)) ereport(ERROR, diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index 7817ad8659e..937b669e8cd 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -61,18 +61,29 @@ typedef struct ReplicationSlotOnDisk uint32 version; uint32 length; + /* + * The actual data in the slot that follows can differ based on the above + * 'version'. + */ + ReplicationSlotPersistentData slotdata; } ReplicationSlotOnDisk; -/* size of the part of the slot that is version independent */ +/* size of version independent data */ #define ReplicationSlotOnDiskConstantSize \ offsetof(ReplicationSlotOnDisk, slotdata) -/* size of the slots that is not version indepenent */ -#define ReplicationSlotOnDiskDynamicSize \ +/* size of the part of the slot not covered by the checksum */ +#define SnapBuildOnDiskNotChecksummedSize \ + offsetof(ReplicationSlotOnDisk, version) +/* size of the part covered by the checksum */ +#define SnapBuildOnDiskChecksummedSize \ + sizeof(ReplicationSlotOnDisk) - SnapBuildOnDiskNotChecksummedSize +/* size of the slot data that is version dependant */ +#define ReplicationSlotOnDiskV2Size \ sizeof(ReplicationSlotOnDisk) - ReplicationSlotOnDiskConstantSize #define SLOT_MAGIC 0x1051CA1 /* format identifier */ -#define SLOT_VERSION 1 /* version for new files */ +#define SLOT_VERSION 2 /* version for new files */ /* Control array for replication slot management */ ReplicationSlotCtlData *ReplicationSlotCtl = NULL; @@ -992,8 +1003,8 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) cp.magic = SLOT_MAGIC; INIT_CRC32C(cp.checksum); - cp.version = 1; - cp.length = ReplicationSlotOnDiskDynamicSize; + cp.version = SLOT_VERSION; + cp.length = ReplicationSlotOnDiskV2Size; SpinLockAcquire(&slot->mutex); @@ -1002,8 +1013,9 @@ SaveSlotToPath(ReplicationSlot *slot, const char *dir, int elevel) SpinLockRelease(&slot->mutex); COMP_CRC32C(cp.checksum, - (char *) (&cp) + ReplicationSlotOnDiskConstantSize, - ReplicationSlotOnDiskDynamicSize); + (char *) (&cp) + SnapBuildOnDiskNotChecksummedSize, + SnapBuildOnDiskChecksummedSize); + FIN_CRC32C(cp.checksum); if ((write(fd, &cp, sizeof(cp))) != sizeof(cp)) { @@ -1155,7 +1167,7 @@ RestoreSlotFromDisk(const char *name) path, cp.version))); /* boundary check on length */ - if (cp.length != ReplicationSlotOnDiskDynamicSize) + if (cp.length != ReplicationSlotOnDiskV2Size) ereport(PANIC, (errcode_for_file_access(), errmsg("replication slot file \"%s\" has corrupted length %u", @@ -1182,8 +1194,9 @@ RestoreSlotFromDisk(const char *name) /* now verify the CRC */ INIT_CRC32C(checksum); COMP_CRC32C(checksum, - (char *) &cp + ReplicationSlotOnDiskConstantSize, - ReplicationSlotOnDiskDynamicSize); + (char *) &cp + SnapBuildOnDiskNotChecksummedSize, + SnapBuildOnDiskChecksummedSize); + FIN_CRC32C(checksum); if (!EQ_CRC32C(checksum, cp.checksum)) ereport(PANIC, |