diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/transam/xlog.c | 2 | ||||
-rw-r--r-- | src/backend/access/transam/xloginsert.c | 73 | ||||
-rw-r--r-- | src/backend/access/transam/xlogreader.c | 58 | ||||
-rw-r--r-- | src/backend/utils/misc/guc.c | 36 | ||||
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 3 | ||||
-rw-r--r-- | src/bin/pg_waldump/pg_waldump.c | 19 | ||||
-rw-r--r-- | src/include/access/xlog.h | 10 | ||||
-rw-r--r-- | src/include/access/xlogrecord.h | 16 | ||||
-rw-r--r-- | src/tools/pgindent/typedefs.list | 1 |
9 files changed, 174 insertions, 44 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2c6e21bea5a..9cbca6392d3 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -98,7 +98,7 @@ char *XLogArchiveCommand = NULL; bool EnableHotStandby = false; bool fullPageWrites = true; bool wal_log_hints = false; -bool wal_compression = false; +int wal_compression = WAL_COMPRESSION_NONE; char *wal_consistency_checking_string = NULL; bool *wal_consistency_checking = NULL; bool wal_init_zero = true; diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c index 32b4cc84e79..10b3b090535 100644 --- a/src/backend/access/transam/xloginsert.c +++ b/src/backend/access/transam/xloginsert.c @@ -33,8 +33,20 @@ #include "storage/proc.h" #include "utils/memutils.h" -/* Buffer size required to store a compressed version of backup block image */ -#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ) +/* + * Guess the maximum buffer size required to store a compressed version of + * backup block image. 
+ */ +#ifdef USE_LZ4 +#include <lz4.h> +#define LZ4_MAX_BLCKSZ LZ4_COMPRESSBOUND(BLCKSZ) +#else +#define LZ4_MAX_BLCKSZ 0 +#endif + +#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ) + +#define COMPRESS_BUFSIZE Max(PGLZ_MAX_BLCKSZ, LZ4_MAX_BLCKSZ) /* * For each block reference registered with XLogRegisterBuffer, we fill in @@ -58,7 +70,7 @@ typedef struct * backup block data in XLogRecordAssemble() */ /* buffer to store a compressed version of backup block image */ - char compressed_page[PGLZ_MAX_BLCKSZ]; + char compressed_page[COMPRESS_BUFSIZE]; } registered_buffer; static registered_buffer *registered_buffers; @@ -628,7 +640,7 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, /* * Try to compress a block image if wal_compression is enabled */ - if (wal_compression) + if (wal_compression != WAL_COMPRESSION_NONE) { is_compressed = XLogCompressBackupBlock(page, bimg.hole_offset, @@ -665,8 +677,29 @@ XLogRecordAssemble(RmgrId rmid, uint8 info, if (is_compressed) { + /* The current compression is stored in the WAL record */ bimg.length = compressed_len; - bimg.bimg_info |= BKPIMAGE_IS_COMPRESSED; + + /* Set the compression method used for this block */ + switch ((WalCompression) wal_compression) + { + case WAL_COMPRESSION_PGLZ: + bimg.bimg_info |= BKPIMAGE_COMPRESS_PGLZ; + break; + + case WAL_COMPRESSION_LZ4: +#ifdef USE_LZ4 + bimg.bimg_info |= BKPIMAGE_COMPRESS_LZ4; +#else + elog(ERROR, "LZ4 is not supported by this build"); +#endif + break; + + case WAL_COMPRESSION_NONE: + Assert(false); /* cannot happen */ + break; + /* no default case, so that compiler will warn */ + } rdt_datas_last->data = regbuf->compressed_page; rdt_datas_last->len = compressed_len; @@ -853,12 +886,34 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length, else source = page; + switch ((WalCompression) wal_compression) + { + case WAL_COMPRESSION_PGLZ: + len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default); + break; + + case WAL_COMPRESSION_LZ4: +#ifdef USE_LZ4 + 
len = LZ4_compress_default(source, dest, orig_len, + COMPRESS_BUFSIZE); + if (len <= 0) + len = -1; /* failure */ +#else + elog(ERROR, "LZ4 is not supported by this build"); +#endif + break; + + case WAL_COMPRESSION_NONE: + Assert(false); /* cannot happen */ + break; + /* no default case, so that compiler will warn */ + } + /* - * We recheck the actual size even if pglz_compress() reports success and - * see if the number of bytes saved by compression is larger than the - * length of extra data needed for the compressed version of block image. + * We recheck the actual size even if compression reports success and see + * if the number of bytes saved by compression is larger than the length + * of extra data needed for the compressed version of block image. */ - len = pglz_compress(source, orig_len, dest, PGLZ_strategy_default); if (len >= 0 && len + extra_bytes < orig_len) { diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index 42738eb940c..9a2cdf888e2 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -18,6 +18,9 @@ #include "postgres.h" #include <unistd.h> +#ifdef USE_LZ4 +#include <lz4.h> +#endif #include "access/transam.h" #include "access/xlog_internal.h" @@ -1290,7 +1293,7 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg) blk->apply_image = ((blk->bimg_info & BKPIMAGE_APPLY) != 0); - if (blk->bimg_info & BKPIMAGE_IS_COMPRESSED) + if (BKPIMAGE_COMPRESSED(blk->bimg_info)) { if (blk->bimg_info & BKPIMAGE_HAS_HOLE) COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16)); @@ -1335,29 +1338,28 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg) } /* - * cross-check that bimg_len < BLCKSZ if the IS_COMPRESSED - * flag is set. + * Cross-check that bimg_len < BLCKSZ if it is compressed. 
*/ - if ((blk->bimg_info & BKPIMAGE_IS_COMPRESSED) && + if (BKPIMAGE_COMPRESSED(blk->bimg_info) && blk->bimg_len == BLCKSZ) { report_invalid_record(state, - "BKPIMAGE_IS_COMPRESSED set, but block image length %u at %X/%X", + "BKPIMAGE_COMPRESSED set, but block image length %u at %X/%X", (unsigned int) blk->bimg_len, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; } /* - * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE nor - * IS_COMPRESSED flag is set. + * cross-check that bimg_len = BLCKSZ if neither HAS_HOLE is + * set nor COMPRESSED(). */ if (!(blk->bimg_info & BKPIMAGE_HAS_HOLE) && - !(blk->bimg_info & BKPIMAGE_IS_COMPRESSED) && + !BKPIMAGE_COMPRESSED(blk->bimg_info) && blk->bimg_len != BLCKSZ) { report_invalid_record(state, - "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_IS_COMPRESSED set, but block image length is %u at %X/%X", + "neither BKPIMAGE_HAS_HOLE nor BKPIMAGE_COMPRESSED set, but block image length is %u at %X/%X", (unsigned int) blk->data_len, LSN_FORMAT_ARGS(state->ReadRecPtr)); goto err; @@ -1555,17 +1557,49 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page) bkpb = &record->blocks[block_id]; ptr = bkpb->bkp_image; - if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED) + if (BKPIMAGE_COMPRESSED(bkpb->bimg_info)) { /* If a backup block image is compressed, decompress it */ - if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data, - BLCKSZ - bkpb->hole_length, true) < 0) + bool decomp_success = true; + + if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0) + { + if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data, + BLCKSZ - bkpb->hole_length, true) < 0) + decomp_success = false; + } + else if ((bkpb->bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0) + { +#ifdef USE_LZ4 + if (LZ4_decompress_safe(ptr, tmp.data, + bkpb->bimg_len, BLCKSZ - bkpb->hole_length) <= 0) + decomp_success = false; +#else + report_invalid_record(record, + "image at %X/%X compressed with %s not supported by build, block %d", 
+ LSN_FORMAT_ARGS(record->ReadRecPtr), + "LZ4", + block_id); + return false; +#endif + } + else + { + report_invalid_record(record, + "image at %X/%X compressed with unknown method, block %d", + LSN_FORMAT_ARGS(record->ReadRecPtr), + block_id); + return false; + } + + if (!decomp_success) { report_invalid_record(record, "invalid compressed image at %X/%X, block %d", LSN_FORMAT_ARGS(record->ReadRecPtr), block_id); return false; } + ptr = tmp.data; } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 297e705b806..480e8cd1991 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -540,6 +540,22 @@ static struct config_enum_entry default_toast_compression_options[] = { {NULL, 0, false} }; +static const struct config_enum_entry wal_compression_options[] = { + {"pglz", WAL_COMPRESSION_PGLZ, false}, +#ifdef USE_LZ4 + {"lz4", WAL_COMPRESSION_LZ4, false}, +#endif + {"on", WAL_COMPRESSION_PGLZ, false}, + {"off", WAL_COMPRESSION_NONE, false}, + {"true", WAL_COMPRESSION_PGLZ, true}, + {"false", WAL_COMPRESSION_NONE, true}, + {"yes", WAL_COMPRESSION_PGLZ, true}, + {"no", WAL_COMPRESSION_NONE, true}, + {"1", WAL_COMPRESSION_PGLZ, true}, + {"0", WAL_COMPRESSION_NONE, true}, + {NULL, 0, false} +}; + /* * Options for enum values stored in other modules */ @@ -1305,16 +1321,6 @@ static struct config_bool ConfigureNamesBool[] = }, { - {"wal_compression", PGC_SUSET, WAL_SETTINGS, - gettext_noop("Compresses full-page writes written in WAL file."), - NULL - }, - &wal_compression, - false, - NULL, NULL, NULL - }, - - { {"wal_init_zero", PGC_SUSET, WAL_SETTINGS, gettext_noop("Writes zeroes to new WAL files before first use."), NULL @@ -4817,6 +4823,16 @@ static struct config_enum ConfigureNamesEnum[] = }, { + {"wal_compression", PGC_SUSET, WAL_SETTINGS, + gettext_noop("Compresses full-page writes written in WAL file with specified method."), + NULL + }, + &wal_compression, + WAL_COMPRESSION_NONE, wal_compression_options, + 
NULL, NULL, NULL + }, + + { {"wal_level", PGC_POSTMASTER, WAL_SETTINGS, gettext_noop("Sets the level of information written to the WAL."), NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index af04ec3c744..b696abfe541 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -218,7 +218,8 @@ #full_page_writes = on # recover from partial page writes #wal_log_hints = off # also do full page writes of non-critical updates # (change requires restart) -#wal_compression = off # enable compression of full-page writes +#wal_compression = off # enables compression of full-page writes; + # off, pglz, lz4, or on #wal_init_zero = on # zero-fill new WAL files #wal_recycle = on # recycle WAL files #wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers diff --git a/src/bin/pg_waldump/pg_waldump.c b/src/bin/pg_waldump/pg_waldump.c index f8b8afe4a7b..d83847b276d 100644 --- a/src/bin/pg_waldump/pg_waldump.c +++ b/src/bin/pg_waldump/pg_waldump.c @@ -537,18 +537,29 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record) blk); if (XLogRecHasBlockImage(record, block_id)) { - if (record->blocks[block_id].bimg_info & - BKPIMAGE_IS_COMPRESSED) + uint8 bimg_info = record->blocks[block_id].bimg_info; + + if (BKPIMAGE_COMPRESSED(bimg_info)) { + const char *method; + + if ((bimg_info & BKPIMAGE_COMPRESS_PGLZ) != 0) + method = "pglz"; + else if ((bimg_info & BKPIMAGE_COMPRESS_LZ4) != 0) + method = "lz4"; + else + method = "unknown"; + printf(" (FPW%s); hole: offset: %u, length: %u, " - "compression saved: %u", + "compression saved: %u, method: %s", XLogRecBlockImageApply(record, block_id) ? 
"" : " for WAL verification", record->blocks[block_id].hole_offset, record->blocks[block_id].hole_length, BLCKSZ - record->blocks[block_id].hole_length - - record->blocks[block_id].bimg_len); + record->blocks[block_id].bimg_len, + method); } else { diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 7510e882287..ccfcf43d62a 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -116,7 +116,7 @@ extern char *XLogArchiveCommand; extern bool EnableHotStandby; extern bool fullPageWrites; extern bool wal_log_hints; -extern bool wal_compression; +extern int wal_compression; extern bool wal_init_zero; extern bool wal_recycle; extern bool *wal_consistency_checking; @@ -167,6 +167,14 @@ typedef enum WalLevel WAL_LEVEL_LOGICAL } WalLevel; +/* Compression algorithms for WAL */ +typedef enum WalCompression +{ + WAL_COMPRESSION_NONE = 0, + WAL_COMPRESSION_PGLZ, + WAL_COMPRESSION_LZ4 +} WalCompression; + /* Recovery states */ typedef enum RecoveryState { diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h index 80c92a2498a..e06ee92a5e5 100644 --- a/src/include/access/xlogrecord.h +++ b/src/include/access/xlogrecord.h @@ -114,8 +114,8 @@ typedef struct XLogRecordBlockHeader * present is (BLCKSZ - <length of "hole" bytes>). * * Additionally, when wal_compression is enabled, we will try to compress full - * page images using the PGLZ compression algorithm, after removing the "hole". - * This can reduce the WAL volume, but at some extra cost of CPU spent + * page images using one of the supported algorithms, after removing the + * "hole". This can reduce the WAL volume, but at some extra cost of CPU spent * on the compression during WAL logging. In this case, since the "hole" * length cannot be calculated by subtracting the number of page image bytes * from BLCKSZ, basically it needs to be stored as an extra information. 
@@ -134,7 +134,7 @@ typedef struct XLogRecordBlockImageHeader uint8 bimg_info; /* flag bits, see below */ /* - * If BKPIMAGE_HAS_HOLE and BKPIMAGE_IS_COMPRESSED, an + * If BKPIMAGE_HAS_HOLE and BKPIMAGE_COMPRESSED(), an * XLogRecordBlockCompressHeader struct follows. */ } XLogRecordBlockImageHeader; @@ -144,9 +144,13 @@ typedef struct XLogRecordBlockImageHeader /* Information stored in bimg_info */ #define BKPIMAGE_HAS_HOLE 0x01 /* page image has "hole" */ -#define BKPIMAGE_IS_COMPRESSED 0x02 /* page image is compressed */ -#define BKPIMAGE_APPLY 0x04 /* page image should be restored during - * replay */ +#define BKPIMAGE_APPLY 0x02 /* page image should be restored + * during replay */ +/* compression methods; BKPIMAGE_COMPRESSED() is true if any bit is set */ +#define BKPIMAGE_COMPRESS_PGLZ 0x04 +#define BKPIMAGE_COMPRESS_LZ4 0x08 +#define BKPIMAGE_COMPRESSED(info) \ + (((info) & (BKPIMAGE_COMPRESS_PGLZ | BKPIMAGE_COMPRESS_LZ4)) != 0) /* * Extra header information used when page image has "hole" and diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 1b3da854214..64c06cf9523 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2843,6 +2843,7 @@ WaitEventSet WaitEventTimeout WaitPMResult WalCloseMethod +WalCompression WalLevel WalRcvData WalRcvExecResult |