aboutsummaryrefslogtreecommitdiff
path: root/src/include/postgres.h
diff options
context:
space:
mode:
authorRobert Haas <rhaas@postgresql.org>2021-03-19 15:10:38 -0400
committerRobert Haas <rhaas@postgresql.org>2021-03-19 15:10:38 -0400
commitbbe0a81db69bd10bd166907c3701492a29aca294 (patch)
tree13d4a0dd3309af4f7970a1edbb61da0d974a2448 /src/include/postgres.h
parente589c4890b05044a04207c2797e7c8af6693ea5f (diff)
downloadpostgresql-bbe0a81db69bd10bd166907c3701492a29aca294.tar.gz
postgresql-bbe0a81db69bd10bd166907c3701492a29aca294.zip
Allow configurable LZ4 TOAST compression.
There is now a per-column COMPRESSION option which can be set to pglz (the default, and the only option up until now) or lz4. Or, if you like, you can set the new default_toast_compression GUC to lz4, and then that will be the default for new table columns for which no value is specified. We don't have lz4 support in the PostgreSQL code, so to use lz4 compression, PostgreSQL must be built --with-lz4. In general, TOAST compression means compression of individual column values, not the whole tuple, and those values can either be compressed inline within the tuple or compressed and then stored externally in the TOAST table, so those properties also apply to this feature. Prior to this commit, a TOAST pointer has two unused bits as part of the va_extsize field, and a compressed datum has two unused bits as part of the va_rawsize field. These bits are unused because the length of a varlena is limited to 1GB; we now use them to indicate the compression type that was used. This means we only have bit space for 2 more built-in compression types, but we could work around that problem, if necessary, by introducing a new vartag_external value for any further types we end up wanting to add. Hopefully, it won't be too important to offer a wide selection of algorithms here, since each one we add not only takes more coding but also adds a build dependency for every packager. Nevertheless, it seems worth doing at least this much, because LZ4 gets better compression than PGLZ with less CPU usage. It's possible for LZ4-compressed datums to leak into composite type values stored on disk, just as it is for PGLZ. It's also possible for LZ4-compressed attributes to be copied into a different table via SQL commands such as CREATE TABLE AS or INSERT .. SELECT. It would be expensive to force such values to be decompressed, so PostgreSQL has never done so. 
For the same reasons, we also don't force recompression of already-compressed values even if the target table prefers a different compression method than was used for the source data. These architectural decisions are perhaps arguable but revisiting them is well beyond the scope of what seemed possible to do as part of this project. However, it's relatively cheap to recompress as part of VACUUM FULL or CLUSTER, so this commit adjusts those commands to do so, if the configured compression method of the table happens not to match what was used for some column value stored therein. Dilip Kumar. The original patches on which this work was based were written by Ildus Kurbangaliev, and those patches were based on even earlier work by Nikita Glukhov, but the design has since changed very substantially, since allowing a potentially large number of compression methods that could be added and dropped on a running system proved too problematic given some of the architectural issues mentioned above; the choice of which specific compression method to add first is now different; and a lot of the code has been heavily refactored. More recently, Justin Pryzby helped quite a bit with testing and reviewing and this version also includes some code contributions from him. Other design input and review from Tomas Vondra, Álvaro Herrera, Andres Freund, Oleg Bartunov, Alexander Korotkov, and me. Discussion: http://postgr.es/m/20170907194236.4cefce96%40wp.localdomain Discussion: http://postgr.es/m/CAFiTN-uUpX3ck%3DK0mLEk-G_kUQY%3DSNOTeqdaNRR9FMdQrHKebw%40mail.gmail.com
Diffstat (limited to 'src/include/postgres.h')
-rw-r--r--src/include/postgres.h50
1 files changed, 46 insertions, 4 deletions
diff --git a/src/include/postgres.h b/src/include/postgres.h
index 2ed572004dd..2ccbea8e502 100644
--- a/src/include/postgres.h
+++ b/src/include/postgres.h
@@ -55,7 +55,9 @@
/*
* struct varatt_external is a traditional "TOAST pointer", that is, the
* information needed to fetch a Datum stored out-of-line in a TOAST table.
- * The data is compressed if and only if va_extsize < va_rawsize - VARHDRSZ.
+ * The data is compressed if and only if the size stored in va_extinfo <
+ * va_rawsize - VARHDRSZ.
+ *
* This struct must not contain any padding, because we sometimes compare
* these pointers using memcmp.
*
@@ -67,7 +69,8 @@
typedef struct varatt_external
{
int32 va_rawsize; /* Original data size (includes header) */
- int32 va_extsize; /* External saved size (doesn't) */
+ uint32 va_extinfo; /* External saved size (without header) and
+ * compression method */
Oid va_valueid; /* Unique ID of value within TOAST table */
Oid va_toastrelid; /* RelID of TOAST table containing it */
} varatt_external;
@@ -145,7 +148,8 @@ typedef union
struct /* Compressed-in-line format */
{
uint32 va_header;
- uint32 va_rawsize; /* Original data size (excludes header) */
+ uint32 va_tcinfo; /* Original data size (excludes header) and
+ * compression method */
char va_data[FLEXIBLE_ARRAY_MEMBER]; /* Compressed data */
} va_compressed;
} varattrib_4b;
@@ -274,14 +278,23 @@ typedef struct
(VARSIZE(PTR) - VARHDRSZ + VARHDRSZ_SHORT)
#define VARHDRSZ_EXTERNAL offsetof(varattrib_1b_e, va_data)
+#define VARHDRSZ_COMPRESS offsetof(varattrib_4b, va_compressed.va_data)
#define VARDATA_4B(PTR) (((varattrib_4b *) (PTR))->va_4byte.va_data)
#define VARDATA_4B_C(PTR) (((varattrib_4b *) (PTR))->va_compressed.va_data)
#define VARDATA_1B(PTR) (((varattrib_1b *) (PTR))->va_data)
#define VARDATA_1B_E(PTR) (((varattrib_1b_e *) (PTR))->va_data)
+#define VARLENA_RAWSIZE_BITS 30
+#define VARLENA_RAWSIZE_MASK ((1U << VARLENA_RAWSIZE_BITS) - 1)
+
+/*
+ * va_tcinfo in va_compressed contains the raw size of the datum and the
+ * compression method.
+ */
#define VARRAWSIZE_4B_C(PTR) \
- (((varattrib_4b *) (PTR))->va_compressed.va_rawsize)
+ (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo & VARLENA_RAWSIZE_MASK)
+#define VARCOMPRESS_4B_C(PTR) \
+ (((varattrib_4b *) (PTR))->va_compressed.va_tcinfo >> VARLENA_RAWSIZE_BITS)
/* Externally visible macros */
@@ -323,6 +336,35 @@ typedef struct
(VARATT_IS_EXTERNAL(PTR) && VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
#define VARATT_IS_EXTERNAL_NON_EXPANDED(PTR) \
(VARATT_IS_EXTERNAL(PTR) && !VARTAG_IS_EXPANDED(VARTAG_EXTERNAL(PTR)))
+
+/*
+ * va_extinfo in varatt_external contains actual length of the external data
+ * and compression method if external data is compressed.
+ */
+#define VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) \
+ ((toast_pointer).va_extinfo & VARLENA_RAWSIZE_MASK)
+
+/*
+ * Store the external data size and the compression method ID together in
+ * va_extinfo: the method ID is placed above the low VARLENA_RAWSIZE_BITS
+ * bits, which hold the size.  "len" is not masked here, so the caller must
+ * pass a value that fits in VARLENA_RAWSIZE_BITS bits.
+ *
+ * The cast to uint32 keeps the shift in unsigned arithmetic; shifting a
+ * signed method ID of 2 or more by VARLENA_RAWSIZE_BITS would overflow int.
+ */
+#define VARATT_EXTERNAL_SET_SIZE_AND_COMPRESSION(toast_pointer, len, cm) \
+	do { \
+		Assert((cm) == TOAST_PGLZ_COMPRESSION_ID || \
+			   (cm) == TOAST_LZ4_COMPRESSION_ID); \
+		((toast_pointer).va_extinfo = \
+			(len) | ((uint32) (cm) << VARLENA_RAWSIZE_BITS)); \
+	} while (0)
+
+/*
+ * Extract the compression method ID stored above the size bits of
+ * va_extinfo.  The parameter is a varatt_external struct (not a pointer),
+ * as for VARATT_EXTERNAL_GET_EXTSIZE.
+ */
+#define VARATT_EXTERNAL_GET_COMPRESSION(toast_pointer) \
+	((toast_pointer).va_extinfo >> VARLENA_RAWSIZE_BITS)
+
+/*
+ * Testing whether an externally-stored value is compressed now requires
+ * comparing size stored in va_extinfo (the actual length of the external data)
+ * to rawsize (the original uncompressed datum's size). The latter includes
+ * VARHDRSZ overhead, the former doesn't. We never use compression unless it
+ * actually saves space, so we expect either equality or less-than.
+ */
+#define VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) \
+ (VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer) < \
+ (toast_pointer).va_rawsize - VARHDRSZ)
+
#define VARATT_IS_SHORT(PTR) VARATT_IS_1B(PTR)
#define VARATT_IS_EXTENDED(PTR) (!VARATT_IS_4B_U(PTR))