aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/gin/ginpostinglist.c35
-rw-r--r--src/test/modules/Makefile1
-rw-r--r--src/test/modules/test_ginpostinglist/Makefile21
-rw-r--r--src/test/modules/test_ginpostinglist/README2
-rw-r--r--src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out19
-rw-r--r--src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql7
-rw-r--r--src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql8
-rw-r--r--src/test/modules/test_ginpostinglist/test_ginpostinglist.c96
-rw-r--r--src/test/modules/test_ginpostinglist/test_ginpostinglist.control4
9 files changed, 184 insertions, 9 deletions
diff --git a/src/backend/access/gin/ginpostinglist.c b/src/backend/access/gin/ginpostinglist.c
index 48ccfafdd22..44aef77edb7 100644
--- a/src/backend/access/gin/ginpostinglist.c
+++ b/src/backend/access/gin/ginpostinglist.c
@@ -23,25 +23,32 @@
/*
* For encoding purposes, item pointers are represented as 64-bit unsigned
* integers. The lowest 11 bits represent the offset number, and the next
- * lowest 32 bits are the block number. That leaves 17 bits unused, i.e.
+ * lowest 32 bits are the block number. That leaves 21 bits unused, i.e.
* only 43 low bits are used.
*
+ * 11 bits is enough for the offset number, because MaxHeapTuplesPerPage <
+ * 2^11 on all supported block sizes. We are frugal with the bits, because
+ * smaller integers use fewer bytes in the varbyte encoding, saving disk
+ * space. (If we get a new table AM in the future that wants to use the full
+ * range of possible offset numbers, we'll need to change this.)
+ *
* These 43-bit integers are encoded using varbyte encoding. In each byte,
* the 7 low bits contain data, while the highest bit is a continuation bit.
* When the continuation bit is set, the next byte is part of the same
- * integer, otherwise this is the last byte of this integer. 43 bits fit
- * conveniently in at most 6 bytes when varbyte encoded (the 6th byte does
- * not need a continuation bit, because we know the max size to be 43 bits):
+ * integer, otherwise this is the last byte of this integer. 43 bits need
+ * at most 7 bytes in this encoding:
*
* 0XXXXXXX
* 1XXXXXXX 0XXXXYYY
* 1XXXXXXX 1XXXXYYY 0YYYYYYY
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 0YYYYYYY
* 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
- * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY YYYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0YYYYYYY
+ * 1XXXXXXX 1XXXXYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 1YYYYYYY 0uuuuuuY
*
* X = bits used for offset number
* Y = bits used for block number
+ * u = unused bit
*
* The bytes are in stored in little-endian order.
*
@@ -73,6 +80,9 @@
*/
#define MaxHeapTuplesPerPageBits 11
+/* Max. number of bytes needed to encode the largest supported integer. */
+#define MaxBytesPerInteger 7
+
static inline uint64
itemptr_to_uint64(const ItemPointer iptr)
{
@@ -126,33 +136,40 @@ decode_varbyte(unsigned char **ptr)
unsigned char *p = *ptr;
uint64 c;
+ /* 1st byte */
c = *(p++);
val = c & 0x7F;
if (c & 0x80)
{
+ /* 2nd byte */
c = *(p++);
val |= (c & 0x7F) << 7;
if (c & 0x80)
{
+ /* 3rd byte */
c = *(p++);
val |= (c & 0x7F) << 14;
if (c & 0x80)
{
+ /* 4th byte */
c = *(p++);
val |= (c & 0x7F) << 21;
if (c & 0x80)
{
+ /* 5th byte */
c = *(p++);
val |= (c & 0x7F) << 28;
if (c & 0x80)
{
+ /* 6th byte */
c = *(p++);
val |= (c & 0x7F) << 35;
if (c & 0x80)
{
- /* last byte, no continuation bit */
+ /* 7th byte, should not have continuation bit */
c = *(p++);
val |= c << 42;
+ Assert((c & 0x80) == 0);
}
}
}
@@ -208,15 +225,15 @@ ginCompressPostingList(const ItemPointer ipd, int nipd, int maxsize,
Assert(val > prev);
- if (endptr - ptr >= 6)
+ if (endptr - ptr >= MaxBytesPerInteger)
encode_varbyte(delta, &ptr);
else
{
/*
- * There are less than 6 bytes left. Have to check if the next
+ * There are less than 7 bytes left. Have to check if the next
* item fits in that space before writing it out.
*/
- unsigned char buf[6];
+ unsigned char buf[MaxBytesPerInteger];
unsigned char *p = buf;
encode_varbyte(delta, &p);
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index 60d6d7be1b2..953905d1eff 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -12,6 +12,7 @@ SUBDIRS = \
test_bloomfilter \
test_ddl_deparse \
test_extensions \
+ test_ginpostinglist \
test_integerset \
test_parser \
test_pg_dump \
diff --git a/src/test/modules/test_ginpostinglist/Makefile b/src/test/modules/test_ginpostinglist/Makefile
new file mode 100644
index 00000000000..4d45ac999af
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/Makefile
@@ -0,0 +1,21 @@
+# src/test/modules/test_ginpostinglist/Makefile
+
+MODULE_big = test_ginpostinglist
+OBJS = test_ginpostinglist.o $(WIN32RES)
+PGFILEDESC = "test_ginpostinglist - test code for src/backend/access/gin//ginpostinglist.c"
+
+EXTENSION = test_ginpostinglist
+DATA = test_ginpostinglist--1.0.sql
+
+REGRESS = test_ginpostinglist
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_ginpostinglist
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_ginpostinglist/README b/src/test/modules/test_ginpostinglist/README
new file mode 100644
index 00000000000..66684dd0da4
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/README
@@ -0,0 +1,2 @@
+test_ginpostinglist contains unit tests for the GIN posting list code in
+src/backend/access/gin/ginpostinglist.c.
diff --git a/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out b/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out
new file mode 100644
index 00000000000..4d0beaecea9
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/expected/test_ginpostinglist.out
@@ -0,0 +1,19 @@
+CREATE EXTENSION test_ginpostinglist;
+--
+-- All the logic is in the test_ginpostinglist() function. It will throw
+-- a error if something fails.
+--
+SELECT test_ginpostinglist();
+NOTICE: testing with (0, 1), (0, 2), max 14 bytes
+NOTICE: encoded 2 item pointers to 10 bytes
+NOTICE: testing with (0, 1), (0, 291), max 14 bytes
+NOTICE: encoded 2 item pointers to 10 bytes
+NOTICE: testing with (0, 1), (4294967294, 291), max 14 bytes
+NOTICE: encoded 1 item pointers to 8 bytes
+NOTICE: testing with (0, 1), (4294967294, 291), max 16 bytes
+NOTICE: encoded 2 item pointers to 16 bytes
+ test_ginpostinglist
+---------------------
+
+(1 row)
+
diff --git a/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql b/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql
new file mode 100644
index 00000000000..b8cab7accec
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/sql/test_ginpostinglist.sql
@@ -0,0 +1,7 @@
+CREATE EXTENSION test_ginpostinglist;
+
+--
+-- All the logic is in the test_ginpostinglist() function. It will throw
+-- a error if something fails.
+--
+SELECT test_ginpostinglist();
diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql b/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql
new file mode 100644
index 00000000000..37396a48424
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql
@@ -0,0 +1,8 @@
+/* src/test/modules/test_ginpostinglist/test_ginpostinglist--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_ginpostinglist" to load this file. \quit
+
+CREATE FUNCTION test_ginpostinglist()
+RETURNS pg_catalog.void STRICT
+AS 'MODULE_PATHNAME' LANGUAGE C;
diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist.c b/src/test/modules/test_ginpostinglist/test_ginpostinglist.c
new file mode 100644
index 00000000000..bab073bcecc
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist.c
@@ -0,0 +1,96 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_ginpostinglist.c
+ * Test varbyte-encoding in ginpostinglist.c
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/test_ginpostinglist/test_ginpostinglist.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "access/ginblock.h"
+#include "access/gin_private.h"
+#include "access/htup_details.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(test_ginpostinglist);
+
+/*
+ * Encodes a pair of TIDs, and decodes it back. The first TID is always
+ * (0, 1), the second one is formed from the blk/off arguments. The 'maxsize'
+ * argument is passed to ginCompressPostingList(); it can be used to test the
+ * overflow checks.
+ *
+ * The reason that we test a pair, instead of just a single TID, is that
+ * the GinPostingList stores the first TID as is, and the varbyte-encoding
+ * is only used for the deltas between TIDs. So testing a single TID would
+ * not exercise the varbyte encoding at all.
+ *
+ * This function prints NOTICEs to describe what is tested, and how large the
+ * resulting GinPostingList is. Any incorrect results, e.g. if the encode +
+ * decode round trip doesn't return the original input, are reported as
+ * ERRORs.
+ */
+static void
+test_itemptr_pair(BlockNumber blk, OffsetNumber off, int maxsize)
+{
+ ItemPointerData orig_itemptrs[2];
+ ItemPointer decoded_itemptrs;
+ GinPostingList *pl;
+ int nwritten;
+ int ndecoded;
+
+ elog(NOTICE, "testing with (%u, %d), (%u, %d), max %d bytes",
+ 0, 1, blk, off, maxsize);
+ ItemPointerSet(&orig_itemptrs[0], 0, 1);
+ ItemPointerSet(&orig_itemptrs[1], blk, off);
+
+ /* Encode, and decode it back */
+ pl = ginCompressPostingList(orig_itemptrs, 2, maxsize, &nwritten);
+ elog(NOTICE, "encoded %d item pointers to %zu bytes",
+ nwritten, SizeOfGinPostingList(pl));
+
+ if (SizeOfGinPostingList(pl) > maxsize)
+ elog(ERROR, "overflow: result was %zu bytes, max %d",
+ SizeOfGinPostingList(pl), maxsize);
+
+ decoded_itemptrs = ginPostingListDecode(pl, &ndecoded);
+ if (nwritten != ndecoded)
+ elog(NOTICE, "encoded %d itemptrs, %d came back", nwritten, ndecoded);
+
+ /* Check the result */
+ if (!ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0]))
+ elog(ERROR, "mismatch on first itemptr: (%u, %d) vs (%u, %d)",
+ 0, 1,
+ ItemPointerGetBlockNumber(&decoded_itemptrs[0]),
+ ItemPointerGetOffsetNumber(&decoded_itemptrs[0]));
+
+ if (ndecoded == 2 &&
+ !ItemPointerEquals(&orig_itemptrs[0], &decoded_itemptrs[0]))
+ {
+ elog(ERROR, "mismatch on second itemptr: (%u, %d) vs (%u, %d)",
+ 0, 1,
+ ItemPointerGetBlockNumber(&decoded_itemptrs[0]),
+ ItemPointerGetOffsetNumber(&decoded_itemptrs[0]));
+ }
+}
+
+/*
+ * SQL-callable entry point to perform all tests.
+ */
+Datum
+test_ginpostinglist(PG_FUNCTION_ARGS)
+{
+ test_itemptr_pair(0, 2, 14);
+ test_itemptr_pair(0, MaxHeapTuplesPerPage, 14);
+ test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 14);
+ test_itemptr_pair(MaxBlockNumber, MaxHeapTuplesPerPage, 16);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/test/modules/test_ginpostinglist/test_ginpostinglist.control b/src/test/modules/test_ginpostinglist/test_ginpostinglist.control
new file mode 100644
index 00000000000..e4f5a7ceadb
--- /dev/null
+++ b/src/test/modules/test_ginpostinglist/test_ginpostinglist.control
@@ -0,0 +1,4 @@
+comment = 'Test code for ginpostinglist.c'
+default_version = '1.0'
+module_pathname = '$libdir/test_ginpostinglist'
+relocatable = true