aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/amcheck/Makefile2
-rw-r--r--contrib/amcheck/amcheck--1.1--1.2.sql19
-rw-r--r--contrib/amcheck/amcheck.control2
-rw-r--r--contrib/amcheck/expected/check_btree.out5
-rw-r--r--contrib/amcheck/sql/check_btree.sql5
-rw-r--r--contrib/amcheck/verify_nbtree.c136
-rw-r--r--doc/src/sgml/amcheck.sgml7
7 files changed, 160 insertions, 16 deletions
diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index c5764b544fd..dcec3b85203 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,7 +4,7 @@ MODULE_big = amcheck
OBJS = verify_nbtree.o $(WIN32RES)
EXTENSION = amcheck
-DATA = amcheck--1.0--1.1.sql amcheck--1.0.sql
+DATA = amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
PGFILEDESC = "amcheck - function for verifying relation integrity"
REGRESS = check check_btree
diff --git a/contrib/amcheck/amcheck--1.1--1.2.sql b/contrib/amcheck/amcheck--1.1--1.2.sql
new file mode 100644
index 00000000000..883530dec74
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.1--1.2.sql
@@ -0,0 +1,19 @@
+/* contrib/amcheck/amcheck--1.1--1.2.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.2'" to load this file. \quit
+
+-- In order to avoid issues with dependencies when updating amcheck to 1.2,
+-- create new, overloaded version of the 1.1 function signature
+
+--
+-- bt_index_parent_check()
+--
+CREATE FUNCTION bt_index_parent_check(index regclass,
+ heapallindexed boolean, rootdescend boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'bt_index_parent_check'
+LANGUAGE C STRICT PARALLEL RESTRICTED;
+
+-- Don't want this to be available to public
+REVOKE ALL ON FUNCTION bt_index_parent_check(regclass, boolean, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index 469048403db..c6e310046d4 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
# amcheck extension
comment = 'functions for verifying relation integrity'
-default_version = '1.1'
+default_version = '1.2'
module_pathname = '$libdir/amcheck'
relocatable = true
diff --git a/contrib/amcheck/expected/check_btree.out b/contrib/amcheck/expected/check_btree.out
index 1e6079ddd29..6103aa4e472 100644
--- a/contrib/amcheck/expected/check_btree.out
+++ b/contrib/amcheck/expected/check_btree.out
@@ -126,7 +126,8 @@ SELECT bt_index_parent_check('bttest_multi_idx', true);
(1 row)
--
--- Test for multilevel page deletion/downlink present checks
+-- Test for multilevel page deletion/downlink present checks, and rootdescend
+-- checks
--
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
@@ -137,7 +138,7 @@ VACUUM delete_test_table;
-- root"
DELETE FROM delete_test_table WHERE a < 79990;
VACUUM delete_test_table;
-SELECT bt_index_parent_check('delete_test_table_pkey', true);
+SELECT bt_index_parent_check('delete_test_table_pkey', true, true);
bt_index_parent_check
-----------------------
diff --git a/contrib/amcheck/sql/check_btree.sql b/contrib/amcheck/sql/check_btree.sql
index 3f1e0d17efe..f3e365f4bdf 100644
--- a/contrib/amcheck/sql/check_btree.sql
+++ b/contrib/amcheck/sql/check_btree.sql
@@ -78,7 +78,8 @@ INSERT INTO bttest_multi SELECT i, i%2 FROM generate_series(1, 100000) as i;
SELECT bt_index_parent_check('bttest_multi_idx', true);
--
--- Test for multilevel page deletion/downlink present checks
+-- Test for multilevel page deletion/downlink present checks, and rootdescend
+-- checks
--
INSERT INTO delete_test_table SELECT i, 1, 2, 3 FROM generate_series(1,80000) i;
ALTER TABLE delete_test_table ADD PRIMARY KEY (a,b,c,d);
@@ -89,7 +90,7 @@ VACUUM delete_test_table;
-- root"
DELETE FROM delete_test_table WHERE a < 79990;
VACUUM delete_test_table;
-SELECT bt_index_parent_check('delete_test_table_pkey', true);
+SELECT bt_index_parent_check('delete_test_table_pkey', true, true);
--
-- BUG #15597: must not assume consistent input toasting state when forming
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index 4363e6b82e7..6ae3bca9536 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -75,6 +75,8 @@ typedef struct BtreeCheckState
bool readonly;
/* Also verifying heap has no unindexed tuples? */
bool heapallindexed;
+ /* Also making sure non-pivot tuples can be found by new search? */
+ bool rootdescend;
/* Per-page context */
MemoryContext targetcontext;
/* Buffer access strategy */
@@ -124,10 +126,11 @@ PG_FUNCTION_INFO_V1(bt_index_check);
PG_FUNCTION_INFO_V1(bt_index_parent_check);
static void bt_index_check_internal(Oid indrelid, bool parentcheck,
- bool heapallindexed);
+ bool heapallindexed, bool rootdescend);
static inline void btree_index_checkable(Relation rel);
static void bt_check_every_level(Relation rel, Relation heaprel,
- bool heapkeyspace, bool readonly, bool heapallindexed);
+ bool heapkeyspace, bool readonly, bool heapallindexed,
+ bool rootdescend);
static BtreeLevel bt_check_level_from_leftmost(BtreeCheckState *state,
BtreeLevel level);
static void bt_target_page_check(BtreeCheckState *state);
@@ -140,6 +143,7 @@ static void bt_tuple_present_callback(Relation index, HeapTuple htup,
bool tupleIsAlive, void *checkstate);
static IndexTuple bt_normalize_tuple(BtreeCheckState *state,
IndexTuple itup);
+static bool bt_rootdescend(BtreeCheckState *state, IndexTuple itup);
static inline bool offset_is_negative_infinity(BTPageOpaque opaque,
OffsetNumber offset);
static inline bool invariant_l_offset(BtreeCheckState *state, BTScanInsert key,
@@ -177,7 +181,7 @@ bt_index_check(PG_FUNCTION_ARGS)
if (PG_NARGS() == 2)
heapallindexed = PG_GETARG_BOOL(1);
- bt_index_check_internal(indrelid, false, heapallindexed);
+ bt_index_check_internal(indrelid, false, heapallindexed, false);
PG_RETURN_VOID();
}
@@ -196,11 +200,14 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
{
Oid indrelid = PG_GETARG_OID(0);
bool heapallindexed = false;
+ bool rootdescend = false;
- if (PG_NARGS() == 2)
+ if (PG_NARGS() >= 2)
heapallindexed = PG_GETARG_BOOL(1);
+ if (PG_NARGS() == 3)
+ rootdescend = PG_GETARG_BOOL(2);
- bt_index_check_internal(indrelid, true, heapallindexed);
+ bt_index_check_internal(indrelid, true, heapallindexed, rootdescend);
PG_RETURN_VOID();
}
@@ -209,7 +216,8 @@ bt_index_parent_check(PG_FUNCTION_ARGS)
* Helper for bt_index_[parent_]check, coordinating the bulk of the work.
*/
static void
-bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed)
+bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed,
+ bool rootdescend)
{
Oid heapid;
Relation indrel;
@@ -267,7 +275,7 @@ bt_index_check_internal(Oid indrelid, bool parentcheck, bool heapallindexed)
/* Check index, possibly against table it is an index on */
heapkeyspace = _bt_heapkeyspace(indrel);
bt_check_every_level(indrel, heaprel, heapkeyspace, parentcheck,
- heapallindexed);
+ heapallindexed, rootdescend);
/*
* Release locks early. That's ok here because nothing in the called
@@ -338,7 +346,7 @@ btree_index_checkable(Relation rel)
*/
static void
bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
- bool readonly, bool heapallindexed)
+ bool readonly, bool heapallindexed, bool rootdescend)
{
BtreeCheckState *state;
Page metapage;
@@ -362,6 +370,7 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
state->heapkeyspace = heapkeyspace;
state->readonly = readonly;
state->heapallindexed = heapallindexed;
+ state->rootdescend = rootdescend;
if (state->heapallindexed)
{
@@ -430,6 +439,14 @@ bt_check_every_level(Relation rel, Relation heaprel, bool heapkeyspace,
}
}
+ Assert(!state->rootdescend || state->readonly);
+ if (state->rootdescend && !state->heapkeyspace)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot verify that tuples from index \"%s\" can each be found by an independent index search",
+ RelationGetRelationName(rel)),
+ errhint("Only B-Tree version 4 indexes support rootdescend verification.")));
+
/* Create context for page */
state->targetcontext = AllocSetContextCreate(CurrentMemoryContext,
"amcheck context",
@@ -922,6 +939,31 @@ bt_target_page_check(BtreeCheckState *state)
if (offset_is_negative_infinity(topaque, offset))
continue;
+ /*
+ * Readonly callers may optionally verify that non-pivot tuples can
+ * each be found by an independent search that starts from the root
+ */
+ if (state->rootdescend && P_ISLEAF(topaque) &&
+ !bt_rootdescend(state, itup))
+ {
+ char *itid,
+ *htid;
+
+ itid = psprintf("(%u,%u)", state->targetblock, offset);
+ htid = psprintf("(%u,%u)",
+ ItemPointerGetBlockNumber(&(itup->t_tid)),
+ ItemPointerGetOffsetNumber(&(itup->t_tid)));
+
+ ereport(ERROR,
+ (errcode(ERRCODE_INDEX_CORRUPTED),
+ errmsg("could not find tuple using search from root page in index \"%s\"",
+ RelationGetRelationName(state->rel)),
+ errdetail_internal("Index tid=%s points to heap tid=%s page lsn=%X/%X.",
+ itid, htid,
+ (uint32) (state->targetlsn >> 32),
+ (uint32) state->targetlsn)));
+ }
+
/* Build insertion scankey for current page offset */
skey = bt_mkscankey_pivotsearch(state->rel, itup);
@@ -1526,6 +1568,9 @@ bt_downlink_check(BtreeCheckState *state, BTScanInsert targetkey,
* internal pages. In more general terms, a negative infinity item is
* only negative infinity with respect to the subtree that the page is
* at the root of.
+ *
+ * See also: bt_rootdescend(), which can even detect transitive
+ * inconsistencies on cousin leaf pages.
*/
if (offset_is_negative_infinity(copaque, offset))
continue;
@@ -1927,6 +1972,81 @@ bt_normalize_tuple(BtreeCheckState *state, IndexTuple itup)
}
/*
+ * Search for itup in index, starting from fast root page. itup must be a
+ * non-pivot tuple. This is only supported with heapkeyspace indexes, since
+ * we rely on having fully unique keys to find a match with only a signle
+ * visit to a leaf page, barring an interrupted page split, where we may have
+ * to move right. (A concurrent page split is impossible because caller must
+ * be readonly caller.)
+ *
+ * This routine can detect very subtle transitive consistency issues across
+ * more than one level of the tree. Leaf pages all have a high key (even the
+ * rightmost page has a conceptual positive infinity high key), but not a low
+ * key. Their downlink in parent is a lower bound, which along with the high
+ * key is almost enough to detect every possible inconsistency. A downlink
+ * separator key value won't always be available from parent, though, because
+ * the first items of internal pages are negative infinity items, truncated
+ * down to zero attributes during internal page splits. While it's true that
+ * bt_downlink_check() and the high key check can detect most imaginable key
+ * space problems, there are remaining problems it won't detect with non-pivot
+ * tuples in cousin leaf pages. Starting a search from the root for every
+ * existing leaf tuple detects small inconsistencies in upper levels of the
+ * tree that cannot be detected any other way. (Besides all this, this is
+ * probably also useful as a direct test of the code used by index scans
+ * themselves.)
+ */
+static bool
+bt_rootdescend(BtreeCheckState *state, IndexTuple itup)
+{
+ BTScanInsert key;
+ BTStack stack;
+ Buffer lbuf;
+ bool exists;
+
+ key = _bt_mkscankey(state->rel, itup);
+ Assert(key->heapkeyspace && key->scantid != NULL);
+
+ /*
+ * Search from root.
+ *
+ * Ideally, we would arrange to only move right within _bt_search() when
+ * an interrupted page split is detected (i.e. when the incomplete split
+ * bit is found to be set), but for now we accept the possibility that
+ * that could conceal an inconsistency.
+ */
+ Assert(state->readonly && state->rootdescend);
+ exists = false;
+ stack = _bt_search(state->rel, key, &lbuf, BT_READ, NULL);
+
+ if (BufferIsValid(lbuf))
+ {
+ BTInsertStateData insertstate;
+ OffsetNumber offnum;
+ Page page;
+
+ insertstate.itup = itup;
+ insertstate.itemsz = MAXALIGN(IndexTupleSize(itup));
+ insertstate.itup_key = key;
+ insertstate.bounds_valid = false;
+ insertstate.buf = lbuf;
+
+ /* Get matching tuple on leaf page */
+ offnum = _bt_binsrch_insert(state->rel, &insertstate);
+ /* Compare first >= matching item on leaf page, if any */
+ page = BufferGetPage(lbuf);
+ if (offnum <= PageGetMaxOffsetNumber(page) &&
+ _bt_compare(state->rel, key, page, offnum) == 0)
+ exists = true;
+ _bt_relbuf(state->rel, lbuf);
+ }
+
+ _bt_freestack(stack);
+ pfree(key);
+
+ return exists;
+}
+
+/*
* Is particular offset within page (whose special state is passed by caller)
* the page negative-infinity item?
*
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 8bb60d5c2da..627651d8d4a 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -112,7 +112,7 @@ ORDER BY c.relpages DESC LIMIT 10;
<varlistentry>
<term>
- <function>bt_index_parent_check(index regclass, heapallindexed boolean) returns void</function>
+ <function>bt_index_parent_check(index regclass, heapallindexed boolean, rootdescend boolean) returns void</function>
<indexterm>
<primary>bt_index_parent_check</primary>
</indexterm>
@@ -126,7 +126,10 @@ ORDER BY c.relpages DESC LIMIT 10;
argument is <literal>true</literal>, the function verifies the
presence of all heap tuples that should be found within the
index, and that there are no missing downlinks in the index
- structure. The checks that can be performed by
+ structure. When the optional <parameter>rootdescend</parameter>
+ argument is <literal>true</literal>, verification re-finds
+ tuples on the leaf level by performing a new search from the
+ root page for each tuple. The checks that can be performed by
<function>bt_index_parent_check</function> are a superset of the
checks that can be performed by <function>bt_index_check</function>.
<function>bt_index_parent_check</function> can be thought of as