diff options
Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 206 |
1 files changed, 141 insertions, 65 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 56041c3d383..37829d34321 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -33,7 +33,8 @@ #include "storage/predicate.h" #include "utils/snapmgr.h" -static void _bt_cachemetadata(Relation rel, BTMetaPageData *metad); +static void _bt_cachemetadata(Relation rel, BTMetaPageData *input); +static BTMetaPageData *_bt_getmeta(Relation rel, Buffer metabuf); static bool _bt_mark_page_halfdead(Relation rel, Buffer buf, BTStack stack); static bool _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty); @@ -77,7 +78,9 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) } /* - * _bt_upgrademetapage() -- Upgrade a meta-page from an old format to the new. + * _bt_upgrademetapage() -- Upgrade a meta-page from an old format to version + * 3, the last version that can be updated without broadly affecting + * on-disk compatibility. (A REINDEX is required to upgrade to v4.) * * This routine does purely in-memory image upgrade. Caller is * responsible for locking, WAL-logging etc. @@ -93,11 +96,11 @@ _bt_upgrademetapage(Page page) /* It must be really a meta page of upgradable version */ Assert(metaopaque->btpo_flags & BTP_META); - Assert(metad->btm_version < BTREE_VERSION); + Assert(metad->btm_version < BTREE_NOVAC_VERSION); Assert(metad->btm_version >= BTREE_MIN_VERSION); /* Set version number and fill extra fields added into version 3 */ - metad->btm_version = BTREE_VERSION; + metad->btm_version = BTREE_NOVAC_VERSION; metad->btm_oldest_btpo_xact = InvalidTransactionId; metad->btm_last_cleanup_num_heap_tuples = -1.0; @@ -107,44 +110,80 @@ _bt_upgrademetapage(Page page) } /* - * Cache metadata from meta page to rel->rd_amcache. + * Cache metadata from input meta page to rel->rd_amcache. */ static void -_bt_cachemetadata(Relation rel, BTMetaPageData *metad) +_bt_cachemetadata(Relation rel, BTMetaPageData *input) { + BTMetaPageData *cached_metad; + /* We assume rel->rd_amcache was already freed by caller */ Assert(rel->rd_amcache == NULL); rel->rd_amcache = MemoryContextAlloc(rel->rd_indexcxt, sizeof(BTMetaPageData)); - /* - * Meta page should be of supported version (should be already checked by - * caller). - */ - Assert(metad->btm_version >= BTREE_MIN_VERSION && - metad->btm_version <= BTREE_VERSION); + /* Meta page should be of supported version */ + Assert(input->btm_version >= BTREE_MIN_VERSION && + input->btm_version <= BTREE_VERSION); - if (metad->btm_version == BTREE_VERSION) + cached_metad = (BTMetaPageData *) rel->rd_amcache; + if (input->btm_version >= BTREE_NOVAC_VERSION) { - /* Last version of meta-data, no need to upgrade */ - memcpy(rel->rd_amcache, metad, sizeof(BTMetaPageData)); + /* Version with compatible meta-data, no need to upgrade */ + memcpy(cached_metad, input, sizeof(BTMetaPageData)); } else { - BTMetaPageData *cached_metad = (BTMetaPageData *) rel->rd_amcache; - /* * Upgrade meta-data: copy available information from meta-page and * fill new fields with default values. + * + * Note that we cannot upgrade to version 4+ without a REINDEX, since + * extensive on-disk changes are required. */ - memcpy(rel->rd_amcache, metad, offsetof(BTMetaPageData, btm_oldest_btpo_xact)); - cached_metad->btm_version = BTREE_VERSION; + memcpy(cached_metad, input, offsetof(BTMetaPageData, btm_oldest_btpo_xact)); + cached_metad->btm_version = BTREE_NOVAC_VERSION; cached_metad->btm_oldest_btpo_xact = InvalidTransactionId; cached_metad->btm_last_cleanup_num_heap_tuples = -1.0; } } /* + * Get metadata from share-locked buffer containing metapage, while performing + * standard sanity checks. Sanity checks here must match _bt_getroot(). + */ +static BTMetaPageData * +_bt_getmeta(Relation rel, Buffer metabuf) +{ + Page metapg; + BTPageOpaque metaopaque; + BTMetaPageData *metad; + + metapg = BufferGetPage(metabuf); + metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); + metad = BTPageGetMeta(metapg); + + /* sanity-check the metapage */ + if (!P_ISMETA(metaopaque) || + metad->btm_magic != BTREE_MAGIC) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" is not a btree", + RelationGetRelationName(rel)))); + + if (metad->btm_version < BTREE_MIN_VERSION || + metad->btm_version > BTREE_VERSION) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("version mismatch in index \"%s\": file version %d, " + "current version %d, minimal supported version %d", + RelationGetRelationName(rel), + metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); + + return metad; +} + +/* * _bt_update_meta_cleanup_info() -- Update cleanup-related information in * the metapage. * @@ -167,7 +206,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, metad = BTPageGetMeta(metapg); /* outdated version of metapage always needs rewrite */ - if (metad->btm_version < BTREE_VERSION) + if (metad->btm_version < BTREE_NOVAC_VERSION) needsRewrite = true; else if (metad->btm_oldest_btpo_xact != oldestBtpoXact || metad->btm_last_cleanup_num_heap_tuples != numHeapTuples) @@ -186,7 +225,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, START_CRIT_SECTION(); /* upgrade meta-page if needed */ - if (metad->btm_version < BTREE_VERSION) + if (metad->btm_version < BTREE_NOVAC_VERSION) _bt_upgrademetapage(metapg); /* update cleanup-related information */ @@ -202,6 +241,8 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact, XLogBeginInsert(); XLogRegisterBuffer(0, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); + Assert(metad->btm_version >= BTREE_NOVAC_VERSION); + md.version = metad->btm_version; md.root = metad->btm_root; md.level = metad->btm_level; md.fastroot = metad->btm_fastroot; @@ -376,7 +417,7 @@ _bt_getroot(Relation rel, int access) START_CRIT_SECTION(); /* upgrade metapage if needed */ - if (metad->btm_version < BTREE_VERSION) + if (metad->btm_version < BTREE_NOVAC_VERSION) _bt_upgrademetapage(metapg); metad->btm_root = rootblkno; @@ -400,6 +441,8 @@ _bt_getroot(Relation rel, int access) XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT); XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); + Assert(metad->btm_version >= BTREE_NOVAC_VERSION); + md.version = metad->btm_version; md.root = rootblkno; md.level = 0; md.fastroot = rootblkno; @@ -595,37 +638,12 @@ _bt_getrootheight(Relation rel) { BTMetaPageData *metad; - /* - * We can get what we need from the cached metapage data. If it's not - * cached yet, load it. Sanity checks here must match _bt_getroot(). - */ if (rel->rd_amcache == NULL) { Buffer metabuf; - Page metapg; - BTPageOpaque metaopaque; metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); - metapg = BufferGetPage(metabuf); - metaopaque = (BTPageOpaque) PageGetSpecialPointer(metapg); - metad = BTPageGetMeta(metapg); - - /* sanity-check the metapage */ - if (!P_ISMETA(metaopaque) || - metad->btm_magic != BTREE_MAGIC) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("index \"%s\" is not a btree", - RelationGetRelationName(rel)))); - - if (metad->btm_version < BTREE_MIN_VERSION || - metad->btm_version > BTREE_VERSION) - ereport(ERROR, - (errcode(ERRCODE_INDEX_CORRUPTED), - errmsg("version mismatch in index \"%s\": file version %d, " - "current version %d, minimal supported version %d", - RelationGetRelationName(rel), - metad->btm_version, BTREE_VERSION, BTREE_MIN_VERSION))); + metad = _bt_getmeta(rel, metabuf); /* * If there's no root page yet, _bt_getroot() doesn't expect a cache @@ -642,20 +660,71 @@ _bt_getrootheight(Relation rel) * Cache the metapage data for next time */ _bt_cachemetadata(rel, metad); - + /* We shouldn't have cached it if any of these fail */ + Assert(metad->btm_magic == BTREE_MAGIC); + Assert(metad->btm_version >= BTREE_NOVAC_VERSION); + Assert(metad->btm_fastroot != P_NONE); _bt_relbuf(rel, metabuf); } + /* Get cached page */ metad = (BTMetaPageData *) rel->rd_amcache; - /* We shouldn't have cached it if any of these fail */ - Assert(metad->btm_magic == BTREE_MAGIC); - Assert(metad->btm_version == BTREE_VERSION); - Assert(metad->btm_fastroot != P_NONE); return metad->btm_fastlevel; } /* + * _bt_heapkeyspace() -- is heap TID being treated as a key? + * + * This is used to determine the rules that must be used to descend a + * btree. Version 4 indexes treat heap TID as a tiebreaker attribute. + * pg_upgrade'd version 3 indexes need extra steps to preserve reasonable + * performance when inserting a new BTScanInsert-wise duplicate tuple + * among many leaf pages already full of such duplicates. + */ +bool +_bt_heapkeyspace(Relation rel) +{ + BTMetaPageData *metad; + + if (rel->rd_amcache == NULL) + { + Buffer metabuf; + + metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ); + metad = _bt_getmeta(rel, metabuf); + + /* + * If there's no root page yet, _bt_getroot() doesn't expect a cache + * to be made, so just stop here. (XXX perhaps _bt_getroot() should + * be changed to allow this case.) + */ + if (metad->btm_root == P_NONE) + { + uint32 btm_version = metad->btm_version; + + _bt_relbuf(rel, metabuf); + return btm_version > BTREE_NOVAC_VERSION; + } + + /* + * Cache the metapage data for next time + */ + _bt_cachemetadata(rel, metad); + /* We shouldn't have cached it if any of these fail */ + Assert(metad->btm_magic == BTREE_MAGIC); + Assert(metad->btm_version >= BTREE_NOVAC_VERSION); + Assert(metad->btm_fastroot != P_NONE); + _bt_relbuf(rel, metabuf); + } + + /* Get cached page */ + metad = (BTMetaPageData *) rel->rd_amcache; + + return metad->btm_version > BTREE_NOVAC_VERSION; +} + +/* * _bt_checkpage() -- Verify that a freshly-read page looks sane. */ void @@ -1123,11 +1192,12 @@ _bt_is_page_halfdead(Relation rel, BlockNumber blk) * right sibling. * * "child" is the leaf page we wish to delete, and "stack" is a search stack - * leading to it (approximately). Note that we will update the stack - * entry(s) to reflect current downlink positions --- this is essentially the - * same as the corresponding step of splitting, and is not expected to affect - * caller. The caller should initialize *target and *rightsib to the leaf - * page and its right sibling. + * leading to it (it actually leads to the leftmost leaf page with a high key + * matching that of the page to be deleted in !heapkeyspace indexes). Note + * that we will update the stack entry(s) to reflect current downlink + * positions --- this is essentially the same as the corresponding step of + * splitting, and is not expected to affect caller. The caller should + * initialize *target and *rightsib to the leaf page and its right sibling. * * Note: it's OK to release page locks on any internal pages between the leaf * and *topparent, because a safe deletion can't become unsafe due to @@ -1149,8 +1219,10 @@ _bt_lock_branch_parent(Relation rel, BlockNumber child, BTStack stack, BlockNumber leftsib; /* - * Locate the downlink of "child" in the parent (updating the stack entry - * if needed) + * Locate the downlink of "child" in the parent, updating the stack entry + * if needed. This is how !heapkeyspace indexes deal with having + * non-unique high keys in leaf level pages. Even heapkeyspace indexes + * can have a stale stack due to insertions into the parent. */ stack->bts_btentry = child; pbuf = _bt_getstackbuf(rel, stack); @@ -1362,9 +1434,10 @@ _bt_pagedel(Relation rel, Buffer buf) { /* * We need an approximate pointer to the page's parent page. We - * use the standard search mechanism to search for the page's high - * key; this will give us a link to either the current parent or - * someplace to its left (if there are multiple equal high keys). + * use a variant of the standard search mechanism to search for + * the page's high key; this will give us a link to either the + * current parent or someplace to its left (if there are multiple + * equal high keys, which is possible with !heapkeyspace indexes). * * Also check if this is the right-half of an incomplete split * (see comment above). @@ -1422,7 +1495,8 @@ _bt_pagedel(Relation rel, Buffer buf) /* we need an insertion scan key for the search, so build one */ itup_key = _bt_mkscankey(rel, targetkey); - /* get stack to leaf page by searching index */ + /* find the leftmost leaf page with matching pivot/high key */ + itup_key->pivotsearch = true; stack = _bt_search(rel, itup_key, &lbuf, BT_READ, NULL); /* don't need a lock or second pin on the page */ _bt_relbuf(rel, lbuf); @@ -1969,7 +2043,7 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) if (BufferIsValid(metabuf)) { /* upgrade metapage if needed */ - if (metad->btm_version < BTREE_VERSION) + if (metad->btm_version < BTREE_NOVAC_VERSION) _bt_upgrademetapage(metapg); metad->btm_fastroot = rightsib; metad->btm_fastlevel = targetlevel; @@ -2017,6 +2091,8 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty) { XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT | REGBUF_STANDARD); + Assert(metad->btm_version >= BTREE_NOVAC_VERSION); + xlmeta.version = metad->btm_version; xlmeta.root = metad->btm_root; xlmeta.level = metad->btm_level; xlmeta.fastroot = metad->btm_fastroot; |