diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/nbtree/nbtsort.c | 60 |
1 files changed, 42 insertions, 18 deletions
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c index e662eee2360..cc50ec1fa43 100644 --- a/src/backend/access/nbtree/nbtsort.c +++ b/src/backend/access/nbtree/nbtsort.c @@ -841,6 +841,7 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) OffsetNumber last_off; Size pgspc; Size itupsz; + bool isleaf; /* * This is a handy place to check for cancel interrupts during the btree @@ -855,9 +856,12 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) pgspc = PageGetFreeSpace(npage); itupsz = IndexTupleSize(itup); itupsz = MAXALIGN(itupsz); + /* Leaf case has slightly different rules due to suffix truncation */ + isleaf = (state->btps_level == 0); /* - * Check whether the item can fit on a btree page at all. + * Check whether the new item can fit on a btree page on current level at + * all. * * Every newly built index will treat heap TID as part of the keyspace, * which imposes the requirement that new high keys must occasionally have @@ -870,16 +874,29 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) * the reserved space. This should never fail on internal pages. */ if (unlikely(itupsz > BTMaxItemSize(npage))) - _bt_check_third_page(wstate->index, wstate->heap, - state->btps_level == 0, npage, itup); + _bt_check_third_page(wstate->index, wstate->heap, isleaf, npage, + itup); /* - * Check to see if page is "full". It's definitely full if the item won't - * fit. Otherwise, compare to the target freespace derived from the - * fillfactor. However, we must put at least two items on each page, so - * disregard fillfactor if we don't have that many. + * Check to see if current page will fit new item, with space left over to + * append a heap TID during suffix truncation when page is a leaf page. + * + * It is guaranteed that we can fit at least 2 non-pivot tuples plus a + * high key with heap TID when finishing off a leaf page, since we rely on + * _bt_check_third_page() rejecting oversized non-pivot tuples. On + * internal pages we can always fit 3 pivot tuples with larger internal + * page tuple limit (includes page high key). + * + * Most of the time, a page is only "full" in the sense that the soft + * fillfactor-wise limit has been exceeded. However, we must always leave + * at least two items plus a high key on each page before starting a new + * page. Disregard fillfactor and insert on "full" current page if we + * don't have the minimum number of items yet. (Note that we deliberately + * assume that suffix truncation neither enlarges nor shrinks new high key + * when applying soft limit.) */ - if (pgspc < itupsz || (pgspc < state->btps_full && last_off > P_FIRSTKEY)) + if (pgspc < itupsz + (isleaf ? MAXALIGN(sizeof(ItemPointerData)) : 0) || + (pgspc < state->btps_full && last_off > P_FIRSTKEY)) { /* * Finish off the page and write it out. @@ -889,7 +906,6 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) ItemId ii; ItemId hii; IndexTuple oitup; - BTPageOpaque opageop = (BTPageOpaque) PageGetSpecialPointer(opage); /* Create new page of same level */ npage = _bt_blnewpage(state->btps_level); @@ -910,14 +926,20 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) _bt_sortaddtup(npage, ItemIdGetLength(ii), oitup, P_FIRSTKEY); /* - * Move 'last' into the high key position on opage + * Move 'last' into the high key position on opage. _bt_blnewpage() + * allocated empty space for a line pointer when opage was first + * created, so this is a matter of rearranging already-allocated space + * on page, and initializing high key line pointer. (Actually, leaf + * pages must also swap oitup with a truncated version of oitup, which + * is sometimes larger than oitup, though never by more than the space + * needed to append a heap TID.) */ hii = PageGetItemId(opage, P_HIKEY); *hii = *ii; ItemIdSetUnused(ii); /* redundant */ ((PageHeader) opage)->pd_lower -= sizeof(ItemIdData); - if (P_ISLEAF(opageop)) + if (isleaf) { IndexTuple lastleft; IndexTuple truncated; @@ -943,15 +965,13 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) * tuple, it cannot just be copied in place (besides, we want * to actually save space on the leaf page). We delete the * original high key, and add our own truncated high key at the - * same offset. It's okay if the truncated tuple is slightly - * larger due to containing a heap TID value, since this case is - * known to _bt_check_third_page(), which reserves space. + * same offset. * * Note that the page layout won't be changed very much. oitup is * already located at the physical beginning of tuple space, so we * only shift the line pointer array back and forth, and overwrite - * the latter portion of the space occupied by the original tuple. - * This is fairly cheap. + * the tuple space previously occupied by oitup. This is fairly + * cheap. */ ii = PageGetItemId(opage, OffsetNumberPrev(last_off)); lastleft = (IndexTuple) PageGetItem(opage, ii); @@ -979,9 +999,9 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) Assert((BTreeTupleGetNAtts(state->btps_minkey, wstate->index) <= IndexRelationGetNumberOfKeyAttributes(wstate->index) && BTreeTupleGetNAtts(state->btps_minkey, wstate->index) > 0) || - P_LEFTMOST(opageop)); + P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage))); Assert(BTreeTupleGetNAtts(state->btps_minkey, wstate->index) == 0 || - !P_LEFTMOST(opageop)); + !P_LEFTMOST((BTPageOpaque) PageGetSpecialPointer(opage))); BTreeInnerTupleSetDownLink(state->btps_minkey, oblkno); _bt_buildadd(wstate, state->btps_next, state->btps_minkey); pfree(state->btps_minkey); @@ -1018,6 +1038,10 @@ _bt_buildadd(BTWriteState *wstate, BTPageState *state, IndexTuple itup) } /* + * By here, either original page is still the current page, or a new page + * was created that became the current page. Either way, the current page + * definitely has space for new item. + * * If the new item is the first for its page, stash a copy for later. Note * this will only happen for the first item on a level; on later pages, * the first item for a page is copied from the prior page in the code |