diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/index/indexam.c | 7 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtree.c | 120 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtsearch.c | 68 | ||||
-rw-r--r-- | src/backend/access/nbtree/nbtutils.c | 39 | ||||
-rw-r--r-- | src/backend/executor/nodeIndexscan.c | 2 | ||||
-rw-r--r-- | src/include/access/nbtree.h | 22 | ||||
-rw-r--r-- | src/include/access/relscan.h | 2 |
7 files changed, 130 insertions, 130 deletions
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c index 3e0797a5c2e..6d423a7d682 100644 --- a/src/backend/access/index/indexam.c +++ b/src/backend/access/index/indexam.c @@ -443,9 +443,10 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction) Assert(TransactionIdIsValid(RecentGlobalXmin)); /* - * The AM's gettuple proc finds the next index entry matching the scan - * keys, and puts the TID in xs_ctup.t_self. It should also set - * scan->xs_recheck, though we pay no attention to that here. + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the TID into scan->xs_ctup.t_self. It should also set + * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention + * to those fields here. */ found = DatumGetBool(FunctionCall2(procedure, PointerGetDatum(scan), diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c index 11b57659ee8..80e703b1906 100644 --- a/src/backend/access/nbtree/nbtree.c +++ b/src/backend/access/nbtree/nbtree.c @@ -73,7 +73,6 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, BTCycleId cycleid); static void btvacuumpage(BTVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno); -static IndexTuple bt_getindextuple(IndexScanDesc scan); /* @@ -311,95 +310,10 @@ btgettuple(PG_FUNCTION_ARGS) else res = _bt_first(scan, dir); - /* Return the whole index tuple if requested */ - if (scan->xs_want_itup) - { - /* First, free the last one ... */ - if (scan->xs_itup != NULL) - { - pfree(scan->xs_itup); - scan->xs_itup = NULL; - } - - if (res) - scan->xs_itup = bt_getindextuple(scan); - } - PG_RETURN_BOOL(res); } /* - * bt_getindextuple - fetch index tuple at current position. - * - * This can fail to find the tuple if new tuples have been inserted on the - * index page since we stepped onto the page. NULL is returned in that case. - * (We could try a bit harder by searching for the TID; but if insertions - * are happening, it's reasonably likely that an index-only scan will fail - * anyway because of visibility. So probably not worth the trouble.) - * - * The tuple returned is a palloc'd copy, so that we don't need to keep a - * lock on the index page. - * - * The caller must have pin on so->currPos.buf. - */ -static IndexTuple -bt_getindextuple(IndexScanDesc scan) -{ - BTScanOpaque so = (BTScanOpaque) scan->opaque; - Page page; - BTPageOpaque opaque; - OffsetNumber minoff; - OffsetNumber maxoff; - int itemIndex; - OffsetNumber offnum; - IndexTuple ituple, - result; - - Assert(BufferIsValid(so->currPos.buf)); - - LockBuffer(so->currPos.buf, BT_READ); - - /* Locate the tuple, being paranoid about possibility the page changed */ - page = BufferGetPage(so->currPos.buf); - opaque = (BTPageOpaque) PageGetSpecialPointer(page); - minoff = P_FIRSTDATAKEY(opaque); - maxoff = PageGetMaxOffsetNumber(page); - - itemIndex = so->currPos.itemIndex; - /* pure paranoia */ - Assert(itemIndex >= so->currPos.firstItem && - itemIndex <= so->currPos.lastItem); - - offnum = so->currPos.items[itemIndex].indexOffset; - if (offnum < minoff || offnum > maxoff) - { - /* should never happen, since we have pin on page, but be careful */ - LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); - return NULL; - } - - ituple = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); - - if (ItemPointerEquals(&ituple->t_tid, &scan->xs_ctup.t_self)) - { - /* yup, it's the desired tuple, so make a copy */ - Size itupsz = IndexTupleSize(ituple); - - result = palloc(itupsz); - memcpy(result, ituple, itupsz); - } - else - { - /* oops, it got moved */ - result = NULL; - } - - LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); - - return result; -} - -/* * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap */ Datum @@ -471,6 +385,15 @@ btbeginscan(PG_FUNCTION_ARGS) so->keyData = NULL; so->killedItems = NULL; /* until needed */ so->numKilled = 0; + + /* + * We don't know yet whether the scan will be index-only, so we do not + * allocate the tuple workspace arrays until btrescan. + */ + so->currTuples = so->markTuples = NULL; + so->currPos.nextTupleOffset = 0; + so->markPos.nextTupleOffset = 0; + scan->opaque = so; PG_RETURN_POINTER(scan); @@ -506,6 +429,18 @@ btrescan(PG_FUNCTION_ARGS) so->markItemIndex = -1; /* + * Allocate tuple workspace arrays, if needed for an index-only scan and + * not already done in a previous rescan call. To save on palloc + * overhead, both workspaces are allocated as one palloc block; only this + * function and btendscan know that. + */ + if (scan->xs_want_itup && so->currTuples == NULL) + { + so->currTuples = (char *) palloc(BLCKSZ * 2); + so->markTuples = so->currTuples + BLCKSZ; + } + + /* * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. * - vadim 05/05/97 */ @@ -544,18 +479,16 @@ btendscan(PG_FUNCTION_ARGS) } so->markItemIndex = -1; + /* Release storage */ if (so->killedItems != NULL) pfree(so->killedItems); if (so->keyData != NULL) pfree(so->keyData); + if (so->currTuples != NULL) + pfree(so->currTuples); + /* so->markTuples should not be pfree'd, see btrescan */ pfree(so); - if (scan->xs_itup != NULL) - { - pfree(scan->xs_itup); - scan->xs_itup = NULL; - } - PG_RETURN_VOID(); } @@ -626,6 +559,9 @@ btrestrpos(PG_FUNCTION_ARGS) memcpy(&so->currPos, &so->markPos, offsetof(BTScanPosData, items[1]) + so->markPos.lastItem * sizeof(BTScanPosItem)); + if (so->currTuples) + memcpy(so->currTuples, so->markTuples, + so->markPos.nextTupleOffset); } } diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c index 313e574f969..f4bea8cd537 100644 --- a/src/backend/access/nbtree/nbtsearch.c +++ b/src/backend/access/nbtree/nbtsearch.c @@ -26,6 +26,8 @@ static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum); +static void _bt_saveitem(BTScanOpaque so, int itemIndex, + OffsetNumber offnum, IndexTuple itup); static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir); static Buffer _bt_walk_left(Relation rel, Buffer buf); static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir); @@ -429,8 +431,9 @@ _bt_compare(Relation rel, * if backwards scan, the last item) in the tree that satisfies the * qualifications in the scan key. On success exit, the page containing * the current index tuple is pinned but not locked, and data about - * the matching tuple(s) on the page has been loaded into so->currPos, - * and scan->xs_ctup.t_self is set to the heap TID of the current tuple. + * the matching tuple(s) on the page has been loaded into so->currPos. + * scan->xs_ctup.t_self is set to the heap TID of the current tuple, + * and if requested, scan->xs_itup points to a copy of the index tuple. * * If there are no matching items in the index, we return FALSE, with no * pins or locks held. @@ -456,6 +459,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) int keysCount = 0; int i; StrategyNumber strat_total; + BTScanPosItem *currItem; pgstat_count_index_scan(rel); @@ -912,7 +916,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); /* OK, itemIndex says what to return */ - scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid; + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_ctup.t_self = currItem->heapTid; + if (scan->xs_want_itup) + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); return true; } @@ -925,7 +932,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir) * previously returned. * * On successful exit, scan->xs_ctup.t_self is set to the TID of the - * next heap tuple, and so->currPos is updated as needed. + * next heap tuple, and if requested, scan->xs_itup points to a copy of + * the index tuple. so->currPos is updated as needed. * * On failure exit (no more tuples), we release pin and set * so->currPos.buf to InvalidBuffer. @@ -934,6 +942,7 @@ bool _bt_next(IndexScanDesc scan, ScanDirection dir) { BTScanOpaque so = (BTScanOpaque) scan->opaque; + BTScanPosItem *currItem; /* * Advance to next tuple on current page; or if there's no more, try to @@ -967,7 +976,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) } /* OK, itemIndex says what to return */ - scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid; + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_ctup.t_self = currItem->heapTid; + if (scan->xs_want_itup) + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); return true; } @@ -996,6 +1008,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) OffsetNumber minoff; OffsetNumber maxoff; int itemIndex; + IndexTuple itup; bool continuescan; /* we must have the buffer pinned and locked */ @@ -1013,6 +1026,9 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) */ so->currPos.nextPage = opaque->btpo_next; + /* initialize tuple workspace to empty */ + so->currPos.nextTupleOffset = 0; + if (ScanDirectionIsForward(dir)) { /* load items[] in ascending order */ @@ -1022,12 +1038,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) while (offnum <= maxoff) { - if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) + itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan); + if (itup != NULL) { /* tuple passes all scan key conditions, so remember it */ - /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ - so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; - so->currPos.items[itemIndex].indexOffset = offnum; + _bt_saveitem(so, itemIndex, offnum, itup); itemIndex++; } if (!continuescan) @@ -1054,13 +1069,12 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) while (offnum >= minoff) { - if (_bt_checkkeys(scan, page, offnum, dir, &continuescan)) + itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan); + if (itup != NULL) { /* tuple passes all scan key conditions, so remember it */ - /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */ itemIndex--; - so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self; - so->currPos.items[itemIndex].indexOffset = offnum; + _bt_saveitem(so, itemIndex, offnum, itup); } if (!continuescan) { @@ -1081,6 +1095,25 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum) return (so->currPos.firstItem <= so->currPos.lastItem); } +/* Save an index item into so->currPos.items[itemIndex] */ +static void +_bt_saveitem(BTScanOpaque so, int itemIndex, + OffsetNumber offnum, IndexTuple itup) +{ + BTScanPosItem *currItem = &so->currPos.items[itemIndex]; + + currItem->heapTid = itup->t_tid; + currItem->indexOffset = offnum; + if (so->currTuples) + { + Size itupsz = IndexTupleSize(itup); + + currItem->tupleOffset = so->currPos.nextTupleOffset; + memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz); + so->currPos.nextTupleOffset += MAXALIGN(itupsz); + } +} + /* * _bt_steppage() -- Step to next page containing valid data for scan * @@ -1119,6 +1152,9 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir) memcpy(&so->markPos, &so->currPos, offsetof(BTScanPosData, items[1]) + so->currPos.lastItem * sizeof(BTScanPosItem)); + if (so->markTuples) + memcpy(so->markTuples, so->currTuples, + so->currPos.nextTupleOffset); so->markPos.itemIndex = so->markItemIndex; so->markItemIndex = -1; } @@ -1428,6 +1464,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) Page page; BTPageOpaque opaque; OffsetNumber start; + BTScanPosItem *currItem; /* * Scan down to the leftmost or rightmost leaf page. This is a simplified @@ -1505,7 +1542,10 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK); /* OK, itemIndex says what to return */ - scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid; + currItem = &so->currPos.items[so->currPos.itemIndex]; + scan->xs_ctup.t_self = currItem->heapTid; + if (scan->xs_want_itup) + scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset); return true; } diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index 3ca75b79ccb..b6fb3867bf0 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -835,8 +835,8 @@ _bt_mark_scankey_required(ScanKey skey) /* * Test whether an indextuple satisfies all the scankey conditions. * - * If so, copy its TID into scan->xs_ctup.t_self, and return TRUE. - * If not, return FALSE (xs_ctup is not changed). + * If so, return the address of the index tuple on the index page. + * If not, return NULL. * * If the tuple fails to pass the qual, we also determine whether there's * any need to continue the scan beyond this tuple, and set *continuescan @@ -848,14 +848,16 @@ _bt_mark_scankey_required(ScanKey skey) * offnum: offset number of index tuple (must be a valid item!) * dir: direction we are scanning in * continuescan: output parameter (will be set correctly in all cases) + * + * Caller must hold pin and lock on the index page. */ -bool +IndexTuple _bt_checkkeys(IndexScanDesc scan, Page page, OffsetNumber offnum, ScanDirection dir, bool *continuescan) { ItemId iid = PageGetItemId(page, offnum); - bool tuple_valid; + bool tuple_alive; IndexTuple tuple; TupleDesc tupdesc; BTScanOpaque so; @@ -879,24 +881,24 @@ _bt_checkkeys(IndexScanDesc scan, if (ScanDirectionIsForward(dir)) { if (offnum < PageGetMaxOffsetNumber(page)) - return false; + return NULL; } else { BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); if (offnum > P_FIRSTDATAKEY(opaque)) - return false; + return NULL; } /* - * OK, we want to check the keys, but we'll return FALSE even if the - * tuple passes the key tests. + * OK, we want to check the keys so we can set continuescan correctly, + * but we'll return NULL even if the tuple passes the key tests. */ - tuple_valid = false; + tuple_alive = false; } else - tuple_valid = true; + tuple_alive = true; tuple = (IndexTuple) PageGetItem(page, iid); @@ -915,7 +917,7 @@ _bt_checkkeys(IndexScanDesc scan, { if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan)) continue; - return false; + return NULL; } datum = index_getattr(tuple, @@ -953,7 +955,7 @@ _bt_checkkeys(IndexScanDesc scan, /* * In any case, this indextuple doesn't match the qual. */ - return false; + return NULL; } if (isNull) @@ -988,7 +990,7 @@ _bt_checkkeys(IndexScanDesc scan, /* * In any case, this indextuple doesn't match the qual. */ - return false; + return NULL; } test = FunctionCall2Coll(&key->sk_func, key->sk_collation, @@ -1016,15 +1018,16 @@ _bt_checkkeys(IndexScanDesc scan, /* * In any case, this indextuple doesn't match the qual. */ - return false; + return NULL; } } - /* If we get here, the tuple passes all index quals. */ - if (tuple_valid) - scan->xs_ctup.t_self = tuple->t_tid; + /* Check for failure due to it being a killed tuple. */ + if (!tuple_alive) + return NULL; - return tuple_valid; + /* If we get here, the tuple passes all index quals. */ + return tuple; } /* diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index 32ed65797ae..56b9855094a 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -92,7 +92,7 @@ IndexNext(IndexScanState *node) * scan's xs_cbuf, ie, the previously visited heap page. It's not * clear whether it'd be better to release that pin. */ - if (scandesc->xs_itup != NULL && + if (scandesc->xs_want_itup && visibilitymap_test(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), &node->iss_VMBuffer)) diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h index 4e20c79ca6e..199fc940267 100644 --- a/src/include/access/nbtree.h +++ b/src/include/access/nbtree.h @@ -472,12 +472,18 @@ typedef BTStackData *BTStack; * items were killed, we re-lock the page to mark them killed, then unlock. * Finally we drop the pin and step to the next page in the appropriate * direction. + * + * If we are doing an index-only scan, we save the entire IndexTuple for each + * matched item, otherwise only its heap TID and offset. The IndexTuples go + * into a separate workspace array; each BTScanPosItem stores its tuple's + * offset within that array. */ typedef struct BTScanPosItem /* what we remember about each match */ { ItemPointerData heapTid; /* TID of referenced heap item */ OffsetNumber indexOffset; /* index item's location within page */ + LocationIndex tupleOffset; /* IndexTuple's offset in workspace, if any */ } BTScanPosItem; typedef struct BTScanPosData @@ -496,6 +502,12 @@ typedef struct BTScanPosData bool moreRight; /* + * If we are doing an index-only scan, nextTupleOffset is the first free + * location in the associated tuple storage workspace. + */ + int nextTupleOffset; + + /* * The items array is always ordered in index order (ie, increasing * indexoffset). When scanning backwards it is convenient to fill the * array back-to-front, so we start at the last slot and fill downwards. @@ -525,6 +537,14 @@ typedef struct BTScanOpaqueData int numKilled; /* number of currently stored items */ /* + * If we are doing an index-only scan, these are the tuple storage + * workspaces for the currPos and markPos respectively. Each is of + * size BLCKSZ, so it can hold as much as a full page's worth of tuples. + */ + char *currTuples; /* tuple storage for currPos */ + char *markTuples; /* tuple storage for markPos */ + + /* * If the marked position is on the same page as current position, we * don't use markPos, but just keep the marked itemIndex in markItemIndex * (all the rest of currPos is valid for the mark position). Hence, to @@ -620,7 +640,7 @@ extern ScanKey _bt_mkscankey_nodata(Relation rel); extern void _bt_freeskey(ScanKey skey); extern void _bt_freestack(BTStack stack); extern void _bt_preprocess_keys(IndexScanDesc scan); -extern bool _bt_checkkeys(IndexScanDesc scan, +extern IndexTuple _bt_checkkeys(IndexScanDesc scan, Page page, OffsetNumber offnum, ScanDirection dir, bool *continuescan); extern void _bt_killitems(IndexScanDesc scan, bool haveLock); diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h index 656aefcceec..d48bbf865ea 100644 --- a/src/include/access/relscan.h +++ b/src/include/access/relscan.h @@ -79,7 +79,7 @@ typedef struct IndexScanDescData void *opaque; /* access-method-specific info */ /* in an index-only scan, this is valid after a successful amgettuple */ - IndexTuple xs_itup; /* index tuple returned by AM, or NULL */ + IndexTuple xs_itup; /* index tuple returned by AM */ /* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */ HeapTupleData xs_ctup; /* current heap tuple, if any */ |