Diffstat (limited to 'src')
-rw-r--r--  src/backend/access/index/indexam.c    |   7
-rw-r--r--  src/backend/access/nbtree/nbtree.c    | 120
-rw-r--r--  src/backend/access/nbtree/nbtsearch.c |  68
-rw-r--r--  src/backend/access/nbtree/nbtutils.c  |  39
-rw-r--r--  src/backend/executor/nodeIndexscan.c  |   2
-rw-r--r--  src/include/access/nbtree.h           |  22
-rw-r--r--  src/include/access/relscan.h          |   2
7 files changed, 130 insertions(+), 130 deletions(-)
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index 3e0797a5c2e..6d423a7d682 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -443,9 +443,10 @@ index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
Assert(TransactionIdIsValid(RecentGlobalXmin));
/*
- * The AM's gettuple proc finds the next index entry matching the scan
- * keys, and puts the TID in xs_ctup.t_self. It should also set
- * scan->xs_recheck, though we pay no attention to that here.
+ * The AM's amgettuple proc finds the next index entry matching the scan
+ * keys, and puts the TID into scan->xs_ctup.t_self. It should also set
+ * scan->xs_recheck and possibly scan->xs_itup, though we pay no attention
+ * to those fields here.
*/
found = DatumGetBool(FunctionCall2(procedure,
PointerGetDatum(scan),
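The revised comment spells out the amgettuple contract: on success the AM fills scan->xs_ctup.t_self, and may also set xs_recheck and (for index-only scans) xs_itup, both of which index_getnext_tid itself ignores. A rough standalone model of that caller-side view, with illustrative types and names rather than the real PostgreSQL ones:

#include <stdbool.h>
#include <stddef.h>

typedef struct
{
    unsigned    block;              /* heap block number */
    unsigned short offset;          /* line pointer within the block */
} ModelTid;

typedef struct
{
    ModelTid    xs_ctup_t_self;     /* heap TID, set by the AM on success */
    bool        xs_recheck;         /* AM may set this too ... */
    void       *xs_itup;            /* ... and this, for index-only scans */
} ModelScanDesc;

/* Return a pointer to the next matching heap TID, or NULL when the scan is
 * exhausted.  xs_recheck and xs_itup are left for callers further up. */
static ModelTid *
model_getnext_tid(ModelScanDesc *scan, bool (*amgettuple)(ModelScanDesc *))
{
    if (!amgettuple(scan))
        return NULL;
    return &scan->xs_ctup_t_self;
}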
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 11b57659ee8..80e703b1906 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -73,7 +73,6 @@ static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
BTCycleId cycleid);
static void btvacuumpage(BTVacState *vstate, BlockNumber blkno,
BlockNumber orig_blkno);
-static IndexTuple bt_getindextuple(IndexScanDesc scan);
/*
@@ -311,95 +310,10 @@ btgettuple(PG_FUNCTION_ARGS)
else
res = _bt_first(scan, dir);
- /* Return the whole index tuple if requested */
- if (scan->xs_want_itup)
- {
- /* First, free the last one ... */
- if (scan->xs_itup != NULL)
- {
- pfree(scan->xs_itup);
- scan->xs_itup = NULL;
- }
-
- if (res)
- scan->xs_itup = bt_getindextuple(scan);
- }
-
PG_RETURN_BOOL(res);
}
/*
- * bt_getindextuple - fetch index tuple at current position.
- *
- * This can fail to find the tuple if new tuples have been inserted on the
- * index page since we stepped onto the page. NULL is returned in that case.
- * (We could try a bit harder by searching for the TID; but if insertions
- * are happening, it's reasonably likely that an index-only scan will fail
- * anyway because of visibility. So probably not worth the trouble.)
- *
- * The tuple returned is a palloc'd copy, so that we don't need to keep a
- * lock on the index page.
- *
- * The caller must have pin on so->currPos.buf.
- */
-static IndexTuple
-bt_getindextuple(IndexScanDesc scan)
-{
- BTScanOpaque so = (BTScanOpaque) scan->opaque;
- Page page;
- BTPageOpaque opaque;
- OffsetNumber minoff;
- OffsetNumber maxoff;
- int itemIndex;
- OffsetNumber offnum;
- IndexTuple ituple,
- result;
-
- Assert(BufferIsValid(so->currPos.buf));
-
- LockBuffer(so->currPos.buf, BT_READ);
-
- /* Locate the tuple, being paranoid about possibility the page changed */
- page = BufferGetPage(so->currPos.buf);
- opaque = (BTPageOpaque) PageGetSpecialPointer(page);
- minoff = P_FIRSTDATAKEY(opaque);
- maxoff = PageGetMaxOffsetNumber(page);
-
- itemIndex = so->currPos.itemIndex;
- /* pure paranoia */
- Assert(itemIndex >= so->currPos.firstItem &&
- itemIndex <= so->currPos.lastItem);
-
- offnum = so->currPos.items[itemIndex].indexOffset;
- if (offnum < minoff || offnum > maxoff)
- {
- /* should never happen, since we have pin on page, but be careful */
- LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
- return NULL;
- }
-
- ituple = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
-
- if (ItemPointerEquals(&ituple->t_tid, &scan->xs_ctup.t_self))
- {
- /* yup, it's the desired tuple, so make a copy */
- Size itupsz = IndexTupleSize(ituple);
-
- result = palloc(itupsz);
- memcpy(result, ituple, itupsz);
- }
- else
- {
- /* oops, it got moved */
- result = NULL;
- }
-
- LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
-
- return result;
-}
-
-/*
* btgetbitmap() -- gets all matching tuples, and adds them to a bitmap
*/
Datum
@@ -471,6 +385,15 @@ btbeginscan(PG_FUNCTION_ARGS)
so->keyData = NULL;
so->killedItems = NULL; /* until needed */
so->numKilled = 0;
+
+ /*
+ * We don't know yet whether the scan will be index-only, so we do not
+ * allocate the tuple workspace arrays until btrescan.
+ */
+ so->currTuples = so->markTuples = NULL;
+ so->currPos.nextTupleOffset = 0;
+ so->markPos.nextTupleOffset = 0;
+
scan->opaque = so;
PG_RETURN_POINTER(scan);
@@ -506,6 +429,18 @@ btrescan(PG_FUNCTION_ARGS)
so->markItemIndex = -1;
/*
+ * Allocate tuple workspace arrays, if needed for an index-only scan and
+ * not already done in a previous rescan call. To save on palloc
+ * overhead, both workspaces are allocated as one palloc block; only this
+ * function and btendscan know that.
+ */
+ if (scan->xs_want_itup && so->currTuples == NULL)
+ {
+ so->currTuples = (char *) palloc(BLCKSZ * 2);
+ so->markTuples = so->currTuples + BLCKSZ;
+ }
+
+ /*
* Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
* - vadim 05/05/97
*/
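btrescan above grabs both tuple workspaces with a single palloc, and btendscan later frees only currTuples, since markTuples points into the same allocation. A minimal standalone sketch of that allocate-once/free-once arrangement, using malloc in place of palloc and an illustrative MODEL_BLCKSZ constant:

#include <stdlib.h>

#define MODEL_BLCKSZ 8192           /* stand-in for BLCKSZ */

typedef struct
{
    char   *currTuples;             /* tuple storage for the current position */
    char   *markTuples;             /* tuple storage for the marked position */
} ModelOpaque;

static void
model_alloc_tuple_workspaces(ModelOpaque *so)
{
    if (so->currTuples == NULL)
    {
        /* one block, carved into two BLCKSZ-sized halves */
        so->currTuples = malloc(MODEL_BLCKSZ * 2);
        so->markTuples = so->currTuples + MODEL_BLCKSZ;
    }
}

static void
model_free_tuple_workspaces(ModelOpaque *so)
{
    /* markTuples must not be freed separately; it is not its own block */
    free(so->currTuples);
    so->currTuples = so->markTuples = NULL;
}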
@@ -544,18 +479,16 @@ btendscan(PG_FUNCTION_ARGS)
}
so->markItemIndex = -1;
+ /* Release storage */
if (so->killedItems != NULL)
pfree(so->killedItems);
if (so->keyData != NULL)
pfree(so->keyData);
+ if (so->currTuples != NULL)
+ pfree(so->currTuples);
+ /* so->markTuples should not be pfree'd, see btrescan */
pfree(so);
- if (scan->xs_itup != NULL)
- {
- pfree(scan->xs_itup);
- scan->xs_itup = NULL;
- }
-
PG_RETURN_VOID();
}
@@ -626,6 +559,9 @@ btrestrpos(PG_FUNCTION_ARGS)
memcpy(&so->currPos, &so->markPos,
offsetof(BTScanPosData, items[1]) +
so->markPos.lastItem * sizeof(BTScanPosItem));
+ if (so->currTuples)
+ memcpy(so->currTuples, so->markTuples,
+ so->markPos.nextTupleOffset);
}
}
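btrestrpos (and the mark side in _bt_steppage below) copies only the part of the position struct that is actually in use, plus only nextTupleOffset bytes of the tuple workspace. A standalone sketch of that "copy only what is filled" idea, with illustrative types; the offsetof arithmetic here is equivalent to the offsetof(BTScanPosData, items[1]) + lastItem * sizeof(BTScanPosItem) form used in the patch:

#include <stddef.h>
#include <string.h>

#define MODEL_MAX_ITEMS 1024        /* stand-in for MaxIndexTuplesPerPage */

typedef struct
{
    unsigned    heapTid;            /* illustrative stand-in for ItemPointerData */
    unsigned short indexOffset;
    unsigned short tupleOffset;
} ModelPosItem;

typedef struct
{
    int         lastItem;           /* last valid entry in items[] */
    int         nextTupleOffset;    /* bytes used in the tuple workspace */
    ModelPosItem items[MODEL_MAX_ITEMS];
} ModelScanPos;

static void
model_restore_position(ModelScanPos *curr, const ModelScanPos *mark,
                       char *currTuples, const char *markTuples)
{
    /* copy header plus items[0 .. lastItem], not the whole array */
    memcpy(curr, mark,
           offsetof(ModelScanPos, items) +
           (mark->lastItem + 1) * sizeof(ModelPosItem));
    /* and only the used part of the saved index tuples, if any */
    if (currTuples != NULL)
        memcpy(currTuples, markTuples, mark->nextTupleOffset);
}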
diff --git a/src/backend/access/nbtree/nbtsearch.c b/src/backend/access/nbtree/nbtsearch.c
index 313e574f969..f4bea8cd537 100644
--- a/src/backend/access/nbtree/nbtsearch.c
+++ b/src/backend/access/nbtree/nbtsearch.c
@@ -26,6 +26,8 @@
static bool _bt_readpage(IndexScanDesc scan, ScanDirection dir,
OffsetNumber offnum);
+static void _bt_saveitem(BTScanOpaque so, int itemIndex,
+ OffsetNumber offnum, IndexTuple itup);
static bool _bt_steppage(IndexScanDesc scan, ScanDirection dir);
static Buffer _bt_walk_left(Relation rel, Buffer buf);
static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
@@ -429,8 +431,9 @@ _bt_compare(Relation rel,
* if backwards scan, the last item) in the tree that satisfies the
* qualifications in the scan key. On success exit, the page containing
* the current index tuple is pinned but not locked, and data about
- * the matching tuple(s) on the page has been loaded into so->currPos,
- * and scan->xs_ctup.t_self is set to the heap TID of the current tuple.
+ * the matching tuple(s) on the page has been loaded into so->currPos.
+ * scan->xs_ctup.t_self is set to the heap TID of the current tuple,
+ * and if requested, scan->xs_itup points to a copy of the index tuple.
*
* If there are no matching items in the index, we return FALSE, with no
* pins or locks held.
@@ -456,6 +459,7 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
int keysCount = 0;
int i;
StrategyNumber strat_total;
+ BTScanPosItem *currItem;
pgstat_count_index_scan(rel);
@@ -912,7 +916,10 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
/* OK, itemIndex says what to return */
- scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;
+ currItem = &so->currPos.items[so->currPos.itemIndex];
+ scan->xs_ctup.t_self = currItem->heapTid;
+ if (scan->xs_want_itup)
+ scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
return true;
}
@@ -925,7 +932,8 @@ _bt_first(IndexScanDesc scan, ScanDirection dir)
* previously returned.
*
* On successful exit, scan->xs_ctup.t_self is set to the TID of the
- * next heap tuple, and so->currPos is updated as needed.
+ * next heap tuple, and if requested, scan->xs_itup points to a copy of
+ * the index tuple. so->currPos is updated as needed.
*
* On failure exit (no more tuples), we release pin and set
* so->currPos.buf to InvalidBuffer.
@@ -934,6 +942,7 @@ bool
_bt_next(IndexScanDesc scan, ScanDirection dir)
{
BTScanOpaque so = (BTScanOpaque) scan->opaque;
+ BTScanPosItem *currItem;
/*
* Advance to next tuple on current page; or if there's no more, try to
@@ -967,7 +976,10 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
}
/* OK, itemIndex says what to return */
- scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;
+ currItem = &so->currPos.items[so->currPos.itemIndex];
+ scan->xs_ctup.t_self = currItem->heapTid;
+ if (scan->xs_want_itup)
+ scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
return true;
}
@@ -996,6 +1008,7 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
OffsetNumber minoff;
OffsetNumber maxoff;
int itemIndex;
+ IndexTuple itup;
bool continuescan;
/* we must have the buffer pinned and locked */
@@ -1013,6 +1026,9 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
*/
so->currPos.nextPage = opaque->btpo_next;
+ /* initialize tuple workspace to empty */
+ so->currPos.nextTupleOffset = 0;
+
if (ScanDirectionIsForward(dir))
{
/* load items[] in ascending order */
@@ -1022,12 +1038,11 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
while (offnum <= maxoff)
{
- if (_bt_checkkeys(scan, page, offnum, dir, &continuescan))
+ itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
+ if (itup != NULL)
{
/* tuple passes all scan key conditions, so remember it */
- /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */
- so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self;
- so->currPos.items[itemIndex].indexOffset = offnum;
+ _bt_saveitem(so, itemIndex, offnum, itup);
itemIndex++;
}
if (!continuescan)
@@ -1054,13 +1069,12 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
while (offnum >= minoff)
{
- if (_bt_checkkeys(scan, page, offnum, dir, &continuescan))
+ itup = _bt_checkkeys(scan, page, offnum, dir, &continuescan);
+ if (itup != NULL)
{
/* tuple passes all scan key conditions, so remember it */
- /* _bt_checkkeys put the heap ptr into scan->xs_ctup.t_self */
itemIndex--;
- so->currPos.items[itemIndex].heapTid = scan->xs_ctup.t_self;
- so->currPos.items[itemIndex].indexOffset = offnum;
+ _bt_saveitem(so, itemIndex, offnum, itup);
}
if (!continuescan)
{
@@ -1081,6 +1095,25 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum)
return (so->currPos.firstItem <= so->currPos.lastItem);
}
+/* Save an index item into so->currPos.items[itemIndex] */
+static void
+_bt_saveitem(BTScanOpaque so, int itemIndex,
+ OffsetNumber offnum, IndexTuple itup)
+{
+ BTScanPosItem *currItem = &so->currPos.items[itemIndex];
+
+ currItem->heapTid = itup->t_tid;
+ currItem->indexOffset = offnum;
+ if (so->currTuples)
+ {
+ Size itupsz = IndexTupleSize(itup);
+
+ currItem->tupleOffset = so->currPos.nextTupleOffset;
+ memcpy(so->currTuples + so->currPos.nextTupleOffset, itup, itupsz);
+ so->currPos.nextTupleOffset += MAXALIGN(itupsz);
+ }
+}
+
/*
* _bt_steppage() -- Step to next page containing valid data for scan
*
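_bt_saveitem above appends each matching tuple's image to the page-local workspace, MAXALIGN'ing the size so the next copy starts on an aligned boundary, and records where it landed. A standalone sketch of that append-with-alignment pattern (MODEL_MAXALIGN is an illustrative 8-byte round-up, not the real macro):

#include <stddef.h>
#include <string.h>

#define MODEL_MAXALIGN(len)  (((len) + 7) & ~(size_t) 7)

typedef struct
{
    char   *workspace;              /* BLCKSZ-sized buffer */
    int     nextOffset;             /* first free byte in workspace */
} ModelTupleStore;

/* Append one variable-length tuple image and return the offset at which it
 * was stored, so a fixed-size per-item record can point back at it. */
static int
model_save_tuple(ModelTupleStore *store, const void *tuple, size_t len)
{
    int     offset = store->nextOffset;

    memcpy(store->workspace + offset, tuple, len);
    store->nextOffset += (int) MODEL_MAXALIGN(len);
    return offset;
}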
@@ -1119,6 +1152,9 @@ _bt_steppage(IndexScanDesc scan, ScanDirection dir)
memcpy(&so->markPos, &so->currPos,
offsetof(BTScanPosData, items[1]) +
so->currPos.lastItem * sizeof(BTScanPosItem));
+ if (so->markTuples)
+ memcpy(so->markTuples, so->currTuples,
+ so->currPos.nextTupleOffset);
so->markPos.itemIndex = so->markItemIndex;
so->markItemIndex = -1;
}
@@ -1428,6 +1464,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
Page page;
BTPageOpaque opaque;
OffsetNumber start;
+ BTScanPosItem *currItem;
/*
* Scan down to the leftmost or rightmost leaf page. This is a simplified
@@ -1505,7 +1542,10 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
LockBuffer(so->currPos.buf, BUFFER_LOCK_UNLOCK);
/* OK, itemIndex says what to return */
- scan->xs_ctup.t_self = so->currPos.items[so->currPos.itemIndex].heapTid;
+ currItem = &so->currPos.items[so->currPos.itemIndex];
+ scan->xs_ctup.t_self = currItem->heapTid;
+ if (scan->xs_want_itup)
+ scan->xs_itup = (IndexTuple) (so->currTuples + currItem->tupleOffset);
return true;
}
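The same three-line pattern now ends _bt_first, _bt_next and _bt_endpoint: publish the current item's heap TID, and, when the caller asked for index tuples, point xs_itup at the copy saved in the workspace rather than back into the (no longer locked) index page. As a standalone sketch with illustrative names:

typedef struct
{
    unsigned    heapTid;            /* stand-in for ItemPointerData */
    unsigned short tupleOffset;     /* where the tuple copy lives in currTuples */
} ModelMatchItem;

typedef struct
{
    unsigned    xs_ctup_t_self;
    int         xs_want_itup;
    const void *xs_itup;
} ModelIosScan;

static void
model_return_item(ModelIosScan *scan, const ModelMatchItem *item,
                  const char *currTuples)
{
    scan->xs_ctup_t_self = item->heapTid;
    if (scan->xs_want_itup)
        scan->xs_itup = currTuples + item->tupleOffset;
}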
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index 3ca75b79ccb..b6fb3867bf0 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -835,8 +835,8 @@ _bt_mark_scankey_required(ScanKey skey)
/*
* Test whether an indextuple satisfies all the scankey conditions.
*
- * If so, copy its TID into scan->xs_ctup.t_self, and return TRUE.
- * If not, return FALSE (xs_ctup is not changed).
+ * If so, return the address of the index tuple on the index page.
+ * If not, return NULL.
*
* If the tuple fails to pass the qual, we also determine whether there's
* any need to continue the scan beyond this tuple, and set *continuescan
@@ -848,14 +848,16 @@ _bt_mark_scankey_required(ScanKey skey)
* offnum: offset number of index tuple (must be a valid item!)
* dir: direction we are scanning in
* continuescan: output parameter (will be set correctly in all cases)
+ *
+ * Caller must hold pin and lock on the index page.
*/
-bool
+IndexTuple
_bt_checkkeys(IndexScanDesc scan,
Page page, OffsetNumber offnum,
ScanDirection dir, bool *continuescan)
{
ItemId iid = PageGetItemId(page, offnum);
- bool tuple_valid;
+ bool tuple_alive;
IndexTuple tuple;
TupleDesc tupdesc;
BTScanOpaque so;
@@ -879,24 +881,24 @@ _bt_checkkeys(IndexScanDesc scan,
if (ScanDirectionIsForward(dir))
{
if (offnum < PageGetMaxOffsetNumber(page))
- return false;
+ return NULL;
}
else
{
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
if (offnum > P_FIRSTDATAKEY(opaque))
- return false;
+ return NULL;
}
/*
- * OK, we want to check the keys, but we'll return FALSE even if the
- * tuple passes the key tests.
+ * OK, we want to check the keys so we can set continuescan correctly,
+ * but we'll return NULL even if the tuple passes the key tests.
*/
- tuple_valid = false;
+ tuple_alive = false;
}
else
- tuple_valid = true;
+ tuple_alive = true;
tuple = (IndexTuple) PageGetItem(page, iid);
@@ -915,7 +917,7 @@ _bt_checkkeys(IndexScanDesc scan,
{
if (_bt_check_rowcompare(key, tuple, tupdesc, dir, continuescan))
continue;
- return false;
+ return NULL;
}
datum = index_getattr(tuple,
@@ -953,7 +955,7 @@ _bt_checkkeys(IndexScanDesc scan,
/*
* In any case, this indextuple doesn't match the qual.
*/
- return false;
+ return NULL;
}
if (isNull)
@@ -988,7 +990,7 @@ _bt_checkkeys(IndexScanDesc scan,
/*
* In any case, this indextuple doesn't match the qual.
*/
- return false;
+ return NULL;
}
test = FunctionCall2Coll(&key->sk_func, key->sk_collation,
@@ -1016,15 +1018,16 @@ _bt_checkkeys(IndexScanDesc scan,
/*
* In any case, this indextuple doesn't match the qual.
*/
- return false;
+ return NULL;
}
}
- /* If we get here, the tuple passes all index quals. */
- if (tuple_valid)
- scan->xs_ctup.t_self = tuple->t_tid;
+ /* Check for failure due to it being a killed tuple. */
+ if (!tuple_alive)
+ return NULL;
- return tuple_valid;
+ /* If we get here, the tuple passes all index quals. */
+ return tuple;
}
/*
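_bt_checkkeys now returns the tuple pointer (or NULL) instead of a bool, and the renamed tuple_alive flag covers the killed-item case: a dead item is still evaluated so *continuescan can be set correctly, but it is never returned as a match. A grossly simplified standalone model of that control flow, with illustrative names:

#include <stdbool.h>
#include <stddef.h>

typedef struct { int payload; } ModelIndexTuple;

static const ModelIndexTuple *
model_checkkeys(const ModelIndexTuple *tuple, bool item_is_dead,
                bool keys_pass, bool *continuescan)
{
    /* In the real code *continuescan is derived from which required key
     * failed and the scan direction; this sketch only models the return
     * value. */
    *continuescan = true;

    if (!keys_pass)
        return NULL;                /* tuple fails the scan quals */
    if (item_is_dead)
        return NULL;                /* passes the quals, but is a killed tuple */
    return tuple;
}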
diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c
index 32ed65797ae..56b9855094a 100644
--- a/src/backend/executor/nodeIndexscan.c
+++ b/src/backend/executor/nodeIndexscan.c
@@ -92,7 +92,7 @@ IndexNext(IndexScanState *node)
* scan's xs_cbuf, ie, the previously visited heap page. It's not
* clear whether it'd be better to release that pin.
*/
- if (scandesc->xs_itup != NULL &&
+ if (scandesc->xs_want_itup &&
visibilitymap_test(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
&node->iss_VMBuffer))
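The executor test now keys off xs_want_itup (was an index-only scan requested at all?) rather than whether the previous call happened to return an index tuple; the heap fetch is skipped only when that flag is set and the visibility map says the TID's heap page is all-visible. A standalone sketch of that gating decision, with illustrative names:

#include <stdbool.h>

typedef bool (*ModelVMTest) (unsigned heap_block);

/* Returns true when the index tuple alone can be returned: the plan asked
 * for an index-only scan and every tuple on the heap page is known visible.
 * Otherwise the caller must fetch (and possibly recheck) the heap tuple. */
static bool
model_can_skip_heap_fetch(bool xs_want_itup, unsigned heap_block,
                          ModelVMTest all_visible)
{
    return xs_want_itup && all_visible(heap_block);
}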
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 4e20c79ca6e..199fc940267 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -472,12 +472,18 @@ typedef BTStackData *BTStack;
* items were killed, we re-lock the page to mark them killed, then unlock.
* Finally we drop the pin and step to the next page in the appropriate
* direction.
+ *
+ * If we are doing an index-only scan, we save the entire IndexTuple for each
+ * matched item, otherwise only its heap TID and offset. The IndexTuples go
+ * into a separate workspace array; each BTScanPosItem stores its tuple's
+ * offset within that array.
*/
typedef struct BTScanPosItem /* what we remember about each match */
{
ItemPointerData heapTid; /* TID of referenced heap item */
OffsetNumber indexOffset; /* index item's location within page */
+ LocationIndex tupleOffset; /* IndexTuple's offset in workspace, if any */
} BTScanPosItem;
typedef struct BTScanPosData
@@ -496,6 +502,12 @@ typedef struct BTScanPosData
bool moreRight;
/*
+ * If we are doing an index-only scan, nextTupleOffset is the first free
+ * location in the associated tuple storage workspace.
+ */
+ int nextTupleOffset;
+
+ /*
* The items array is always ordered in index order (ie, increasing
* indexoffset). When scanning backwards it is convenient to fill the
* array back-to-front, so we start at the last slot and fill downwards.
@@ -525,6 +537,14 @@ typedef struct BTScanOpaqueData
int numKilled; /* number of currently stored items */
/*
+ * If we are doing an index-only scan, these are the tuple storage
+ * workspaces for the currPos and markPos respectively. Each is of
+ * size BLCKSZ, so it can hold as much as a full page's worth of tuples.
+ */
+ char *currTuples; /* tuple storage for currPos */
+ char *markTuples; /* tuple storage for markPos */
+
+ /*
* If the marked position is on the same page as current position, we
* don't use markPos, but just keep the marked itemIndex in markItemIndex
* (all the rest of currPos is valid for the mark position). Hence, to
@@ -620,7 +640,7 @@ extern ScanKey _bt_mkscankey_nodata(Relation rel);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
extern void _bt_preprocess_keys(IndexScanDesc scan);
-extern bool _bt_checkkeys(IndexScanDesc scan,
+extern IndexTuple _bt_checkkeys(IndexScanDesc scan,
Page page, OffsetNumber offnum,
ScanDirection dir, bool *continuescan);
extern void _bt_killitems(IndexScanDesc scan, bool haveLock);
diff --git a/src/include/access/relscan.h b/src/include/access/relscan.h
index 656aefcceec..d48bbf865ea 100644
--- a/src/include/access/relscan.h
+++ b/src/include/access/relscan.h
@@ -79,7 +79,7 @@ typedef struct IndexScanDescData
void *opaque; /* access-method-specific info */
/* in an index-only scan, this is valid after a successful amgettuple */
- IndexTuple xs_itup; /* index tuple returned by AM, or NULL */
+ IndexTuple xs_itup; /* index tuple returned by AM */
/* xs_ctup/xs_cbuf/xs_recheck are valid after a successful index_getnext */
HeapTupleData xs_ctup; /* current heap tuple, if any */