diff options
Diffstat (limited to 'src/backend/access/gist')
-rw-r--r-- | src/backend/access/gist/gist.c | 10 | ||||
-rw-r--r-- | src/backend/access/gist/gistget.c | 819 | ||||
-rw-r--r-- | src/backend/access/gist/gistproc.c | 97 | ||||
-rw-r--r-- | src/backend/access/gist/gistscan.c | 179 |
4 files changed, 676 insertions, 429 deletions
diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 8c2dbc940f4..d6aaea2162d 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -1030,6 +1030,9 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke END_CRIT_SECTION(); } +/* + * Fill a GISTSTATE with information about the index + */ void initGISTstate(GISTSTATE *giststate, Relation index) { @@ -1064,6 +1067,13 @@ initGISTstate(GISTSTATE *giststate, Relation index) fmgr_info_copy(&(giststate->equalFn[i]), index_getprocinfo(index, i + 1, GIST_EQUAL_PROC), CurrentMemoryContext); + /* opclasses are not required to provide a Distance method */ + if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC))) + fmgr_info_copy(&(giststate->distanceFn[i]), + index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC), + CurrentMemoryContext); + else + giststate->distanceFn[i].fn_oid = InvalidOid; } } diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index fab13eed523..f0418a08af2 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -20,504 +20,553 @@ #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/memutils.h" -static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n); -static int64 gistnext(IndexScanDesc scan, TIDBitmap *tbm); -static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan, - OffsetNumber offset); - -static void -killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr) +/* + * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? + * + * The index tuple might represent either a heap tuple or a lower index page, + * depending on whether the containing page is a leaf page or not. + * + * On success return for a heap tuple, *recheck_p is set to indicate + * whether recheck is needed. We recheck if any of the consistent() functions + * request it. recheck is not interesting when examining a non-leaf entry, + * since we must visit the lower index page if there's any doubt. + * + * If we are doing an ordered scan, so->distances[] is filled with distance + * data from the distance() functions before returning success. + * + * We must decompress the key in the IndexTuple before passing it to the + * sk_funcs (which actually are the opclass Consistent or Distance methods). + * + * Note that this function is always invoked in a short-lived memory context, + * so we don't need to worry about cleaning up allocated memory, either here + * or in the implementation of any Consistent or Distance methods. + */ +static bool +gistindex_keytest(IndexScanDesc scan, + IndexTuple tuple, + Page page, + OffsetNumber offset, + bool *recheck_p) { - Page p; - OffsetNumber offset; + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + GISTSTATE *giststate = so->giststate; + ScanKey key = scan->keyData; + int keySize = scan->numberOfKeys; + double *distance_p; + Relation r = scan->indexRelation; - LockBuffer(so->curbuf, GIST_SHARE); - gistcheckpage(r, so->curbuf); - p = (Page) BufferGetPage(so->curbuf); + *recheck_p = false; - if (XLByteEQ(so->stack->lsn, PageGetLSN(p))) + /* + * If it's a leftover invalid tuple from pre-9.1, treat it as a match + * with minimum possible distances. This means we'll always follow it + * to the referenced page. + */ + if (GistTupleIsInvalid(tuple)) { - /* page unchanged, so all is simple */ - offset = ItemPointerGetOffsetNumber(iptr); - ItemIdMarkDead(PageGetItemId(p, offset)); - SetBufferCommitInfoNeedsSave(so->curbuf); + int i; + + for (i = 0; i < scan->numberOfOrderBys; i++) + so->distances[i] = -get_float8_infinity(); + *recheck_p = true; /* probably unnecessary */ + return true; } - else + + /* Check whether it matches according to the Consistent functions */ + while (keySize > 0) { - OffsetNumber maxoff = PageGetMaxOffsetNumber(p); + Datum datum; + bool isNull; - for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset)) - { - IndexTuple ituple = (IndexTuple) PageGetItem(p, PageGetItemId(p, offset)); + datum = index_getattr(tuple, + key->sk_attno, + giststate->tupdesc, + &isNull); - if (ItemPointerEquals(&(ituple->t_tid), iptr)) + if (key->sk_flags & SK_ISNULL) + { + /* + * On non-leaf page we can't conclude that child hasn't NULL + * values because of assumption in GiST: union (VAL, NULL) is VAL. + * But if on non-leaf page key IS NULL, then all children are + * NULL. + */ + if (key->sk_flags & SK_SEARCHNULL) { - /* found */ - ItemIdMarkDead(PageGetItemId(p, offset)); - SetBufferCommitInfoNeedsSave(so->curbuf); - break; + if (GistPageIsLeaf(page) && !isNull) + return false; + } + else + { + Assert(key->sk_flags & SK_SEARCHNOTNULL); + if (isNull) + return false; } } - } + else if (isNull) + { + return false; + } + else + { + Datum test; + bool recheck; + GISTENTRY de; - LockBuffer(so->curbuf, GIST_UNLOCK); -} + gistdentryinit(giststate, key->sk_attno - 1, &de, + datum, r, page, offset, + FALSE, isNull); -/* - * gistgettuple() -- Get the next tuple in the scan - */ -Datum -gistgettuple(PG_FUNCTION_ARGS) -{ - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - GISTScanOpaque so; - bool res; + /* + * Call the Consistent function to evaluate the test. The + * arguments are the index datum (as a GISTENTRY*), the comparison + * datum, the comparison operator's strategy number and subtype + * from pg_amop, and the recheck flag. + * + * (Presently there's no need to pass the subtype since it'll + * always be zero, but might as well pass it for possible future + * use.) + * + * We initialize the recheck flag to true (the safest assumption) + * in case the Consistent function forgets to set it. + */ + recheck = true; - so = (GISTScanOpaque) scan->opaque; + test = FunctionCall5(&key->sk_func, + PointerGetDatum(&de), + key->sk_argument, + Int32GetDatum(key->sk_strategy), + ObjectIdGetDatum(key->sk_subtype), + PointerGetDatum(&recheck)); - if (dir != ForwardScanDirection) - elog(ERROR, "GiST doesn't support other scan directions than forward"); + if (!DatumGetBool(test)) + return false; + *recheck_p |= recheck; + } - /* - * If we have produced an index tuple in the past and the executor has - * informed us we need to mark it as "killed", do so now. - */ - if (scan->kill_prior_tuple && ItemPointerIsValid(&(so->curpos))) - killtuple(scan->indexRelation, so, &(so->curpos)); + key++; + keySize--; + } - /* - * Get the next tuple that matches the search key. - */ - res = (gistnext(scan, NULL) > 0); + /* OK, it passes --- now let's compute the distances */ + key = scan->orderByData; + distance_p = so->distances; + keySize = scan->numberOfOrderBys; + while (keySize > 0) + { + Datum datum; + bool isNull; - PG_RETURN_BOOL(res); -} + datum = index_getattr(tuple, + key->sk_attno, + giststate->tupdesc, + &isNull); -Datum -gistgetbitmap(PG_FUNCTION_ARGS) -{ - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); - int64 ntids; + if ((key->sk_flags & SK_ISNULL) || isNull) + { + /* Assume distance computes as null and sorts to the end */ + *distance_p = get_float8_infinity(); + } + else + { + Datum dist; + GISTENTRY de; - ntids = gistnext(scan, tbm); + gistdentryinit(giststate, key->sk_attno - 1, &de, + datum, r, page, offset, + FALSE, isNull); - PG_RETURN_INT64(ntids); + /* + * Call the Distance function to evaluate the distance. The + * arguments are the index datum (as a GISTENTRY*), the comparison + * datum, and the ordering operator's strategy number and subtype + * from pg_amop. + * + * (Presently there's no need to pass the subtype since it'll + * always be zero, but might as well pass it for possible future + * use.) + * + * Note that Distance functions don't get a recheck argument. + * We can't tolerate lossy distance calculations on leaf tuples; + * there is no opportunity to re-sort the tuples afterwards. + */ + dist = FunctionCall4(&key->sk_func, + PointerGetDatum(&de), + key->sk_argument, + Int32GetDatum(key->sk_strategy), + ObjectIdGetDatum(key->sk_subtype)); + + *distance_p = DatumGetFloat8(dist); + } + + key++; + distance_p++; + keySize--; + } + + return true; } /* - * Fetch tuple(s) that match the search key; this can be invoked - * either to fetch the first such tuple or subsequent matching tuples. + * Scan all items on the GiST index page identified by *pageItem, and insert + * them into the queue (or directly to output areas) * - * This function is used by both gistgettuple and gistgetbitmap. When - * invoked from gistgettuple, tbm is null and the next matching tuple - * is returned in scan->xs_ctup.t_self. When invoked from getbitmap, - * tbm is non-null and all matching tuples are added to tbm before - * returning. In both cases, the function result is the number of - * returned tuples. + * scan: index scan we are executing + * pageItem: search queue item identifying an index page to scan + * myDistances: distances array associated with pageItem, or NULL at the root + * tbm: if not NULL, gistgetbitmap's output bitmap + * ntids: if not NULL, gistgetbitmap's output tuple counter * - * If scan specifies to skip killed tuples, continue looping until we find a - * non-killed tuple that matches the search key. + * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap + * tuples should be reported directly into the bitmap. If they are NULL, + * we're doing a plain or ordered indexscan. For a plain indexscan, heap + * tuple TIDs are returned into so->pageData[]. For an ordered indexscan, + * heap tuple TIDs are pushed into individual search queue items. + * + * If we detect that the index page has split since we saw its downlink + * in the parent, we push its new right sibling onto the queue so the + * sibling will be processed next. */ -static int64 -gistnext(IndexScanDesc scan, TIDBitmap *tbm) +static void +gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, + TIDBitmap *tbm, int64 *ntids) { - Page p; - OffsetNumber n; - GISTScanOpaque so; - GISTSearchStack *stk; - IndexTuple it; + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + Buffer buffer; + Page page; GISTPageOpaque opaque; - int64 ntids = 0; + OffsetNumber maxoff; + OffsetNumber i; + GISTSearchTreeItem *tmpItem = so->tmpTreeItem; + bool isNew; + MemoryContext oldcxt; - so = (GISTScanOpaque) scan->opaque; + Assert(!GISTSearchItemIsHeap(*pageItem)); - if (so->qual_ok == false) - return 0; + buffer = ReadBuffer(scan->indexRelation, pageItem->blkno); + LockBuffer(buffer, GIST_SHARE); + gistcheckpage(scan->indexRelation, buffer); + page = BufferGetPage(buffer); + opaque = GistPageGetOpaque(page); - if (so->curbuf == InvalidBuffer) + /* check if page split occurred since visit to parent */ + if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) && + XLByteLT(pageItem->data.parentlsn, opaque->nsn) && + opaque->rightlink != InvalidBlockNumber /* sanity check */ ) { - if (ItemPointerIsValid(&so->curpos) == false) - { - /* Being asked to fetch the first entry, so start at the root */ - Assert(so->curbuf == InvalidBuffer); - Assert(so->stack == NULL); + /* There was a page split, follow right link to add pages */ + GISTSearchItem *item; - so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO); + /* This can't happen when starting at the root */ + Assert(myDistances != NULL); - stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack)); + oldcxt = MemoryContextSwitchTo(so->queueCxt); - stk->next = NULL; - stk->block = GIST_ROOT_BLKNO; + /* Create new GISTSearchItem for the right sibling index page */ + item = palloc(sizeof(GISTSearchItem)); + item->next = NULL; + item->blkno = opaque->rightlink; + item->data.parentlsn = pageItem->data.parentlsn; - pgstat_count_index_scan(scan->indexRelation); - } - else - { - /* scan is finished */ - return 0; - } + /* Insert it into the queue using same distances as for this page */ + tmpItem->head = item; + tmpItem->lastHeap = NULL; + memcpy(tmpItem->distances, myDistances, + sizeof(double) * scan->numberOfOrderBys); + + (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); + + MemoryContextSwitchTo(oldcxt); } + so->nPageData = so->curPageData = 0; + /* - * check stored pointers from last visit + * check all tuples on page */ - if (so->nPageData > 0) + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + IndexTuple it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + bool match; + bool recheck; + /* - * gistgetmulti never should go here + * Must call gistindex_keytest in tempCxt, and clean up any leftover + * junk afterward. */ - Assert(tbm == NULL); + oldcxt = MemoryContextSwitchTo(so->tempCxt); - if (so->curPageData < so->nPageData) - { - scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; - scan->xs_recheck = so->pageData[so->curPageData].recheck; + match = gistindex_keytest(scan, it, page, i, &recheck); - ItemPointerSet(&so->curpos, - BufferGetBlockNumber(so->curbuf), - so->pageData[so->curPageData].pageOffset); + MemoryContextSwitchTo(oldcxt); + MemoryContextReset(so->tempCxt); - so->curPageData++; + /* Ignore tuple if it doesn't match */ + if (!match) + continue; - return 1; + if (tbm && GistPageIsLeaf(page)) + { + /* + * getbitmap scan, so just push heap tuple TIDs into the bitmap + * without worrying about ordering + */ + tbm_add_tuples(tbm, &it->t_tid, 1, recheck); + (*ntids)++; + } + else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page)) + { + /* + * Non-ordered scan, so report heap tuples in so->pageData[] + */ + so->pageData[so->nPageData].heapPtr = it->t_tid; + so->pageData[so->nPageData].recheck = recheck; + so->nPageData++; } else { /* - * Go to the next page + * Must push item into search queue. We get here for any lower + * index page, and also for heap tuples if doing an ordered + * search. */ - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; + GISTSearchItem *item; + + oldcxt = MemoryContextSwitchTo(so->queueCxt); - /* If we're out of stack entries, we're done */ - if (so->stack == NULL) + /* Create new GISTSearchItem for this item */ + item = palloc(sizeof(GISTSearchItem)); + item->next = NULL; + + if (GistPageIsLeaf(page)) { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return 0; + /* Creating heap-tuple GISTSearchItem */ + item->blkno = InvalidBlockNumber; + item->data.heap.heapPtr = it->t_tid; + item->data.heap.recheck = recheck; } + else + { + /* Creating index-page GISTSearchItem */ + item->blkno = ItemPointerGetBlockNumber(&it->t_tid); + /* lsn of current page is lsn of parent page for child */ + item->data.parentlsn = PageGetLSN(page); + } + + /* Insert it into the queue using new distance data */ + tmpItem->head = item; + tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL; + memcpy(tmpItem->distances, so->distances, + sizeof(double) * scan->numberOfOrderBys); - so->curbuf = ReleaseAndReadBuffer(so->curbuf, - scan->indexRelation, - stk->block); + (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); + + MemoryContextSwitchTo(oldcxt); } } + UnlockReleaseBuffer(buffer); +} + +/* + * Extract next item (in order) from search queue + * + * Returns a GISTSearchItem or NULL. Caller must pfree item when done with it. + * + * NOTE: on successful return, so->curTreeItem is the GISTSearchTreeItem that + * contained the result item. Callers can use so->curTreeItem->distances as + * the distances value for the item. + */ +static GISTSearchItem * +getNextGISTSearchItem(GISTScanOpaque so) +{ for (;;) { - CHECK_FOR_INTERRUPTS(); + GISTSearchItem *item; - /* First of all, we need lock buffer */ - Assert(so->curbuf != InvalidBuffer); - LockBuffer(so->curbuf, GIST_SHARE); - gistcheckpage(scan->indexRelation, so->curbuf); - p = BufferGetPage(so->curbuf); - opaque = GistPageGetOpaque(p); - - /* remember lsn to identify page changed for tuple's killing */ - so->stack->lsn = PageGetLSN(p); - - /* check page split, occured since visit to parent */ - if (!XLogRecPtrIsInvalid(so->stack->parentlsn) && - XLByteLT(so->stack->parentlsn, opaque->nsn) && - opaque->rightlink != InvalidBlockNumber /* sanity check */ && - (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already - added */ ) + /* Update curTreeItem if we don't have one */ + if (so->curTreeItem == NULL) { - /* detect page split, follow right link to add pages */ - - stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack)); - stk->next = so->stack->next; - stk->block = opaque->rightlink; - stk->parentlsn = so->stack->parentlsn; - memset(&(stk->lsn), 0, sizeof(GistNSN)); - so->stack->next = stk; + so->curTreeItem = (GISTSearchTreeItem *) rb_leftmost(so->queue); + /* Done when tree is empty */ + if (so->curTreeItem == NULL) + break; } - /* if page is empty, then just skip it */ - if (PageIsEmpty(p)) + item = so->curTreeItem->head; + if (item != NULL) { - LockBuffer(so->curbuf, GIST_UNLOCK); - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; - - if (so->stack == NULL) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return ntids; - } - - so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation, - stk->block); - continue; + /* Delink item from chain */ + so->curTreeItem->head = item->next; + /* Return item; caller is responsible to pfree it */ + return item; } - n = FirstOffsetNumber; - - /* wonderful, we can look at page */ - so->nPageData = so->curPageData = 0; - - for (;;) - { - n = gistfindnext(scan, n); - - if (!OffsetNumberIsValid(n)) - { - /* - * If we was called from gistgettuple and current buffer - * contains something matched then make a recursive call - it - * will return ItemPointer from so->pageData. But we save - * buffer pinned to support tuple's killing - */ - if (!tbm && so->nPageData > 0) - { - LockBuffer(so->curbuf, GIST_UNLOCK); - return gistnext(scan, NULL); - } + /* curTreeItem is exhausted, so remove it from rbtree */ + rb_delete(so->queue, (RBNode *) so->curTreeItem); + so->curTreeItem = NULL; + } - /* - * We ran out of matching index entries on the current page, - * so pop the top stack entry and use it to continue the - * search. - */ - LockBuffer(so->curbuf, GIST_UNLOCK); - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; - - /* If we're out of stack entries, we're done */ - - if (so->stack == NULL) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return ntids; - } - - so->curbuf = ReleaseAndReadBuffer(so->curbuf, - scan->indexRelation, - stk->block); - /* XXX go up */ - break; - } + return NULL; +} - if (GistPageIsLeaf(p)) - { - /* - * We've found a matching index entry in a leaf page, so - * return success. Note that we keep "curbuf" pinned so that - * we can efficiently resume the index scan later. - */ +/* + * Fetch next heap tuple in an ordered search + */ +static bool +getNextNearest(IndexScanDesc scan) +{ + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + bool res = false; - if (!(scan->ignore_killed_tuples && - ItemIdIsDead(PageGetItemId(p, n)))) - { - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - ntids++; - if (tbm != NULL) - tbm_add_tuples(tbm, &it->t_tid, 1, scan->xs_recheck); - else - { - so->pageData[so->nPageData].heapPtr = it->t_tid; - so->pageData[so->nPageData].pageOffset = n; - so->pageData[so->nPageData].recheck = scan->xs_recheck; - so->nPageData++; - } - } - } - else - { - /* - * We've found an entry in an internal node whose key is - * consistent with the search key, so push it to stack - */ - stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack)); + do + { + GISTSearchItem *item = getNextGISTSearchItem(so); - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - stk->block = ItemPointerGetBlockNumber(&(it->t_tid)); - memset(&(stk->lsn), 0, sizeof(GistNSN)); - stk->parentlsn = so->stack->lsn; + if (!item) + break; - stk->next = so->stack->next; - so->stack->next = stk; - } + if (GISTSearchItemIsHeap(*item)) + { + /* found a heap item at currently minimal distance */ + scan->xs_ctup.t_self = item->data.heap.heapPtr; + scan->xs_recheck = item->data.heap.recheck; + res = true; + } + else + { + /* visit an index page, extract its items into queue */ + CHECK_FOR_INTERRUPTS(); - n = OffsetNumberNext(n); + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); } - } - return ntids; + pfree(item); + } while (!res); + + return res; } /* - * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? - * - * On success return for a leaf tuple, scan->xs_recheck is set to indicate - * whether recheck is needed. We recheck if any of the consistent() functions - * request it. - * - * We must decompress the key in the IndexTuple before passing it to the - * sk_func (and we have previously overwritten the sk_func to use the - * user-defined Consistent method, so we actually are invoking that). - * - * Note that this function is always invoked in a short-lived memory context, - * so we don't need to worry about cleaning up allocated memory, either here - * or in the implementation of any Consistent methods. + * gistgettuple() -- Get the next tuple in the scan */ -static bool -gistindex_keytest(IndexTuple tuple, - IndexScanDesc scan, - OffsetNumber offset) +Datum +gistgettuple(PG_FUNCTION_ARGS) { - int keySize = scan->numberOfKeys; - ScanKey key = scan->keyData; - Relation r = scan->indexRelation; - GISTScanOpaque so; - Page p; - GISTSTATE *giststate; + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; - so = (GISTScanOpaque) scan->opaque; - giststate = so->giststate; - p = BufferGetPage(so->curbuf); + if (!so->qual_ok) + PG_RETURN_BOOL(false); - scan->xs_recheck = false; + if (so->firstCall) + { + /* Begin the scan by processing the root page */ + GISTSearchItem fakeItem; - /* - * Tuple doesn't restore after crash recovery because of incomplete insert - */ - if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple)) - return true; + pgstat_count_index_scan(scan->indexRelation); - while (keySize > 0) - { - Datum datum; - bool isNull; - Datum test; - bool recheck; - GISTENTRY de; + so->firstCall = false; + so->curTreeItem = NULL; + so->curPageData = so->nPageData = 0; - datum = index_getattr(tuple, - key->sk_attno, - giststate->tupdesc, - &isNull); + fakeItem.blkno = GIST_ROOT_BLKNO; + memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); + gistScanPage(scan, &fakeItem, NULL, NULL, NULL); + } - if (key->sk_flags & SK_ISNULL) + if (scan->numberOfOrderBys > 0) + { + /* Must fetch tuples in strict distance order */ + PG_RETURN_BOOL(getNextNearest(scan)); + } + else + { + /* Fetch tuples index-page-at-a-time */ + for (;;) { - /* - * On non-leaf page we can't conclude that child hasn't NULL - * values because of assumption in GiST: union (VAL, NULL) is VAL. - * But if on non-leaf page key IS NULL, then all children are - * NULL. - */ - if (key->sk_flags & SK_SEARCHNULL) + if (so->curPageData < so->nPageData) { - if (GistPageIsLeaf(p) && !isNull) - return false; + /* continuing to return tuples from a leaf page */ + scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; + scan->xs_recheck = so->pageData[so->curPageData].recheck; + so->curPageData++; + PG_RETURN_BOOL(true); } - else + + /* find and process the next index page */ + do { - Assert(key->sk_flags & SK_SEARCHNOTNULL); - if (isNull) - return false; - } - } - else if (isNull) - { - return false; - } - else - { - gistdentryinit(giststate, key->sk_attno - 1, &de, - datum, r, p, offset, - FALSE, isNull); + GISTSearchItem *item = getNextGISTSearchItem(so); - /* - * Call the Consistent function to evaluate the test. The - * arguments are the index datum (as a GISTENTRY*), the comparison - * datum, the comparison operator's strategy number and subtype - * from pg_amop, and the recheck flag. - * - * (Presently there's no need to pass the subtype since it'll - * always be zero, but might as well pass it for possible future - * use.) - * - * We initialize the recheck flag to true (the safest assumption) - * in case the Consistent function forgets to set it. - */ - recheck = true; + if (!item) + PG_RETURN_BOOL(false); - test = FunctionCall5(&key->sk_func, - PointerGetDatum(&de), - key->sk_argument, - Int32GetDatum(key->sk_strategy), - ObjectIdGetDatum(key->sk_subtype), - PointerGetDatum(&recheck)); + CHECK_FOR_INTERRUPTS(); - if (!DatumGetBool(test)) - return false; - scan->xs_recheck |= recheck; - } + /* + * While scanning a leaf page, ItemPointers of matching heap + * tuples are stored in so->pageData. If there are any on + * this page, we fall out of the inner "do" and loop around + * to return them. + */ + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); - keySize--; - key++; + pfree(item); + } while (so->nPageData == 0); + } } - return true; + PG_RETURN_BOOL(false); /* keep compiler quiet */ } /* - * Return the offset of the first index entry that is consistent with - * the search key after offset 'n' in the current page. If there are - * no more consistent entries, return InvalidOffsetNumber. - * On success, scan->xs_recheck is set correctly, too. - * Page should be locked.... + * gistgetbitmap() -- Get a bitmap of all heap tuple locations */ -static OffsetNumber -gistfindnext(IndexScanDesc scan, OffsetNumber n) +Datum +gistgetbitmap(PG_FUNCTION_ARGS) { - OffsetNumber maxoff; - IndexTuple it; - GISTScanOpaque so; - MemoryContext oldcxt; - Page p; + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + int64 ntids = 0; + GISTSearchItem fakeItem; + + if (!so->qual_ok) + PG_RETURN_INT64(0); + + pgstat_count_index_scan(scan->indexRelation); - so = (GISTScanOpaque) scan->opaque; - p = BufferGetPage(so->curbuf); - maxoff = PageGetMaxOffsetNumber(p); + /* Begin the scan by processing the root page */ + so->curTreeItem = NULL; + so->curPageData = so->nPageData = 0; + + fakeItem.blkno = GIST_ROOT_BLKNO; + memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); + gistScanPage(scan, &fakeItem, NULL, tbm, &ntids); /* - * Make sure we're in a short-lived memory context when we invoke a - * user-supplied GiST method in gistindex_keytest(), so we don't leak - * memory + * While scanning a leaf page, ItemPointers of matching heap tuples will + * be stored directly into tbm, so we don't need to deal with them here. */ - oldcxt = MemoryContextSwitchTo(so->tempCxt); - - while (n >= FirstOffsetNumber && n <= maxoff) + for (;;) { - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - if (gistindex_keytest(it, scan, n)) + GISTSearchItem *item = getNextGISTSearchItem(so); + + if (!item) break; - n = OffsetNumberNext(n); - } + CHECK_FOR_INTERRUPTS(); - MemoryContextSwitchTo(oldcxt); - MemoryContextReset(so->tempCxt); + gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids); - /* - * If we found a matching entry, return its offset; otherwise return - * InvalidOffsetNumber to inform the caller to go to the next page. - */ - if (n >= FirstOffsetNumber && n <= maxoff) - return n; - else - return InvalidOffsetNumber; + pfree(item); + } + + PG_RETURN_INT64(ntids); } diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 681ffd27d4f..bdc84befc64 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -904,6 +904,76 @@ gist_point_compress(PG_FUNCTION_ARGS) PG_RETURN_POINTER(entry); } +#define point_point_distance(p1,p2) \ + DatumGetFloat8(DirectFunctionCall2(point_distance, \ + PointPGetDatum(p1), PointPGetDatum(p2))) + +static double +computeDistance(bool isLeaf, BOX *box, Point *point) +{ + double result = 0.0; + + if (isLeaf) + { + /* simple point to point distance */ + result = point_point_distance(point, &box->low); + } + else if (point->x <= box->high.x && point->x >= box->low.x && + point->y <= box->high.y && point->y >= box->low.y) + { + /* point inside the box */ + result = 0.0; + } + else if (point->x <= box->high.x && point->x >= box->low.x) + { + /* point is over or below box */ + Assert(box->low.y <= box->high.y); + if (point->y > box->high.y) + result = point->y - box->high.y; + else if (point->y < box->low.y) + result = box->low.y - point->y; + else + elog(ERROR, "inconsistent point values"); + } + else if (point->y <= box->high.y && point->y >= box->low.y) + { + /* point is to left or right of box */ + Assert(box->low.x <= box->high.x); + if (point->x > box->high.x) + result = point->x - box->high.x; + else if (point->x < box->low.x) + result = box->low.x - point->x; + else + elog(ERROR, "inconsistent point values"); + } + else + { + /* closest point will be a vertex */ + Point p; + double subresult; + + result = point_point_distance(point, &box->low); + + subresult = point_point_distance(point, &box->high); + if (result > subresult) + result = subresult; + + p.x = box->low.x; + p.y = box->high.y; + subresult = point_point_distance(point, &p); + if (result > subresult) + result = subresult; + + p.x = box->high.x; + p.y = box->low.y; + subresult = point_point_distance(point, &p); + if (result > subresult) + result = subresult; + } + + return result; +} + static bool gist_point_consistent_internal(StrategyNumber strategy, bool isLeaf, BOX *key, Point *query) @@ -954,8 +1024,8 @@ gist_point_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); - bool result; bool *recheck = (bool *) PG_GETARG_POINTER(4); + bool result; StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset; switch (strategyGroup) @@ -1034,9 +1104,32 @@ gist_point_consistent(PG_FUNCTION_ARGS) } break; default: - result = false; /* silence compiler warning */ elog(ERROR, "unknown strategy number: %d", strategy); + result = false; /* keep compiler quiet */ } PG_RETURN_BOOL(result); } + +Datum +gist_point_distance(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + double distance; + StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset; + + switch (strategyGroup) + { + case PointStrategyNumberGroup: + distance = computeDistance(GIST_LEAF(entry), + DatumGetBoxP(entry->key), + PG_GETARG_POINT_P(1)); + break; + default: + elog(ERROR, "unknown strategy number: %d", strategy); + distance = 0.0; /* keep compiler quiet */ + } + + PG_RETURN_FLOAT8(distance); +} diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 106714511a8..546880c01e3 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -20,8 +20,84 @@ #include "access/relscan.h" #include "storage/bufmgr.h" #include "utils/memutils.h" +#include "utils/rel.h" -static void gistfreestack(GISTSearchStack *s); + +/* + * RBTree support functions for the GISTSearchTreeItem queue + */ + +static int +GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg) +{ + const GISTSearchTreeItem *sa = (const GISTSearchTreeItem *) a; + const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b; + IndexScanDesc scan = (IndexScanDesc) arg; + int i; + + /* Order according to distance comparison */ + for (i = 0; i < scan->numberOfOrderBys; i++) + { + if (sa->distances[i] != sb->distances[i]) + return (sa->distances[i] > sb->distances[i]) ? 1 : -1; + } + + return 0; +} + +static void +GISTSearchTreeItemCombiner(RBNode *existing, const RBNode *newrb, void *arg) +{ + GISTSearchTreeItem *scurrent = (GISTSearchTreeItem *) existing; + const GISTSearchTreeItem *snew = (const GISTSearchTreeItem *) newrb; + GISTSearchItem *newitem = snew->head; + + /* snew should have just one item in its chain */ + Assert(newitem && newitem->next == NULL); + + /* + * If new item is heap tuple, it goes to front of chain; otherwise insert + * it before the first index-page item, so that index pages are visited + * in LIFO order, ensuring depth-first search of index pages. See + * comments in gist_private.h. + */ + if (GISTSearchItemIsHeap(*newitem)) + { + newitem->next = scurrent->head; + scurrent->head = newitem; + if (scurrent->lastHeap == NULL) + scurrent->lastHeap = newitem; + } + else if (scurrent->lastHeap == NULL) + { + newitem->next = scurrent->head; + scurrent->head = newitem; + } + else + { + newitem->next = scurrent->lastHeap->next; + scurrent->lastHeap->next = newitem; + } +} + +static RBNode * +GISTSearchTreeItemAllocator(void *arg) +{ + IndexScanDesc scan = (IndexScanDesc) arg; + + return palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys); +} + +static void +GISTSearchTreeItemDeleter(RBNode *rb, void *arg) +{ + pfree(rb); +} + + +/* + * Index AM API functions for scanning GiST indexes + */ Datum gistbeginscan(PG_FUNCTION_ARGS) @@ -32,18 +108,22 @@ gistbeginscan(PG_FUNCTION_ARGS) IndexScanDesc scan; GISTScanOpaque so; - /* no order by operators allowed */ - Assert(norderbys == 0); - scan = RelationGetIndexScan(r, nkeys, norderbys); /* initialize opaque data */ - so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); - so->stack = NULL; + so = (GISTScanOpaque) palloc0(sizeof(GISTScanOpaqueData)); + so->queueCxt = AllocSetContextCreate(CurrentMemoryContext, + "GiST queue context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); so->tempCxt = createTempGistContext(); - so->curbuf = InvalidBuffer; so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); initGISTstate(so->giststate, scan->indexRelation); + /* workspaces with size dependent on numberOfOrderBys: */ + so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys); + so->distances = palloc(sizeof(double) * scan->numberOfOrderBys); + so->qual_ok = true; /* in case there are zero keys */ scan->opaque = so; @@ -55,27 +135,27 @@ gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); - /* remaining arguments are ignored */ + ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3); + /* nkeys and norderbys arguments are ignored */ GISTScanOpaque so = (GISTScanOpaque) scan->opaque; int i; + MemoryContext oldCxt; /* rescan an existing indexscan --- reset state */ - gistfreestack(so->stack); - so->stack = NULL; - /* drop pins on buffers -- no locks held */ - if (BufferIsValid(so->curbuf)) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - } + MemoryContextReset(so->queueCxt); + so->curTreeItem = NULL; - /* - * Clear all the pointers. - */ - ItemPointerSetInvalid(&so->curpos); - so->nPageData = so->curPageData = 0; + /* create new, empty RBTree for search queue */ + oldCxt = MemoryContextSwitchTo(so->queueCxt); + so->queue = rb_create(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys, + GISTSearchTreeItemComparator, + GISTSearchTreeItemCombiner, + GISTSearchTreeItemAllocator, + GISTSearchTreeItemDeleter, + scan); + MemoryContextSwitchTo(oldCxt); - so->qual_ok = true; + so->firstCall = true; /* Update scan key, if a new one is given */ if (key && scan->numberOfKeys > 0) @@ -84,7 +164,7 @@ gistrescan(PG_FUNCTION_ARGS) scan->numberOfKeys * sizeof(ScanKeyData)); /* - * Modify the scan key so that all the Consistent method is called for + * Modify the scan key so that the Consistent method is called for * all comparisons. The original operator is passed to the Consistent * function in the form of its strategy number, which is available * from the sk_strategy field, and its subtype from the sk_subtype @@ -94,9 +174,11 @@ gistrescan(PG_FUNCTION_ARGS) * SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we * assume all indexable operators are strict). */ + so->qual_ok = true; + for (i = 0; i < scan->numberOfKeys; i++) { - ScanKey skey = &(scan->keyData[i]); + ScanKey skey = scan->keyData + i; skey->sk_func = so->giststate->consistentFn[skey->sk_attno - 1]; @@ -108,6 +190,33 @@ gistrescan(PG_FUNCTION_ARGS) } } + /* Update order-by key, if a new one is given */ + if (orderbys && scan->numberOfOrderBys > 0) + { + memmove(scan->orderByData, orderbys, + scan->numberOfOrderBys * sizeof(ScanKeyData)); + + /* + * Modify the order-by key so that the Distance method is called for + * all comparisons. The original operator is passed to the Distance + * function in the form of its strategy number, which is available + * from the sk_strategy field, and its subtype from the sk_subtype + * field. + */ + for (i = 0; i < scan->numberOfOrderBys; i++) + { + ScanKey skey = scan->orderByData + i; + + skey->sk_func = so->giststate->distanceFn[skey->sk_attno - 1]; + + /* Check we actually have a distance function ... */ + if (!OidIsValid(skey->sk_func.fn_oid)) + elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", + GIST_DISTANCE_PROC, skey->sk_attno, + RelationGetRelationName(scan->indexRelation)); + } + } + PG_RETURN_VOID(); } @@ -131,26 +240,12 @@ gistendscan(PG_FUNCTION_ARGS) IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); GISTScanOpaque so = (GISTScanOpaque) scan->opaque; - gistfreestack(so->stack); - if (so->giststate != NULL) - freeGISTstate(so->giststate); - /* drop pins on buffers -- we aren't holding any locks */ - if (BufferIsValid(so->curbuf)) - ReleaseBuffer(so->curbuf); + freeGISTstate(so->giststate); + MemoryContextDelete(so->queueCxt); MemoryContextDelete(so->tempCxt); + pfree(so->tmpTreeItem); + pfree(so->distances); pfree(so); PG_RETURN_VOID(); } - -static void -gistfreestack(GISTSearchStack *s) -{ - while (s != NULL) - { - GISTSearchStack *p = s->next; - - pfree(s); - s = p; - } -} |