diff options
Diffstat (limited to 'src/backend/access/gin')
-rw-r--r-- | src/backend/access/gin/ginget.c | 348 | ||||
-rw-r--r-- | src/backend/access/gin/ginscan.c | 42 | ||||
-rw-r--r-- | src/backend/access/gin/ginutil.c | 18 |
3 files changed, 385 insertions, 23 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 3bedcc99606..3d60d337df4 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -18,8 +18,13 @@ #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "utils/datum.h" #include "utils/memutils.h" + +/* + * Tries to refind previously taken ItemPointer on page. + */ static bool findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { @@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) } /* - * Start* functions setup state of searches: find correct buffer and locks it, - * Stop* functions unlock buffer (but don't release!) + * Goes to the next page if current offset is outside of bounds + */ +static bool +moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack ) +{ + Page page = BufferGetPage(stack->buffer); + + if ( stack->off > PageGetMaxOffsetNumber(page) ) + { + /* + * We scanned the whole page, so we should take right page + */ + stack->blkno = GinPageGetOpaque(page)->rightlink; + + if ( GinPageRightMost(page) ) + return false; /* no more pages */ + + LockBuffer(stack->buffer, GIN_UNLOCK); + stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); + LockBuffer(stack->buffer, GIN_SHARE); + stack->off = FirstOffsetNumber; + } + + return true; +} + +/* + * Does fullscan of posting tree and saves ItemPointers + * in scanEntry->partialMatch TIDBitmap + */ +static void +scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree ) +{ + GinPostingTreeScan *gdi; + Buffer buffer; + Page page; + BlockNumber blkno; + + gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); + + buffer = scanBeginPostingTree(gdi); + IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ + + freeGinBtreeStack(gdi->stack); + pfree(gdi); + + /* + * Goes through all leaves + */ + for(;;) + { + page = BufferGetPage(buffer); + + if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber ) + { + tbm_add_tuples( scanEntry->partialMatch, + (ItemPointer)GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff, false); + scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; + } + + blkno = GinPageGetOpaque(page)->rightlink; + if ( GinPageRightMost(page) ) + { + UnlockReleaseBuffer(buffer); + return; /* no more pages */ + } + + LockBuffer(buffer, GIN_UNLOCK); + buffer = ReleaseAndReadBuffer(buffer, index, blkno); + LockBuffer(buffer, GIN_SHARE); + } +} + +/* + * Collects all ItemPointer into the TIDBitmap struct + * for entries partially matched to search entry. + * + * Returns true if done, false if it's needed to restart scan from scratch + */ +static bool +computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry ) +{ + Page page; + IndexTuple itup; + Datum idatum; + bool isnull; + int32 cmp; + + scanEntry->partialMatch = tbm_create( work_mem * 1024L ); + + for(;;) + { + /* + * stack->off points to the interested entry, buffer is already locked + */ + if ( moveRightIfItNeeded(btree, stack) == false ) + return true; + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull); + Assert(!isnull); + + /*---------- + * Check of partial match. + * case cmp == 0 => match + * case cmp > 0 => not match and finish scan + * case cmp < 0 => not match and continue scan + *---------- + */ + cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn, + scanEntry->entry, + idatum, + UInt16GetDatum(scanEntry->strategy))); + + if ( cmp > 0 ) + return true; + else if ( cmp < 0 ) + { + stack->off++; + continue; + } + + if ( GinIsPostingTree(itup) ) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + Datum newDatum, + savedDatum = datumCopy ( + idatum, + btree->ginstate->tupdesc->attrs[0]->attbyval, + btree->ginstate->tupdesc->attrs[0]->attlen + ); + /* + * We should unlock current page (but not unpin) during + * tree scan to prevent deadlock with vacuum processes. + * + * We save current entry value (savedDatum) to be able to refind + * our tuple after re-locking + */ + LockBuffer(stack->buffer, GIN_UNLOCK); + scanForItems( btree->index, scanEntry, rootPostingTree ); + + /* + * We lock again the entry page and while it was unlocked + * insert might occured, so we need to refind our position + */ + LockBuffer(stack->buffer, GIN_SHARE); + page = BufferGetPage(stack->buffer); + if ( !GinPageIsLeaf(page) ) + { + /* + * Root page becomes non-leaf while we unlock it. We + * will start again, this situation doesn't cause + * often - root can became a non-leaf only one per + * life of index. + */ + + return false; + } + + for(;;) + { + if ( moveRightIfItNeeded(btree, stack) == false ) + elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + newDatum = index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull); + + if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 ) + { + /* Found! */ + if ( btree->ginstate->tupdesc->attrs[0]->attbyval == false ) + pfree( DatumGetPointer(savedDatum) ); + break; + } + + stack->off++; + } + } + else + { + tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false); + scanEntry->predictNumberResult += GinGetNPosting(itup); + } + + /* + * Ok, we save ItemPointers, go to the next entry + */ + stack->off++; + } + + return true; +} + +/* + * Start* functions setup begining state of searches: finds correct buffer and pins it. */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) @@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; + entry->partialMatch = NULL; + entry->partialMatchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; - if (btreeEntry.findItem(&btreeEntry, stackEntry)) + if ( entry->isPartialMatch ) + { + /* + * btreeEntry.findItem points to the first equal or greater value + * than needed. So we will scan further and collect all + * ItemPointers + */ + btreeEntry.findItem(&btreeEntry, stackEntry); + if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false ) + { + /* + * GIN tree was seriously restructured, so we will + * cleanup all found data and rescan. See comments near + * 'return false' in computePartialMatchList() + */ + if ( entry->partialMatch ) + { + tbm_free( entry->partialMatch ); + entry->partialMatch = NULL; + } + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); + + startScanEntry(index, ginstate, entry); + return; + } + + if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) ) + { + tbm_begin_iterate(entry->partialMatch); + entry->isFinished = FALSE; + } + } + else if (btreeEntry.findItem(&btreeEntry, stackEntry)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); @@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) GinPostingTreeScan *gdi; Page page; + /* + * We should unlock entry page before make deal with + * posting tree to prevent deadlocks with vacuum processes. + * Because entry is never deleted from page and posting tree is + * never reduced to the posting list, we can unlock page after + * getting BlockNumber of root of posting tree. + */ LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); @@ -111,7 +354,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) */ entry->list = (ItemPointerData *) palloc( BLCKSZ ); entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); @@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) return; for (i = 0; i < key->nentries; i++) + { startScanEntry(index, ginstate, key->scanEntry + i); + /* + * Copy strategy number to each entry of key to + * use in comparePartialFn call + */ + key->scanEntry[i].strategy = key->strategy; + } memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); key->isFinished = FALSE; @@ -233,12 +483,12 @@ entryGetNextItem(Relation index, GinScanEntry entry) * Found position equal to or greater than stored */ entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); - if ( !ItemPointerIsValid(&entry->curItem) || + if ( !ItemPointerIsValid(&entry->curItem) || compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 ) { /* @@ -248,7 +498,7 @@ entryGetNextItem(Relation index, GinScanEntry entry) break; } - + /* * Find greater than entry->curItem position, store it. */ @@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } + else if ( entry->partialMatch ) + { + do + { + if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples ) + { + entry->partialMatchResult = tbm_iterate( entry->partialMatch ); + + if ( entry->partialMatchResult == NULL ) + { + ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + entry->isFinished = TRUE; + break; + } + else if ( entry->partialMatchResult->ntuples < 0 ) + { + /* bitmap became lossy */ + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("not enough memory to store result of partial match operator" ), + errhint("Increase the \"work_mem\" parameter."))); + } + entry->offset = 0; + } + + ItemPointerSet(&entry->curItem, + entry->partialMatchResult->blockno, + entry->partialMatchResult->offsets[ entry->offset ]); + entry->offset ++; + + } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); + } else if (!BufferIsValid(entry->buffer)) { entry->offset++; @@ -298,6 +580,54 @@ entryGetItem(Relation index, GinScanEntry entry) } /* + * restart from saved position. Actually it's needed only for + * partial match. function is called only by ginrestpos() + */ +void +ginrestartentry(GinScanEntry entry) +{ + ItemPointerData stopItem = entry->curItem; + bool savedReduceResult; + + if ( entry->master || entry->partialMatch == NULL ) + return; /* entry is slave or not a partial match type*/ + + if ( entry->isFinished ) + return; /* entry was finished before ginmarkpos() call */ + + if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber ) + return; /* entry wasn't began before ginmarkpos() call */ + + /* + * Reset iterator + */ + tbm_begin_iterate( entry->partialMatch ); + entry->partialMatchResult = NULL; + entry->offset = 0; + + /* + * Temporary reset reduceResult flag to guarantee refinding + * of curItem + */ + savedReduceResult = entry->reduceResult; + entry->reduceResult = FALSE; + + do + { + /* + * We can use null instead of index because + * partial match doesn't use it + */ + if ( entryGetItem( NULL, entry ) == false ) + elog(ERROR, "cannot refind scan position"); /* must not be here! */ + } while( compareItemPointers( &stopItem, &entry->curItem ) != 0 ); + + Assert( entry->isFinished == FALSE ); + + entry->reduceResult = savedReduceResult; +} + +/* * Sets key->curItem to new found heap item pointer for one scan key * Returns isFinished, ie TRUE means we did NOT get a new item pointer! * Also, *keyrecheck is set true if recheck is needed for this scan key. @@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS) bool res; if (dir != ForwardScanDirection) - elog(ERROR, "Gin doesn't support other scan directions than forward"); + elog(ERROR, "GIN doesn't support other scan directions than forward"); if (GinIsNewKey(scan)) newScanKey(scan); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 10a528817e6..cec24fbfdbd 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS) static void fillScanKey(GinState *ginstate, GinScanKey key, Datum query, - Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy) + Datum *entryValues, bool *partial_matches, uint32 nEntryValues, + StrategyNumber strategy) { uint32 i, j; @@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query, key->scanEntry[i].buffer = InvalidBuffer; key->scanEntry[i].list = NULL; key->scanEntry[i].nlist = 0; + key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches ) + ? partial_matches[i] : false; /* link to the equals entry in current scan key */ key->scanEntry[i].master = NULL; @@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].list = NULL; key->scanEntry[j].nlist = 0; + key->scanEntry[j].partialMatch = NULL; + key->scanEntry[j].partialMatchResult = NULL; } } } @@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) ReleaseBuffer(key->scanEntry[j].buffer); if (removeRes && key->scanEntry[j].list) pfree(key->scanEntry[j].list); + if (removeRes && key->scanEntry[j].partialMatch) + tbm_free(key->scanEntry[j].partialMatch); } if (removeRes) @@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan) { Datum *entryValues; int32 nEntryValues; + bool *partial_matches = NULL; - if (scankey[i].sk_flags & SK_ISNULL) - elog(ERROR, "Gin doesn't support NULL as scan key"); Assert(scankey[i].sk_attno == 1); - entryValues = (Datum *) DatumGetPointer( - FunctionCall3( + /* XXX can't we treat nulls by just setting isVoidRes? */ + /* This would amount to assuming that all GIN operators are strict */ + if (scankey[i].sk_flags & SK_ISNULL) + elog(ERROR, "GIN doesn't support NULL as scan key"); + + entryValues = (Datum *) DatumGetPointer(FunctionCall4( &so->ginstate.extractQueryFn, scankey[i].sk_argument, PointerGetDatum(&nEntryValues), - UInt16GetDatum(scankey[i].sk_strategy) - ) - ); + UInt16GetDatum(scankey[i].sk_strategy), + PointerGetDatum(&partial_matches))); if (nEntryValues < 0) { /* @@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan) so->isVoidRes = true; break; } + + /* + * extractQueryFn signals that everything matches + */ if (entryValues == NULL || nEntryValues == 0) /* full scan... */ continue; fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument, - entryValues, nEntryValues, scankey[i].sk_strategy); + entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy); nkeys++; } @@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS) } static GinScanKey -copyScanKeys(GinScanKey keys, uint32 nkeys) +copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart) { GinScanKey newkeys; uint32 i, @@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys) newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN; } + + if ( restart ) + ginrestartentry( &keys[i].scanEntry[j] ); } } @@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS) GinScanOpaque so = (GinScanOpaque) scan->opaque; freeScanKeys(so->markPos, so->nkeys, FALSE); - so->markPos = copyScanKeys(so->keys, so->nkeys); + so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE); PG_RETURN_VOID(); } @@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS) GinScanOpaque so = (GinScanOpaque) scan->opaque; freeScanKeys(so->keys, so->nkeys, FALSE); - so->keys = copyScanKeys(so->markPos, so->nkeys); + so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE); PG_RETURN_VOID(); } diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 7da7689f826..36105e20d2d 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index) fmgr_info_copy(&(state->consistentFn), index_getprocinfo(index, 1, GIN_CONSISTENT_PROC), CurrentMemoryContext); + + /* + * Check opclass capability to do partial match. + */ + if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid ) + { + fmgr_info_copy(&(state->comparePartialFn), + index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC), + CurrentMemoryContext); + + state->canPartialMatch = true; + } + else + { + state->canPartialMatch = false; + } } /* |