aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gin
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/gin')
-rw-r--r--src/backend/access/gin/ginget.c348
-rw-r--r--src/backend/access/gin/ginscan.c42
-rw-r--r--src/backend/access/gin/ginutil.c18
3 files changed, 385 insertions, 23 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 3bedcc99606..3d60d337df4 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -18,8 +18,13 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "utils/datum.h"
#include "utils/memutils.h"
+
+/*
+ * Tries to refind previously taken ItemPointer on page.
+ */
static bool
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
{
@@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
}
/*
- * Start* functions setup state of searches: find correct buffer and locks it,
- * Stop* functions unlock buffer (but don't release!)
+ * Goes to the next page if current offset is outside of bounds
+ */
+static bool
+moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack )
+{
+ Page page = BufferGetPage(stack->buffer);
+
+ if ( stack->off > PageGetMaxOffsetNumber(page) )
+ {
+ /*
+ * We scanned the whole page, so we should take right page
+ */
+ stack->blkno = GinPageGetOpaque(page)->rightlink;
+
+ if ( GinPageRightMost(page) )
+ return false; /* no more pages */
+
+ LockBuffer(stack->buffer, GIN_UNLOCK);
+ stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
+ LockBuffer(stack->buffer, GIN_SHARE);
+ stack->off = FirstOffsetNumber;
+ }
+
+ return true;
+}
+
+/*
+ * Does fullscan of posting tree and saves ItemPointers
+ * in scanEntry->partialMatch TIDBitmap
+ */
+static void
+scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree )
+{
+ GinPostingTreeScan *gdi;
+ Buffer buffer;
+ Page page;
+ BlockNumber blkno;
+
+ gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
+
+ buffer = scanBeginPostingTree(gdi);
+ IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
+
+ freeGinBtreeStack(gdi->stack);
+ pfree(gdi);
+
+ /*
+ * Goes through all leaves
+ */
+ for(;;)
+ {
+ page = BufferGetPage(buffer);
+
+ if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber )
+ {
+ tbm_add_tuples( scanEntry->partialMatch,
+ (ItemPointer)GinDataPageGetItem(page, FirstOffsetNumber),
+ GinPageGetOpaque(page)->maxoff, false);
+ scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
+ }
+
+ blkno = GinPageGetOpaque(page)->rightlink;
+ if ( GinPageRightMost(page) )
+ {
+ UnlockReleaseBuffer(buffer);
+ return; /* no more pages */
+ }
+
+ LockBuffer(buffer, GIN_UNLOCK);
+ buffer = ReleaseAndReadBuffer(buffer, index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ }
+}
+
+/*
+ * Collects all ItemPointer into the TIDBitmap struct
+ * for entries partially matched to search entry.
+ *
+ * Returns true if done, false if it's needed to restart scan from scratch
+ */
+static bool
+computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry )
+{
+ Page page;
+ IndexTuple itup;
+ Datum idatum;
+ bool isnull;
+ int32 cmp;
+
+ scanEntry->partialMatch = tbm_create( work_mem * 1024L );
+
+ for(;;)
+ {
+ /*
+ * stack->off points to the interested entry, buffer is already locked
+ */
+ if ( moveRightIfItNeeded(btree, stack) == false )
+ return true;
+
+ page = BufferGetPage(stack->buffer);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+ idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull);
+ Assert(!isnull);
+
+ /*----------
+ * Check of partial match.
+ * case cmp == 0 => match
+ * case cmp > 0 => not match and finish scan
+ * case cmp < 0 => not match and continue scan
+ *----------
+ */
+ cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn,
+ scanEntry->entry,
+ idatum,
+ UInt16GetDatum(scanEntry->strategy)));
+
+ if ( cmp > 0 )
+ return true;
+ else if ( cmp < 0 )
+ {
+ stack->off++;
+ continue;
+ }
+
+ if ( GinIsPostingTree(itup) )
+ {
+ BlockNumber rootPostingTree = GinGetPostingTree(itup);
+ Datum newDatum,
+ savedDatum = datumCopy (
+ idatum,
+ btree->ginstate->tupdesc->attrs[0]->attbyval,
+ btree->ginstate->tupdesc->attrs[0]->attlen
+ );
+ /*
+ * We should unlock current page (but not unpin) during
+ * tree scan to prevent deadlock with vacuum processes.
+ *
+ * We save current entry value (savedDatum) to be able to refind
+ * our tuple after re-locking
+ */
+ LockBuffer(stack->buffer, GIN_UNLOCK);
+ scanForItems( btree->index, scanEntry, rootPostingTree );
+
+ /*
+ * We lock again the entry page and while it was unlocked
+ * insert might occured, so we need to refind our position
+ */
+ LockBuffer(stack->buffer, GIN_SHARE);
+ page = BufferGetPage(stack->buffer);
+ if ( !GinPageIsLeaf(page) )
+ {
+ /*
+ * Root page becomes non-leaf while we unlock it. We
+ * will start again, this situation doesn't cause
+ * often - root can became a non-leaf only one per
+ * life of index.
+ */
+
+ return false;
+ }
+
+ for(;;)
+ {
+ if ( moveRightIfItNeeded(btree, stack) == false )
+ elog(ERROR, "lost saved point in index"); /* must not happen !!! */
+
+ page = BufferGetPage(stack->buffer);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+ newDatum = index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
+
+ if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 )
+ {
+ /* Found! */
+ if ( btree->ginstate->tupdesc->attrs[0]->attbyval == false )
+ pfree( DatumGetPointer(savedDatum) );
+ break;
+ }
+
+ stack->off++;
+ }
+ }
+ else
+ {
+ tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false);
+ scanEntry->predictNumberResult += GinGetNPosting(itup);
+ }
+
+ /*
+ * Ok, we save ItemPointers, go to the next entry
+ */
+ stack->off++;
+ }
+
+ return true;
+}
+
+/*
+ * Start* functions setup begining state of searches: finds correct buffer and pins it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
@@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
entry->offset = InvalidOffsetNumber;
entry->list = NULL;
entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
entry->reduceResult = FALSE;
entry->predictNumberResult = 0;
- if (btreeEntry.findItem(&btreeEntry, stackEntry))
+ if ( entry->isPartialMatch )
+ {
+ /*
+ * btreeEntry.findItem points to the first equal or greater value
+ * than needed. So we will scan further and collect all
+ * ItemPointers
+ */
+ btreeEntry.findItem(&btreeEntry, stackEntry);
+ if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false )
+ {
+ /*
+ * GIN tree was seriously restructured, so we will
+ * cleanup all found data and rescan. See comments near
+ * 'return false' in computePartialMatchList()
+ */
+ if ( entry->partialMatch )
+ {
+ tbm_free( entry->partialMatch );
+ entry->partialMatch = NULL;
+ }
+ LockBuffer(stackEntry->buffer, GIN_UNLOCK);
+ freeGinBtreeStack(stackEntry);
+
+ startScanEntry(index, ginstate, entry);
+ return;
+ }
+
+ if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) )
+ {
+ tbm_begin_iterate(entry->partialMatch);
+ entry->isFinished = FALSE;
+ }
+ }
+ else if (btreeEntry.findItem(&btreeEntry, stackEntry))
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
@@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
GinPostingTreeScan *gdi;
Page page;
+ /*
+ * We should unlock entry page before make deal with
+ * posting tree to prevent deadlocks with vacuum processes.
+ * Because entry is never deleted from page and posting tree is
+ * never reduced to the posting list, we can unlock page after
+ * getting BlockNumber of root of posting tree.
+ */
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
needUnlock = FALSE;
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
@@ -111,7 +354,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
*/
entry->list = (ItemPointerData *) palloc( BLCKSZ );
entry->nlist = GinPageGetOpaque(page)->maxoff;
- memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
@@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
return;
for (i = 0; i < key->nentries; i++)
+ {
startScanEntry(index, ginstate, key->scanEntry + i);
+ /*
+ * Copy strategy number to each entry of key to
+ * use in comparePartialFn call
+ */
+ key->scanEntry[i].strategy = key->strategy;
+ }
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
key->isFinished = FALSE;
@@ -233,12 +483,12 @@ entryGetNextItem(Relation index, GinScanEntry entry)
* Found position equal to or greater than stored
*/
entry->nlist = GinPageGetOpaque(page)->maxoff;
- memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
- if ( !ItemPointerIsValid(&entry->curItem) ||
+ if ( !ItemPointerIsValid(&entry->curItem) ||
compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
{
/*
@@ -248,7 +498,7 @@ entryGetNextItem(Relation index, GinScanEntry entry)
break;
}
-
+
/*
* Find greater than entry->curItem position, store it.
*/
@@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem;
}
+ else if ( entry->partialMatch )
+ {
+ do
+ {
+ if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+ {
+ entry->partialMatchResult = tbm_iterate( entry->partialMatch );
+
+ if ( entry->partialMatchResult == NULL )
+ {
+ ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
+ entry->isFinished = TRUE;
+ break;
+ }
+ else if ( entry->partialMatchResult->ntuples < 0 )
+ {
+ /* bitmap became lossy */
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("not enough memory to store result of partial match operator" ),
+ errhint("Increase the \"work_mem\" parameter.")));
+ }
+ entry->offset = 0;
+ }
+
+ ItemPointerSet(&entry->curItem,
+ entry->partialMatchResult->blockno,
+ entry->partialMatchResult->offsets[ entry->offset ]);
+ entry->offset ++;
+
+ } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+ }
else if (!BufferIsValid(entry->buffer))
{
entry->offset++;
@@ -298,6 +580,54 @@ entryGetItem(Relation index, GinScanEntry entry)
}
/*
+ * restart from saved position. Actually it's needed only for
+ * partial match. function is called only by ginrestpos()
+ */
+void
+ginrestartentry(GinScanEntry entry)
+{
+ ItemPointerData stopItem = entry->curItem;
+ bool savedReduceResult;
+
+ if ( entry->master || entry->partialMatch == NULL )
+ return; /* entry is slave or not a partial match type*/
+
+ if ( entry->isFinished )
+ return; /* entry was finished before ginmarkpos() call */
+
+ if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber )
+ return; /* entry wasn't began before ginmarkpos() call */
+
+ /*
+ * Reset iterator
+ */
+ tbm_begin_iterate( entry->partialMatch );
+ entry->partialMatchResult = NULL;
+ entry->offset = 0;
+
+ /*
+ * Temporary reset reduceResult flag to guarantee refinding
+ * of curItem
+ */
+ savedReduceResult = entry->reduceResult;
+ entry->reduceResult = FALSE;
+
+ do
+ {
+ /*
+ * We can use null instead of index because
+ * partial match doesn't use it
+ */
+ if ( entryGetItem( NULL, entry ) == false )
+ elog(ERROR, "cannot refind scan position"); /* must not be here! */
+ } while( compareItemPointers( &stopItem, &entry->curItem ) != 0 );
+
+ Assert( entry->isFinished == FALSE );
+
+ entry->reduceResult = savedReduceResult;
+}
+
+/*
* Sets key->curItem to new found heap item pointer for one scan key
* Returns isFinished, ie TRUE means we did NOT get a new item pointer!
* Also, *keyrecheck is set true if recheck is needed for this scan key.
@@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS)
bool res;
if (dir != ForwardScanDirection)
- elog(ERROR, "Gin doesn't support other scan directions than forward");
+ elog(ERROR, "GIN doesn't support other scan directions than forward");
if (GinIsNewKey(scan))
newScanKey(scan);
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 10a528817e6..cec24fbfdbd 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS)
static void
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
- Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
+ Datum *entryValues, bool *partial_matches, uint32 nEntryValues,
+ StrategyNumber strategy)
{
uint32 i,
j;
@@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
key->scanEntry[i].buffer = InvalidBuffer;
key->scanEntry[i].list = NULL;
key->scanEntry[i].nlist = 0;
+ key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches )
+ ? partial_matches[i] : false;
/* link to the equals entry in current scan key */
key->scanEntry[i].master = NULL;
@@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
key->scanEntry[j].buffer = InvalidBuffer;
key->scanEntry[j].list = NULL;
key->scanEntry[j].nlist = 0;
+ key->scanEntry[j].partialMatch = NULL;
+ key->scanEntry[j].partialMatchResult = NULL;
}
}
}
@@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
ReleaseBuffer(key->scanEntry[j].buffer);
if (removeRes && key->scanEntry[j].list)
pfree(key->scanEntry[j].list);
+ if (removeRes && key->scanEntry[j].partialMatch)
+ tbm_free(key->scanEntry[j].partialMatch);
}
if (removeRes)
@@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan)
{
Datum *entryValues;
int32 nEntryValues;
+ bool *partial_matches = NULL;
- if (scankey[i].sk_flags & SK_ISNULL)
- elog(ERROR, "Gin doesn't support NULL as scan key");
Assert(scankey[i].sk_attno == 1);
- entryValues = (Datum *) DatumGetPointer(
- FunctionCall3(
+ /* XXX can't we treat nulls by just setting isVoidRes? */
+ /* This would amount to assuming that all GIN operators are strict */
+ if (scankey[i].sk_flags & SK_ISNULL)
+ elog(ERROR, "GIN doesn't support NULL as scan key");
+
+ entryValues = (Datum *) DatumGetPointer(FunctionCall4(
&so->ginstate.extractQueryFn,
scankey[i].sk_argument,
PointerGetDatum(&nEntryValues),
- UInt16GetDatum(scankey[i].sk_strategy)
- )
- );
+ UInt16GetDatum(scankey[i].sk_strategy),
+ PointerGetDatum(&partial_matches)));
if (nEntryValues < 0)
{
/*
@@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan)
so->isVoidRes = true;
break;
}
+
+ /*
+ * extractQueryFn signals that everything matches
+ */
if (entryValues == NULL || nEntryValues == 0)
/* full scan... */
continue;
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
- entryValues, nEntryValues, scankey[i].sk_strategy);
+ entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy);
nkeys++;
}
@@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS)
}
static GinScanKey
-copyScanKeys(GinScanKey keys, uint32 nkeys)
+copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart)
{
GinScanKey newkeys;
uint32 i,
@@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys)
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
}
+
+ if ( restart )
+ ginrestartentry( &keys[i].scanEntry[j] );
}
}
@@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->markPos, so->nkeys, FALSE);
- so->markPos = copyScanKeys(so->keys, so->nkeys);
+ so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE);
PG_RETURN_VOID();
}
@@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->keys, so->nkeys, FALSE);
- so->keys = copyScanKeys(so->markPos, so->nkeys);
+ so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE);
PG_RETURN_VOID();
}
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7da7689f826..36105e20d2d 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index)
fmgr_info_copy(&(state->consistentFn),
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
CurrentMemoryContext);
+
+ /*
+ * Check opclass capability to do partial match.
+ */
+ if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
+ {
+ fmgr_info_copy(&(state->comparePartialFn),
+ index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC),
+ CurrentMemoryContext);
+
+ state->canPartialMatch = true;
+ }
+ else
+ {
+ state->canPartialMatch = false;
+ }
}
/*