about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/gin/ginget.c 348
-rw-r--r-- src/backend/access/gin/ginscan.c 42
-rw-r--r-- src/backend/access/gin/ginutil.c 18
-rw-r--r-- src/backend/optimizer/path/indxpath.c 77
-rw-r--r-- src/backend/tsearch/to_tsany.c 34
-rw-r--r-- src/backend/tsearch/ts_parse.c 7
-rw-r--r-- src/backend/utils/adt/tsginidx.c 57
-rw-r--r-- src/backend/utils/adt/tsgistidx.c 13
-rw-r--r-- src/backend/utils/adt/tsquery.c 48
-rw-r--r-- src/backend/utils/adt/tsquery_util.c 7
-rw-r--r-- src/backend/utils/adt/tsrank.c 286
-rw-r--r-- src/backend/utils/adt/tsvector.c 13
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 367
-rw-r--r-- src/include/access/gin.h 16
-rw-r--r-- src/include/catalog/catversion.h 4
-rw-r--r-- src/include/catalog/pg_am.h 4
-rw-r--r-- src/include/catalog/pg_amproc.h 5
-rw-r--r-- src/include/catalog/pg_proc.h 10
-rw-r--r-- src/include/tsearch/ts_public.h 3
-rw-r--r-- src/include/tsearch/ts_type.h 3
-rw-r--r-- src/include/tsearch/ts_utils.h 13
-rw-r--r-- src/test/regress/expected/opr_sanity.out 29
-rw-r--r-- src/test/regress/expected/tsdicts.out 10
-rw-r--r-- src/test/regress/expected/tsearch.out 24
-rw-r--r-- src/test/regress/expected/tstypes.out 128
-rw-r--r-- src/test/regress/sql/opr_sanity.sql 24
-rw-r--r-- src/test/regress/sql/tsearch.sql 3
-rw-r--r-- src/test/regress/sql/tstypes.sql 20
28 files changed, 1139 insertions, 474 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 3bedcc99606..3d60d337df4 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -18,8 +18,13 @@
#include "catalog/index.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
+#include "utils/datum.h"
#include "utils/memutils.h"
+
+/*
+ * Tries to refind previously taken ItemPointer on page.
+ */
static bool
findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
{
@@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off)
}
/*
- * Start* functions setup state of searches: find correct buffer and locks it,
- * Stop* functions unlock buffer (but don't release!)
+ * Goes to the next page if current offset is outside of bounds
+ */
+static bool
+moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack )
+{
+ Page page = BufferGetPage(stack->buffer);
+
+ if ( stack->off > PageGetMaxOffsetNumber(page) )
+ {
+ /*
+ * We scanned the whole page, so we should take right page
+ */
+ stack->blkno = GinPageGetOpaque(page)->rightlink;
+
+ if ( GinPageRightMost(page) )
+ return false; /* no more pages */
+
+ LockBuffer(stack->buffer, GIN_UNLOCK);
+ stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno);
+ LockBuffer(stack->buffer, GIN_SHARE);
+ stack->off = FirstOffsetNumber;
+ }
+
+ return true;
+}
+
+/*
+ * Does fullscan of posting tree and saves ItemPointers
+ * in scanEntry->partialMatch TIDBitmap
+ */
+static void
+scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree )
+{
+ GinPostingTreeScan *gdi;
+ Buffer buffer;
+ Page page;
+ BlockNumber blkno;
+
+ gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
+
+ buffer = scanBeginPostingTree(gdi);
+ IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */
+
+ freeGinBtreeStack(gdi->stack);
+ pfree(gdi);
+
+ /*
+ * Goes through all leaves
+ */
+ for(;;)
+ {
+ page = BufferGetPage(buffer);
+
+ if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber )
+ {
+ tbm_add_tuples( scanEntry->partialMatch,
+ (ItemPointer)GinDataPageGetItem(page, FirstOffsetNumber),
+ GinPageGetOpaque(page)->maxoff, false);
+ scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff;
+ }
+
+ blkno = GinPageGetOpaque(page)->rightlink;
+ if ( GinPageRightMost(page) )
+ {
+ UnlockReleaseBuffer(buffer);
+ return; /* no more pages */
+ }
+
+ LockBuffer(buffer, GIN_UNLOCK);
+ buffer = ReleaseAndReadBuffer(buffer, index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ }
+}
+
+/*
+ * Collects all ItemPointer into the TIDBitmap struct
+ * for entries partially matched to search entry.
+ *
+ * Returns true if done, false if it's needed to restart scan from scratch
+ */
+static bool
+computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry )
+{
+ Page page;
+ IndexTuple itup;
+ Datum idatum;
+ bool isnull;
+ int32 cmp;
+
+ scanEntry->partialMatch = tbm_create( work_mem * 1024L );
+
+ for(;;)
+ {
+ /*
+ * stack->off points to the entry of interest; the buffer is already locked
+ */
+ if ( moveRightIfItNeeded(btree, stack) == false )
+ return true;
+
+ page = BufferGetPage(stack->buffer);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+ idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull);
+ Assert(!isnull);
+
+ /*----------
+ * Check of partial match.
+ * case cmp == 0 => match
+ * case cmp > 0 => not match and finish scan
+ * case cmp < 0 => not match and continue scan
+ *----------
+ */
+ cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn,
+ scanEntry->entry,
+ idatum,
+ UInt16GetDatum(scanEntry->strategy)));
+
+ if ( cmp > 0 )
+ return true;
+ else if ( cmp < 0 )
+ {
+ stack->off++;
+ continue;
+ }
+
+ if ( GinIsPostingTree(itup) )
+ {
+ BlockNumber rootPostingTree = GinGetPostingTree(itup);
+ Datum newDatum,
+ savedDatum = datumCopy (
+ idatum,
+ btree->ginstate->tupdesc->attrs[0]->attbyval,
+ btree->ginstate->tupdesc->attrs[0]->attlen
+ );
+ /*
+ * We should unlock current page (but not unpin) during
+ * tree scan to prevent deadlock with vacuum processes.
+ *
+ * We save current entry value (savedDatum) to be able to refind
+ * our tuple after re-locking
+ */
+ LockBuffer(stack->buffer, GIN_UNLOCK);
+ scanForItems( btree->index, scanEntry, rootPostingTree );
+
+ /*
+ * We lock the entry page again; an insert might have occurred while
+ * it was unlocked, so we need to refind our position
+ */
+ LockBuffer(stack->buffer, GIN_SHARE);
+ page = BufferGetPage(stack->buffer);
+ if ( !GinPageIsLeaf(page) )
+ {
+ /*
+ * The root page became non-leaf while it was unlocked. We
+ * will start again; this situation does not occur
+ * often - the root can become a non-leaf page only once per
+ * life of the index.
+ */
+
+ return false;
+ }
+
+ for(;;)
+ {
+ if ( moveRightIfItNeeded(btree, stack) == false )
+ elog(ERROR, "lost saved point in index"); /* must not happen !!! */
+
+ page = BufferGetPage(stack->buffer);
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off));
+ newDatum = index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull);
+
+ if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 )
+ {
+ /* Found! */
+ if ( btree->ginstate->tupdesc->attrs[0]->attbyval == false )
+ pfree( DatumGetPointer(savedDatum) );
+ break;
+ }
+
+ stack->off++;
+ }
+ }
+ else
+ {
+ tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false);
+ scanEntry->predictNumberResult += GinGetNPosting(itup);
+ }
+
+ /*
+ * Ok, we save ItemPointers, go to the next entry
+ */
+ stack->off++;
+ }
+
+ return true;
+}
+
+/*
+ * Start* functions set up the beginning state of searches: find the correct buffer and pin it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
@@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
entry->offset = InvalidOffsetNumber;
entry->list = NULL;
entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
entry->reduceResult = FALSE;
entry->predictNumberResult = 0;
- if (btreeEntry.findItem(&btreeEntry, stackEntry))
+ if ( entry->isPartialMatch )
+ {
+ /*
+ * btreeEntry.findItem points to the first equal or greater value
+ * than needed. So we will scan further and collect all
+ * ItemPointers
+ */
+ btreeEntry.findItem(&btreeEntry, stackEntry);
+ if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false )
+ {
+ /*
+ * GIN tree was seriously restructured, so we will
+ * cleanup all found data and rescan. See comments near
+ * 'return false' in computePartialMatchList()
+ */
+ if ( entry->partialMatch )
+ {
+ tbm_free( entry->partialMatch );
+ entry->partialMatch = NULL;
+ }
+ LockBuffer(stackEntry->buffer, GIN_UNLOCK);
+ freeGinBtreeStack(stackEntry);
+
+ startScanEntry(index, ginstate, entry);
+ return;
+ }
+
+ if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) )
+ {
+ tbm_begin_iterate(entry->partialMatch);
+ entry->isFinished = FALSE;
+ }
+ }
+ else if (btreeEntry.findItem(&btreeEntry, stackEntry))
{
IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off));
@@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
GinPostingTreeScan *gdi;
Page page;
+ /*
+ * We should unlock the entry page before dealing with the
+ * posting tree, to prevent deadlocks with vacuum processes.
+ * Because an entry is never deleted from a page and a posting tree
+ * is never reduced to a posting list, we can unlock the page after
+ * getting the BlockNumber of the root of the posting tree.
+ */
LockBuffer(stackEntry->buffer, GIN_UNLOCK);
needUnlock = FALSE;
gdi = prepareScanPostingTree(index, rootPostingTree, TRUE);
@@ -111,7 +354,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
*/
entry->list = (ItemPointerData *) palloc( BLCKSZ );
entry->nlist = GinPageGetOpaque(page)->maxoff;
- memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
@@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key)
return;
for (i = 0; i < key->nentries; i++)
+ {
+ /*
+ * Copy the strategy number into the entry before starting it:
+ * startScanEntry() may pass it to comparePartialFn
+ */
+ key->scanEntry[i].strategy = key->strategy;
startScanEntry(index, ginstate, key->scanEntry + i);
+ }
memset(key->entryRes, TRUE, sizeof(bool) * key->nentries);
key->isFinished = FALSE;
@@ -233,12 +483,12 @@ entryGetNextItem(Relation index, GinScanEntry entry)
* Found position equal to or greater than stored
*/
entry->nlist = GinPageGetOpaque(page)->maxoff;
- memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
+ memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber),
GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) );
LockBuffer(entry->buffer, GIN_UNLOCK);
- if ( !ItemPointerIsValid(&entry->curItem) ||
+ if ( !ItemPointerIsValid(&entry->curItem) ||
compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 )
{
/*
@@ -248,7 +498,7 @@ entryGetNextItem(Relation index, GinScanEntry entry)
break;
}
-
+
/*
* Find greater than entry->curItem position, store it.
*/
@@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = entry->master->isFinished;
entry->curItem = entry->master->curItem;
}
+ else if ( entry->partialMatch )
+ {
+ do
+ {
+ if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+ {
+ entry->partialMatchResult = tbm_iterate( entry->partialMatch );
+
+ if ( entry->partialMatchResult == NULL )
+ {
+ ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber);
+ entry->isFinished = TRUE;
+ break;
+ }
+ else if ( entry->partialMatchResult->ntuples < 0 )
+ {
+ /* bitmap became lossy */
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("not enough memory to store result of partial match operator" ),
+ errhint("Increase the \"work_mem\" parameter.")));
+ }
+ entry->offset = 0;
+ }
+
+ ItemPointerSet(&entry->curItem,
+ entry->partialMatchResult->blockno,
+ entry->partialMatchResult->offsets[ entry->offset ]);
+ entry->offset ++;
+
+ } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+ }
else if (!BufferIsValid(entry->buffer))
{
entry->offset++;
@@ -298,6 +580,54 @@ entryGetItem(Relation index, GinScanEntry entry)
}
/*
+ * Restart from the saved position. Actually this is needed only for
+ * partial match. The function is called only by ginrestrpos()
+ */
+void
+ginrestartentry(GinScanEntry entry)
+{
+ ItemPointerData stopItem = entry->curItem;
+ bool savedReduceResult;
+
+ if ( entry->master || entry->partialMatch == NULL )
+ return; /* entry is slave or not a partial match type*/
+
+ if ( entry->isFinished )
+ return; /* entry was finished before ginmarkpos() call */
+
+ if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber )
+ return; /* entry hadn't begun before ginmarkpos() call */
+
+ /*
+ * Reset iterator
+ */
+ tbm_begin_iterate( entry->partialMatch );
+ entry->partialMatchResult = NULL;
+ entry->offset = 0;
+
+ /*
+ * Temporary reset reduceResult flag to guarantee refinding
+ * of curItem
+ */
+ savedReduceResult = entry->reduceResult;
+ entry->reduceResult = FALSE;
+
+ do
+ {
+ /*
+ * We can use null instead of index because
+ * partial match doesn't use it
+ */
+ if ( entryGetItem( NULL, entry ) == false )
+ elog(ERROR, "cannot refind scan position"); /* must not be here! */
+ } while( compareItemPointers( &stopItem, &entry->curItem ) != 0 );
+
+ Assert( entry->isFinished == FALSE );
+
+ entry->reduceResult = savedReduceResult;
+}
+
+/*
* Sets key->curItem to new found heap item pointer for one scan key
* Returns isFinished, ie TRUE means we did NOT get a new item pointer!
* Also, *keyrecheck is set true if recheck is needed for this scan key.
@@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS)
bool res;
if (dir != ForwardScanDirection)
- elog(ERROR, "Gin doesn't support other scan directions than forward");
+ elog(ERROR, "GIN doesn't support other scan directions than forward");
if (GinIsNewKey(scan))
newScanKey(scan);
diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c
index 10a528817e6..cec24fbfdbd 100644
--- a/src/backend/access/gin/ginscan.c
+++ b/src/backend/access/gin/ginscan.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS)
static void
fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
- Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy)
+ Datum *entryValues, bool *partial_matches, uint32 nEntryValues,
+ StrategyNumber strategy)
{
uint32 i,
j;
@@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query,
key->scanEntry[i].buffer = InvalidBuffer;
key->scanEntry[i].list = NULL;
key->scanEntry[i].nlist = 0;
+ key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches )
+ ? partial_matches[i] : false;
/* link to the equals entry in current scan key */
key->scanEntry[i].master = NULL;
@@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys)
key->scanEntry[j].buffer = InvalidBuffer;
key->scanEntry[j].list = NULL;
key->scanEntry[j].nlist = 0;
+ key->scanEntry[j].partialMatch = NULL;
+ key->scanEntry[j].partialMatchResult = NULL;
}
}
}
@@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes)
ReleaseBuffer(key->scanEntry[j].buffer);
if (removeRes && key->scanEntry[j].list)
pfree(key->scanEntry[j].list);
+ if (removeRes && key->scanEntry[j].partialMatch)
+ tbm_free(key->scanEntry[j].partialMatch);
}
if (removeRes)
@@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan)
{
Datum *entryValues;
int32 nEntryValues;
+ bool *partial_matches = NULL;
- if (scankey[i].sk_flags & SK_ISNULL)
- elog(ERROR, "Gin doesn't support NULL as scan key");
Assert(scankey[i].sk_attno == 1);
- entryValues = (Datum *) DatumGetPointer(
- FunctionCall3(
+ /* XXX can't we treat nulls by just setting isVoidRes? */
+ /* This would amount to assuming that all GIN operators are strict */
+ if (scankey[i].sk_flags & SK_ISNULL)
+ elog(ERROR, "GIN doesn't support NULL as scan key");
+
+ entryValues = (Datum *) DatumGetPointer(FunctionCall4(
&so->ginstate.extractQueryFn,
scankey[i].sk_argument,
PointerGetDatum(&nEntryValues),
- UInt16GetDatum(scankey[i].sk_strategy)
- )
- );
+ UInt16GetDatum(scankey[i].sk_strategy),
+ PointerGetDatum(&partial_matches)));
if (nEntryValues < 0)
{
/*
@@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan)
so->isVoidRes = true;
break;
}
+
+ /*
+ * extractQueryFn signals that everything matches
+ */
if (entryValues == NULL || nEntryValues == 0)
/* full scan... */
continue;
fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument,
- entryValues, nEntryValues, scankey[i].sk_strategy);
+ entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy);
nkeys++;
}
@@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS)
}
static GinScanKey
-copyScanKeys(GinScanKey keys, uint32 nkeys)
+copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart)
{
GinScanKey newkeys;
uint32 i,
@@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys)
newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN;
}
+
+ if ( restart )
+ ginrestartentry( &keys[i].scanEntry[j] );
}
}
@@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->markPos, so->nkeys, FALSE);
- so->markPos = copyScanKeys(so->keys, so->nkeys);
+ so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE);
PG_RETURN_VOID();
}
@@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS)
GinScanOpaque so = (GinScanOpaque) scan->opaque;
freeScanKeys(so->keys, so->nkeys, FALSE);
- so->keys = copyScanKeys(so->markPos, so->nkeys);
+ so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE);
PG_RETURN_VOID();
}
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 7da7689f826..36105e20d2d 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index)
fmgr_info_copy(&(state->consistentFn),
index_getprocinfo(index, 1, GIN_CONSISTENT_PROC),
CurrentMemoryContext);
+
+ /*
+ * Check opclass capability to do partial match.
+ */
+ if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
+ {
+ fmgr_info_copy(&(state->comparePartialFn),
+ index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC),
+ CurrentMemoryContext);
+
+ state->canPartialMatch = true;
+ }
+ else
+ {
+ state->canPartialMatch = false;
+ }
}
/*
diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 908dbc481f2..4fc7c536548 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.229 2008/04/13 20:51:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.230 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2364,7 +2364,10 @@ expand_boolean_index_clause(Node *clause,
* expand_indexqual_opclause --- expand a single indexqual condition
* that is an operator clause
*
- * The input is a single RestrictInfo, the output a list of RestrictInfos
+ * The input is a single RestrictInfo, the output a list of RestrictInfos.
+ *
+ * In the base case this is just list_make1(), but we have to be prepared to
+ * expand special cases that were accepted by match_special_index_operator().
*/
static List *
expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
@@ -2379,63 +2382,77 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily)
Const *prefix = NULL;
Const *rest = NULL;
Pattern_Prefix_Status pstatus;
- List *result;
+ /*
+ * LIKE and regex operators are not members of any btree index opfamily,
+ * but they can be members of opfamilies for more exotic index types such
+ * as GIN. Therefore, we should only do expansion if the operator is
+ * actually not in the opfamily. But checking that requires a syscache
+ * lookup, so it's best to first see if the operator is one we are
+ * interested in.
+ */
switch (expr_op)
{
- /*
- * LIKE and regex operators are not members of any index opfamily,
- * so if we find one in an indexqual list we can assume that it
- * was accepted by match_special_index_operator().
- */
case OID_TEXT_LIKE_OP:
case OID_BPCHAR_LIKE_OP:
case OID_NAME_LIKE_OP:
case OID_BYTEA_LIKE_OP:
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
- &prefix, &rest);
- result = prefix_quals(leftop, opfamily, prefix, pstatus);
+ if (!op_in_opfamily(expr_op, opfamily))
+ {
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like,
+ &prefix, &rest);
+ return prefix_quals(leftop, opfamily, prefix, pstatus);
+ }
break;
case OID_TEXT_ICLIKE_OP:
case OID_BPCHAR_ICLIKE_OP:
case OID_NAME_ICLIKE_OP:
- /* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
- &prefix, &rest);
- result = prefix_quals(leftop, opfamily, prefix, pstatus);
+ if (!op_in_opfamily(expr_op, opfamily))
+ {
+ /* the right-hand const is type text for all of these */
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC,
+ &prefix, &rest);
+ return prefix_quals(leftop, opfamily, prefix, pstatus);
+ }
break;
case OID_TEXT_REGEXEQ_OP:
case OID_BPCHAR_REGEXEQ_OP:
case OID_NAME_REGEXEQ_OP:
- /* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
- &prefix, &rest);
- result = prefix_quals(leftop, opfamily, prefix, pstatus);
+ if (!op_in_opfamily(expr_op, opfamily))
+ {
+ /* the right-hand const is type text for all of these */
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex,
+ &prefix, &rest);
+ return prefix_quals(leftop, opfamily, prefix, pstatus);
+ }
break;
case OID_TEXT_ICREGEXEQ_OP:
case OID_BPCHAR_ICREGEXEQ_OP:
case OID_NAME_ICREGEXEQ_OP:
- /* the right-hand const is type text for all of these */
- pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
- &prefix, &rest);
- result = prefix_quals(leftop, opfamily, prefix, pstatus);
+ if (!op_in_opfamily(expr_op, opfamily))
+ {
+ /* the right-hand const is type text for all of these */
+ pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC,
+ &prefix, &rest);
+ return prefix_quals(leftop, opfamily, prefix, pstatus);
+ }
break;
case OID_INET_SUB_OP:
case OID_INET_SUBEQ_OP:
- result = network_prefix_quals(leftop, expr_op, opfamily,
- patt->constvalue);
- break;
-
- default:
- result = list_make1(rinfo);
+ if (!op_in_opfamily(expr_op, opfamily))
+ {
+ return network_prefix_quals(leftop, expr_op, opfamily,
+ patt->constvalue);
+ }
break;
}
- return result;
+ /* Default case: just make a list of the unmodified indexqual */
+ return list_make1(rinfo);
}
/*
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index a217ff0c5bc..0e00252daae 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.11 2008/03/25 22:42:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -32,23 +32,22 @@ get_current_ts_config(PG_FUNCTION_ARGS)
static int
compareWORD(const void *a, const void *b)
{
- if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len)
+ int res;
+
+ res = tsCompareString(
+ ((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
+ ((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
+ false );
+
+ if (res == 0)
{
- int res = strncmp(
- ((ParsedWord *) a)->word,
- ((ParsedWord *) b)->word,
- ((ParsedWord *) b)->len);
+ if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
+ return 0;
- if (res == 0)
- {
- if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
- return 0;
-
- return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
- }
- return res;
+ res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
}
- return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1;
+
+ return res;
}
static int
@@ -268,7 +267,7 @@ to_tsvector(PG_FUNCTION_ARGS)
* and different variants are ORred together.
*/
static void
-pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
+pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
{
int4 count = 0;
ParsedText prs;
@@ -302,7 +301,8 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
{
- pushValue(state, prs.words[count].word, prs.words[count].len, weight);
+ pushValue(state, prs.words[count].word, prs.words[count].len, weight,
+ ( (prs.words[count].flags & TSL_PREFIX) || prefix ) ? true : false );
pfree(prs.words[count].word);
if (cnt)
pushOperator(state, OP_AND);
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c
index 22862bdb806..0634f54a71b 100644
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.7 2008/01/01 19:45:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.8 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -415,6 +415,7 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen)
prs->words[prs->curwords].len = strlen(ptr->lexeme);
prs->words[prs->curwords].word = ptr->lexeme;
prs->words[prs->curwords].nvariant = ptr->nvariant;
+ prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX;
prs->words[prs->curwords].alen = 0;
prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos);
ptr++;
@@ -463,8 +464,8 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
for (i = 0; i < query->size; i++)
{
if (item->type == QI_VAL &&
- item->operand.length == buflen &&
- strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
+ tsCompareString( GETOPERAND(query) + item->operand.distance, item->operand.length,
+ buf, buflen, item->operand.prefix ) == 0 )
{
if (word->item)
{
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
index 55518834ae9..a09c92eebea 100644
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.11 2008/04/14 17:05:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -20,6 +20,46 @@
Datum
+gin_cmp_tslexeme(PG_FUNCTION_ARGS)
+{
+ text *a = PG_GETARG_TEXT_P(0);
+ text *b = PG_GETARG_TEXT_P(1);
+ int cmp;
+
+ cmp = tsCompareString(
+ VARDATA(a), VARSIZE(a) - VARHDRSZ,
+ VARDATA(b), VARSIZE(b) - VARHDRSZ,
+ false );
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+ PG_RETURN_INT32( cmp );
+}
+
+Datum
+gin_cmp_prefix(PG_FUNCTION_ARGS)
+{
+ text *a = PG_GETARG_TEXT_P(0);
+ text *b = PG_GETARG_TEXT_P(1);
+#ifdef NOT_USED
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+#endif
+ int cmp;
+
+ cmp = tsCompareString(
+ VARDATA(a), VARSIZE(a) - VARHDRSZ,
+ VARDATA(b), VARSIZE(b) - VARHDRSZ,
+ true );
+
+ if ( cmp < 0 )
+ cmp = 1; /* prevent continue scan */
+
+ PG_FREE_IF_COPY(a,0);
+ PG_FREE_IF_COPY(b,1);
+ PG_RETURN_INT32( cmp );
+}
+
+Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
TSVector vector = PG_GETARG_TSVECTOR(0);
@@ -55,7 +95,9 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
TSQuery query = PG_GETARG_TSQUERY(0);
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
+ bool **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
Datum *entries = NULL;
+ bool *partialmatch;
*nentries = 0;
@@ -65,12 +107,14 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
j = 0,
len;
QueryItem *item;
+ bool use_fullscan=false;
item = clean_NOT(GETQUERY(query), &len);
if (!item)
- ereport(ERROR,
- (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
- errmsg("query requires full scan, which is not supported by GIN indexes")));
+ {
+ use_fullscan = true;
+ *nentries = 1;
+ }
item = GETQUERY(query);
@@ -79,6 +123,7 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
+ partialmatch = *ptr_partialmatch = (bool*) palloc(sizeof(bool) * (*nentries));
for (i = 0; i < query->size; i++)
if (item[i].type == QI_VAL)
@@ -88,8 +133,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
val->length);
+ partialmatch[j] = val->prefix;
entries[j++] = PointerGetDatum(txt);
}
+
+ if ( use_fullscan )
+ entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
}
else
*nentries = -1; /* nothing can be found */
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index ecbac7b40f2..b18f7131545 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.8 2008/04/14 17:05:33 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -307,6 +307,12 @@ checkcondition_arr(void *checkval, QueryOperand *val)
/* Loop invariant: StopLow <= val < StopHigh */
+ /*
+ * we are not able to find a prefix by hash value
+ */
+ if ( val->prefix )
+ return true;
+
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
@@ -324,6 +330,11 @@ checkcondition_arr(void *checkval, QueryOperand *val)
static bool
checkcondition_bit(void *checkval, QueryOperand *val)
{
+ /*
+ * we are not able to find a prefix in signature tree
+ */
+ if ( val->prefix )
+ return true;
return GETBIT(checkval, HASHVAL(val->valcrc));
}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index e4f1beba905..9a890d2ae68 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -56,12 +56,14 @@ struct TSQueryParserStateData
#define WAITSINGLEOPERAND 4
/*
- * subroutine to parse the weight part, like ':1AB' of a query.
+ * subroutine to parse the modifiers (weight and prefix flag currently)
+ * part, like ':1AB' of a query.
*/
static char *
-get_weight(char *buf, int16 *weight)
+get_modifiers(char *buf, int16 *weight, bool *prefix)
{
*weight = 0;
+ *prefix = false;
if (!t_iseq(buf, ':'))
return buf;
@@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
case 'D':
*weight |= 1;
break;
+ case '*':
+ *prefix = true;
+ break;
default:
return buf;
}
@@ -118,8 +123,11 @@ typedef enum
static ts_tokentype
gettoken_query(TSQueryParserState state,
int8 *operator,
- int *lenval, char **strval, int16 *weight)
+ int *lenval, char **strval, int16 *weight, bool *prefix)
{
+ *weight = 0;
+ *prefix = false;
+
while (1)
{
switch (state->state)
@@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{
- state->buf = get_weight(state->buf, weight);
+ state->buf = get_modifiers(state->buf, weight, prefix);
state->state = WAITOPERATOR;
return PT_VAL;
}
@@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
}
static void
-pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
{
QueryOperand *tmp;
@@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
tmp->type = QI_VAL;
tmp->weight = weight;
+ tmp->prefix = prefix;
tmp->valcrc = (int32) valcrc;
tmp->length = lenval;
tmp->distance = distance;
@@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
* of the string.
*/
void
-pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
{
pg_crc32 valcrc;
@@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
INIT_CRC32(valcrc);
COMP_CRC32(valcrc, strval, lenval);
FIN_CRC32(valcrc);
- pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
+ pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
/* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop)
@@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
int8 opstack[STACKDEPTH];
int lenstack = 0;
int16 weight = 0;
+ bool prefix;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
+ while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
{
switch (type)
{
case PT_VAL:
- pushval(opaque, state, strval, lenval, weight);
+ pushval(opaque, state, strval, lenval, weight, prefix);
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
opstack[lenstack - 1] == OP_NOT))
{
@@ -549,9 +559,9 @@ parse_tsquery(char *buf,
static void
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
- int16 weight)
+ int16 weight, bool prefix)
{
- pushValue(state, strval, lenval, weight);
+ pushValue(state, strval, lenval, weight, prefix);
}
/*
@@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
char *op = in->op + curpol->distance;
int clen;
- RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+ RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
*(in->cur) = '\'';
in->cur++;
while (*op)
@@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
}
*(in->cur) = '\'';
in->cur++;
- if (curpol->weight)
+ if (curpol->weight || curpol->prefix)
{
*(in->cur) = ':';
in->cur++;
+ if ( curpol->prefix )
+ {
+ *(in->cur) = '*';
+ in->cur++;
+ }
if (curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
@@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
* uint8 type, QI_VAL
* uint8 weight
+ * uint8 prefix
* operand text in client encoding, null-terminated
*
* For each operator:
* uint8 type, QI_OPR
@@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
{
case QI_VAL:
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
+ pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
break;
case QI_OPR:
@@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
{
size_t val_len; /* length after recoding to server encoding */
uint8 weight;
+ uint8 prefix;
const char *val;
pg_crc32 valcrc;
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
val = pq_getmsgstring(buf);
val_len = strlen(val);
@@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
FIN_CRC32(valcrc);
item->operand.weight = weight;
+ item->operand.prefix = (prefix) ? true : false;
item->operand.valcrc = (int32) valcrc;
item->operand.length = val_len;
item->operand.distance = datalen;
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
index b81835c9692..4cc1a2a21eb 100644
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.8 2008/01/01 19:45:53 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -125,10 +125,7 @@ QTNodeCompare(QTNode *an, QTNode *bn)
return (ao->valcrc > bo->valcrc) ? -1 : 1;
}
- if (ao->length == bo->length)
- return strncmp(an->word, bn->word, ao->length);
- else
- return (ao->length > bo->length) ? -1 : 1;
+ return tsCompareString( an->word, ao->length, bn->word, bo->length, false);
}
}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
index 065c94d2097..d23e05e9939 100644
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -71,45 +71,60 @@ cnt_length(TSVector t)
return len;
}
-static int
-WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
-{
- if (ptr->len == item->length)
- return strncmp(
- eval + ptr->pos,
- qval + item->distance,
- item->length);
- return (ptr->len > item->length) ? 1 : -1;
-}
+#define WordECompareQueryItem(e,q,p,i,m) \
+ tsCompareString((q) + (i)->distance, (i)->length, \
+ (e) + (p)->pos, (p)->len, (m))
+
/*
- * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q'
- * is the TSQuery containing 'item'. Returns NULL if not found.
+ * Returns a pointer to a WordEntry's array corresponding to 'item' from
+ * tsvector 't'. 'q' is the TSQuery containing 'item'.
+ * Returns NULL if not found.
*/
static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
{
WordEntry *StopLow = ARRPTR(t);
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
- WordEntry *StopMiddle;
+ WordEntry *StopMiddle = StopHigh;
int difference;
- /* Loop invariant: StopLow <= item < StopHigh */
+ *nitem=0;
+ /* Loop invariant: StopLow <= item < StopHigh */
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
+ difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
if (difference == 0)
- return StopMiddle;
- else if (difference < 0)
+ {
+ StopHigh = StopMiddle;
+ *nitem=1;
+ break;
+ }
+ else if (difference > 0)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
- return NULL;
+ if ( item->prefix == true )
+ {
+ if ( StopLow >= StopHigh )
+ StopMiddle = StopHigh;
+
+ *nitem=0;
+
+ while( StopMiddle < (WordEntry *) STRPTR(t) &&
+ WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 )
+ {
+ (*nitem)++;
+ StopMiddle++;
+ }
+ }
+
+ return ( *nitem > 0 ) ? StopHigh : NULL;
}
@@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg)
QueryOperand *qa = (*(QueryOperand **) a);
QueryOperand *qb = (*(QueryOperand **) b);
- if (qa->length == qb->length)
- return strncmp(operand + qa->distance,
- operand + qb->distance,
- qb->length);
-
- return (qa->length > qb->length) ? 1 : -1;
+ return tsCompareString(operand + qa->distance, qa->length,
+ operand + qb->distance, qb->length,
+ false);
}
/*
@@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
k,
l,
p;
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post,
*ct;
int4 dimt,
lenct,
- dist;
+ dist,
+ nitem;
float res = -1.0;
QueryOperand **item;
int size = q->size;
@@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
for (i = 0; i < size; i++)
{
- entry = find_wordentry(t, q, item[i]);
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
if (!entry)
continue;
- if (entry->haspos)
- pos[i] = _POSVECPTR(t, entry);
- else
- pos[i] = &POSNULL;
-
-
- dimt = pos[i]->npos;
- post = pos[i]->pos;
- for (k = 0; k < i; k++)
+ while( entry - firstentry < nitem )
{
- if (!pos[k])
- continue;
- lenct = pos[k]->npos;
- ct = pos[k]->pos;
- for (l = 0; l < dimt; l++)
+ if (entry->haspos)
+ pos[i] = _POSVECPTR(t, entry);
+ else
+ pos[i] = &POSNULL;
+
+ dimt = pos[i]->npos;
+ post = pos[i]->pos;
+ for (k = 0; k < i; k++)
{
- for (p = 0; p < lenct; p++)
+ if (!pos[k])
+ continue;
+ lenct = pos[k]->npos;
+ ct = pos[k]->pos;
+ for (l = 0; l < dimt; l++)
{
- dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
- if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+ for (p = 0; p < lenct; p++)
{
- float curw;
-
- if (!dist)
- dist = MAXENTRYPOS;
- curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
- res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+ if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
+ {
+ float curw;
+
+ if (!dist)
+ dist = MAXENTRYPOS;
+ curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+ res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ }
}
}
}
+
+ entry++;
}
}
pfree(pos);
@@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
static float
calc_rank_or(float *w, TSVector t, TSQuery q)
{
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post;
int4 dimt,
j,
- i;
+ i,
+ nitem;
float res = 0.0;
QueryOperand **item;
int size = q->size;
@@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
wjm;
int4 jm;
- entry = find_wordentry(t, q, item[i]);
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
if (!entry)
continue;
- if (entry->haspos)
- {
- dimt = POSDATALEN(t, entry);
- post = POSDATAPTR(t, entry);
- }
- else
+ while( entry - firstentry < nitem )
{
- dimt = POSNULL.npos;
- post = POSNULL.pos;
- }
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(t, entry);
+ post = POSDATAPTR(t, entry);
+ }
+ else
+ {
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
+ }
- resj = 0.0;
- wjm = -1.0;
- jm = 0;
- for (j = 0; j < dimt; j++)
- {
- resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
- if (wpos(post[j]) > wjm)
+ resj = 0.0;
+ wjm = -1.0;
+ jm = 0;
+ for (j = 0; j < dimt; j++)
{
- wjm = wpos(post[j]);
- jm = j;
+ resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+ if (wpos(post[j]) > wjm)
+ {
+ wjm = wpos(post[j]);
+ jm = j;
+ }
}
- }
/*
- limit (sum(i/i^2),i->inf) = pi^2/6
- resj = sum(wi/i^2),i=1,noccurence,
- wi - should be sorted desc,
- don't sort for now, just choose maximum weight. This should be corrected
- Oleg Bartunov
+ limit (sum(i/i^2),i->inf) = pi^2/6
+ resj = sum(wi/i^2),i=1,noccurence,
+ wi - should be sorted desc,
+ don't sort for now, just choose maximum weight. This should be corrected
+ Oleg Bartunov
*/
- res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+
+ entry++;
+ }
}
if (size > 0)
res = res / size;
@@ -594,11 +619,13 @@ static DocRepresentation *
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
{
QueryItem *item = GETQUERY(qr->query);
- WordEntry *entry;
+ WordEntry *entry,
+ *firstentry;
WordEntryPos *post;
int4 dimt,
j,
- i;
+ i,
+ nitem;
int len = qr->query->size * 4,
cur = 0;
DocRepresentation *doc;
@@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
continue;
- entry = find_wordentry(txt, qr->query, curoperand);
+ firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
if (!entry)
continue;
- if (entry->haspos)
- {
- dimt = POSDATALEN(txt, entry);
- post = POSDATAPTR(txt, entry);
- }
- else
- {
- dimt = POSNULL.npos;
- post = POSNULL.pos;
- }
-
- while (cur + dimt >= len)
+ while( entry - firstentry < nitem )
{
- len *= 2;
- doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
- }
-
- for (j = 0; j < dimt; j++)
- {
- if (j == 0)
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(txt, entry);
+ post = POSDATAPTR(txt, entry);
+ }
+ else
{
- int k;
+ dimt = POSNULL.npos;
+ post = POSNULL.pos;
+ }
- doc[cur].nitem = 0;
- doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+ while (cur + dimt >= len)
+ {
+ len *= 2;
+ doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
+ }
- for (k = 0; k < qr->query->size; k++)
+ for (j = 0; j < dimt; j++)
+ {
+ if (j == 0)
{
- QueryOperand *kptr = &item[k].operand;
- QueryOperand *iptr = &item[i].operand;
-
- if (k == i ||
- (item[k].type == QI_VAL &&
- compareQueryOperand(&kptr, &iptr, operand) == 0))
+ int k;
+
+ doc[cur].nitem = 0;
+ doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
+
+ for (k = 0; k < qr->query->size; k++)
{
- /*
- * if k == i, we've already checked above that it's
- * type == Q_VAL
- */
- doc[cur].item[doc[cur].nitem] = item + k;
- doc[cur].nitem++;
- QR_SET_OPERAND_EXISTS(qr, item + k);
+ QueryOperand *kptr = &item[k].operand;
+ QueryOperand *iptr = &item[i].operand;
+
+ if (k == i ||
+ (item[k].type == QI_VAL &&
+ compareQueryOperand(&kptr, &iptr, operand) == 0))
+ {
+ /*
+ * if k == i, we've already checked above that it's
+ * type == QI_VAL
+ */
+ doc[cur].item[doc[cur].nitem] = item + k;
+ doc[cur].nitem++;
+ QR_SET_OPERAND_EXISTS(qr, item + k);
+ }
}
}
+ else
+ {
+ doc[cur].nitem = doc[cur - 1].nitem;
+ doc[cur].item = doc[cur - 1].item;
+ }
+ doc[cur].pos = WEP_GETPOS(post[j]);
+ doc[cur].wclass = WEP_GETWEIGHT(post[j]);
+ cur++;
}
- else
- {
- doc[cur].nitem = doc[cur - 1].nitem;
- doc[cur].item = doc[cur - 1].item;
- }
- doc[cur].pos = WEP_GETPOS(post[j]);
- doc[cur].wclass = WEP_GETWEIGHT(post[j]);
- cur++;
+
+ entry++;
}
}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 18342800cc0..7a8da86423f 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.13 2008/03/10 12:57:05 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -85,14 +85,9 @@ compareentry(const void *va, const void *vb, void *arg)
const WordEntryIN *b = (const WordEntryIN *) vb;
char *BufferStr = (char *) arg;
- if (a->entry.len == b->entry.len)
- {
- return strncmp(&BufferStr[a->entry.pos],
- &BufferStr[b->entry.pos],
- a->entry.len);
- }
-
- return (a->entry.len > b->entry.len) ? 1 : -1;
+ return tsCompareString( &BufferStr[a->entry.pos], a->entry.len,
+ &BufferStr[b->entry.pos], b->entry.len,
+ false );
}
/*
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 539a9be9789..4e7d50b526a 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.15 2008/04/08 18:20:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -127,11 +127,7 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
{
return (aptr->haspos > bptr->haspos) ? -1 : 1;
}
- else if (aptr->len != bptr->len)
- {
- return (aptr->len > bptr->len) ? -1 : 1;
- }
- else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
+ else if ( (res=tsCompareString( STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) !=0 )
{
return res;
}
@@ -286,18 +282,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(out);
}
-static int
-compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
-{
- if (a->len == b->len)
- {
- return strncmp(
- ptra + a->pos,
- ptrb + b->pos,
- a->len);
- }
- return (a->len > b->len) ? 1 : -1;
-}
+#define compareEntry(pa, a, pb, b) \
+ tsCompareString((pa) + (a)->pos, (a)->len, \
+ (pb) + (b)->pos, (b)->len, \
+ false)
/*
* Add positions from src to dest after offsetting them by maxpos.
@@ -534,18 +522,46 @@ tsvector_concat(PG_FUNCTION_ARGS)
}
/*
- * compare 2 string values
+ * Compare two strings by tsvector rules.
+ * if prefix = true then it returns zero value iff b has prefix a
*/
-static int4
-ValCompare(CHKVAL *chkval, WordEntry *ptr, QueryOperand *item)
+int4
+tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
{
- if (ptr->len == item->length)
- return strncmp(
- &(chkval->values[ptr->pos]),
- &(chkval->operand[item->distance]),
- item->length);
+ int cmp;
+
+ if ( lena == 0 )
+ {
+ if ( prefix )
+ cmp = 0; /* empty string is a prefix of any string */
+ else
+ cmp = (lenb>0) ? -1 : 0;
+ }
+ else if ( lenb == 0 )
+ {
+ cmp = (lena>0) ? 1 : 0;
+ }
+ else
+ {
+ cmp = memcmp(a, b, Min(lena, lenb));
- return (ptr->len > item->length) ? 1 : -1;
+ if ( prefix )
+ {
+ if ( cmp == 0 && lena > lenb )
+ {
+ /*
+ * argument b does not begin with argument a
+ */
+ cmp=1;
+ }
+ }
+ else if ( (cmp == 0) && (lena != lenb) )
+ {
+ cmp = (lena < lenb) ? -1 : 1;
+ }
+ }
+
+ return cmp;
}
/*
@@ -582,25 +598,52 @@ checkcondition_str(void *checkval, QueryOperand *val)
CHKVAL *chkval = (CHKVAL *) checkval;
WordEntry *StopLow = chkval->arrb;
WordEntry *StopHigh = chkval->arre;
- WordEntry *StopMiddle;
- int difference;
+ WordEntry *StopMiddle = StopHigh;
+ int difference = -1;
+ bool res=false;
/* Loop invariant: StopLow <= val < StopHigh */
-
while (StopLow < StopHigh)
{
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
- difference = ValCompare(chkval, StopMiddle, val);
+ difference = tsCompareString( chkval->operand + val->distance, val->length,
+ chkval->values + StopMiddle->pos, StopMiddle->len,
+ false);
+
if (difference == 0)
- return (val->weight && StopMiddle->haspos) ?
+ {
+ res = (val->weight && StopMiddle->haspos) ?
checkclass_str(chkval, StopMiddle, val) : true;
- else if (difference < 0)
+ break;
+ }
+ else if (difference > 0)
StopLow = StopMiddle + 1;
else
StopHigh = StopMiddle;
}
- return (false);
+ if ( res == false && val->prefix == true )
+ {
+ /*
+ * there was a failed exact search, so we should scan further to find
+ * a prefix match.
+ */
+ if ( StopLow >= StopHigh )
+ StopMiddle = StopHigh;
+
+ while( res == false && StopMiddle < chkval->arre &&
+ tsCompareString( chkval->operand + val->distance, val->length,
+ chkval->values + StopMiddle->pos, StopMiddle->len,
+ true) == 0 )
+ {
+ res = (val->weight && StopMiddle->haspos) ?
+ checkclass_str(chkval, StopMiddle, val) : true;
+
+ StopMiddle++;
+ }
+ }
+
+ return res;
}
/*
@@ -758,50 +801,38 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
return num;
}
-static WordEntry **
-SEI_realloc(WordEntry **in, uint32 *len)
-{
- if (*len == 0 || in == NULL)
- {
- *len = 8;
- in = palloc(sizeof(WordEntry *) * (*len));
- }
- else
- {
- *len *= 2;
- in = repalloc(in, sizeof(WordEntry *) * (*len));
- }
- return in;
-}
+#define compareStatWord(a,e,s,t) \
+ tsCompareString(STATSTRPTR(s) + (a)->pos, (a)->len, \
+ STRPTR(t) + (e)->pos, (e)->len, \
+ false)
-static int
-compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, TSVector txt)
+typedef struct WordEntryMark
{
- if (a->len == b->len)
- return strncmp(
- STATSTRPTR(stat) + a->pos,
- STRPTR(txt) + b->pos,
- a->len
- );
- return (a->len > b->len) ? 1 : -1;
-}
+ WordEntry *newentry;
+ StatEntry *pos;
+} WordEntryMark;
static tsstat *
-formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
+formstat(tsstat *stat, TSVector txt, List *entries)
{
- tsstat *newstat;
- uint32 totallen,
- nentry;
- uint32 slen = 0;
- WordEntry **ptr = entry;
- char *curptr;
- StatEntry *sptr,
- *nptr;
-
- while (ptr - entry < len)
+ tsstat *newstat;
+ uint32 totallen,
+ nentry,
+ len = list_length(entries);
+ uint32 slen = 0;
+ WordEntry *ptr;
+ char *curptr;
+ StatEntry *sptr,
+ *nptr;
+ ListCell *entry;
+ StatEntry *PosSE = STATPTR(stat),
+ *prevPosSE;
+ WordEntryMark *mark;
+
+ foreach( entry, entries )
{
- slen += (*ptr)->len;
- ptr++;
+ mark = (WordEntryMark*)lfirst(entry);
+ slen += mark->newentry->len;
}
nentry = stat->size + len;
@@ -815,78 +846,46 @@ formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
- ptr = entry;
sptr = STATPTR(stat);
nptr = STATPTR(newstat);
- if (len == 1)
+ foreach(entry, entries)
{
- StatEntry *StopLow = STATPTR(stat);
- StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
+ prevPosSE = PosSE;
- while (StopLow < StopHigh)
+ mark = (WordEntryMark*)lfirst(entry);
+ ptr = mark->newentry;
+ PosSE = mark->pos;
+
+ /*
+ * Copy missed entries
+ */
+ if ( PosSE > prevPosSE )
{
- sptr = StopLow + (StopHigh - StopLow) / 2;
- if (compareStatWord(sptr, *ptr, stat, txt) < 0)
- StopLow = sptr + 1;
- else
- StopHigh = sptr;
+ memcpy( nptr, prevPosSE, sizeof(StatEntry) * (PosSE-prevPosSE) );
+ nptr += PosSE-prevPosSE;
}
- nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
- memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
- if ((*ptr)->haspos)
- nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
+
+ /*
+ * Copy new entry
+ */
+ if (ptr->haspos)
+ nptr->nentry = (stat->weight) ? check_weight(txt, ptr, stat->weight) : POSDATALEN(txt, ptr);
else
nptr->nentry = 1;
nptr->ndoc = 1;
- nptr->len = (*ptr)->len;
- memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
+ nptr->len = ptr->len;
+ memcpy(curptr, STRPTR(txt) + ptr->pos, nptr->len);
nptr->pos = curptr - STATSTRPTR(newstat);
- memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
- }
- else
- {
- while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
- {
- if (compareStatWord(sptr, *ptr, stat, txt) < 0)
- {
- memcpy(nptr, sptr, sizeof(StatEntry));
- sptr++;
- }
- else
- {
- if ((*ptr)->haspos)
- nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
- else
- nptr->nentry = 1;
- nptr->ndoc = 1;
- nptr->len = (*ptr)->len;
- memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
- nptr->pos = curptr - STATSTRPTR(newstat);
- curptr += nptr->len;
- ptr++;
- }
- nptr++;
- }
+ curptr += nptr->len;
+ nptr++;
- memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
-
- while (ptr - entry < len)
- {
- if ((*ptr)->haspos)
- nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
- else
- nptr->nentry = 1;
- nptr->ndoc = 1;
- nptr->len = (*ptr)->len;
- memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
- nptr->pos = curptr - STATSTRPTR(newstat);
- curptr += nptr->len;
- ptr++;
- nptr++;
- }
+ pfree(mark);
}
+ if ( PosSE < (StatEntry *) STATSTRPTR(stat) )
+ memcpy(nptr, PosSE, sizeof(StatEntry) * (stat->size - (PosSE - STATPTR(stat))));
+
return newstat;
}
@@ -907,12 +906,11 @@ ts_accum(tsstat *stat, Datum data)
{
tsstat *newstat;
TSVector txt = DatumGetTSVector(data);
- WordEntry **newentry = NULL;
- uint32 len = 0,
- cur = 0;
StatEntry *sptr;
WordEntry *wptr;
int n = 0;
+ List *newentries=NIL;
+ StatEntry *StopLow;
if (stat == NULL)
{ /* Init in first */
@@ -932,16 +930,23 @@ ts_accum(tsstat *stat, Datum data)
sptr = STATPTR(stat);
wptr = ARRPTR(txt);
+ StopLow = STATPTR(stat);
- if (stat->size < 100 * txt->size)
- { /* merge */
- while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
- {
- int cmp = compareStatWord(sptr, wptr, stat, txt);
+ while (wptr - ARRPTR(txt) < txt->size)
+ {
+ StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
+ int cmp;
+
+ /*
+ * We do not reset StopLow to the beginning of the array because tsvector is
+ * ordered by the same rule, so we can continue from the last stopped position
+ */
- if (cmp < 0)
- sptr++;
- else if (cmp == 0)
+ while (StopLow < StopHigh)
+ {
+ sptr = StopLow + (StopHigh - StopLow) / 2;
+ cmp = compareStatWord(sptr, wptr, stat, txt);
+ if (cmp == 0)
{
if (stat->weight == 0)
{
@@ -953,90 +958,38 @@ ts_accum(tsstat *stat, Datum data)
sptr->ndoc++;
sptr->nentry += n;
}
- sptr++;
- wptr++;
+ break;
}
+ else if (cmp < 0)
+ StopLow = sptr + 1;
else
- {
- if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
- {
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
- cur++;
- }
- wptr++;
- }
+ StopHigh = sptr;
}
- while (wptr - ARRPTR(txt) < txt->size)
- {
+ if (StopLow >= StopHigh)
+ { /* not found */
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
{
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
- cur++;
- }
- wptr++;
- }
- }
- else
- { /* search */
- while (wptr - ARRPTR(txt) < txt->size)
- {
- StatEntry *StopLow = STATPTR(stat);
- StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
- int cmp;
+ WordEntryMark *mark = (WordEntryMark*)palloc(sizeof(WordEntryMark));
- while (StopLow < StopHigh)
- {
- sptr = StopLow + (StopHigh - StopLow) / 2;
- cmp = compareStatWord(sptr, wptr, stat, txt);
- if (cmp == 0)
- {
- if (stat->weight == 0)
- {
- sptr->ndoc++;
- sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
- }
- else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
- {
- sptr->ndoc++;
- sptr->nentry += n;
- }
- break;
- }
- else if (cmp < 0)
- StopLow = sptr + 1;
- else
- StopHigh = sptr;
- }
+ mark->newentry = wptr;
+ mark->pos = StopLow;
+ newentries = lappend( newentries, mark );
- if (StopLow >= StopHigh)
- { /* not found */
- if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
- {
- if (cur == len)
- newentry = SEI_realloc(newentry, &len);
- newentry[cur] = wptr;
- cur++;
- }
}
- wptr++;
}
+ wptr++;
}
-
- if (cur == 0)
+ if (list_length(newentries) == 0)
{ /* no new words */
if (txt != (TSVector) DatumGetPointer(data))
pfree(txt);
return stat;
}
- newstat = formstat(stat, txt, newentry, cur);
- pfree(newentry);
+ newstat = formstat(stat, txt, newentries);
+ list_free(newentries);
if (txt != (TSVector) DatumGetPointer(data))
pfree(txt);
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index add5df6ba11..353dd4e3f72 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -4,7 +4,7 @@
*
* Copyright (c) 2006-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.19 2008/05/12 00:00:53 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.20 2008/05/16 16:31:01 tgl Exp $
*--------------------------------------------------------------------------
*/
@@ -15,6 +15,7 @@
#include "access/itup.h"
#include "access/relscan.h"
#include "fmgr.h"
+#include "nodes/tidbitmap.h"
#include "storage/block.h"
#include "storage/buf.h"
#include "storage/off.h"
@@ -28,7 +29,8 @@
#define GIN_EXTRACTVALUE_PROC 2
#define GIN_EXTRACTQUERY_PROC 3
#define GIN_CONSISTENT_PROC 4
-#define GINNProcs 4
+#define GIN_COMPARE_PARTIAL_PROC 5
+#define GINNProcs 5
/*
* Page opaque data in a inverted index page.
@@ -141,7 +143,10 @@ typedef struct GinState
FmgrInfo extractValueFn;
FmgrInfo extractQueryFn;
FmgrInfo consistentFn;
+ FmgrInfo comparePartialFn; /* optional method */
+ bool canPartialMatch; /* can opclass perform partial
+ * match (prefix search)? */
TupleDesc tupdesc;
} GinState;
@@ -360,6 +365,12 @@ typedef struct GinScanEntryData
/* current ItemPointer to heap */
ItemPointerData curItem;
+ /* partial match support */
+ bool isPartialMatch;
+ TIDBitmap *partialMatch;
+ TBMIterateResult *partialMatchResult;
+ StrategyNumber strategy;
+
/* used for Posting list and one page in Posting tree */
ItemPointerData *list;
uint32 nlist;
@@ -424,6 +435,7 @@ extern PGDLLIMPORT int GinFuzzySearchLimit;
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
extern Datum gingettuple(PG_FUNCTION_ARGS);
+extern void ginrestartentry(GinScanEntry entry);
/* ginvacuum.c */
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index e0bd027fdac..94d70c2c877 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.459 2008/05/15 00:17:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.460 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200805141
+#define CATALOG_VERSION_NO 200805161
#endif
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 9837a8c2003..0fe5d05e7c6 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.55 2008/04/10 22:25:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.56 2008/05/16 16:31:01 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -114,7 +114,7 @@ DESCR("hash index access method");
DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
-DATA(insert OID = 2742 ( gin 0 4 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
+DATA(insert OID = 2742 ( gin 0 5 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h
index 8a804469538..36bd0f84096 100644
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -22,7 +22,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.71 2008/03/27 03:57:34 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.72 2008/05/16 16:31:01 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -321,10 +321,11 @@ DATA(insert ( 2745 1025 1025 1 381 ));
DATA(insert ( 2745 1025 1025 2 2743 ));
DATA(insert ( 2745 1025 1025 3 2774 ));
DATA(insert ( 2745 1025 1025 4 2744 ));
-DATA(insert ( 3659 3614 3614 1 360 ));
+DATA(insert ( 3659 3614 3614 1 3724 ));
DATA(insert ( 3659 3614 3614 2 3656 ));
DATA(insert ( 3659 3614 3614 3 3657 ));
DATA(insert ( 3659 3614 3614 4 3658 ));
+DATA(insert ( 3659 3614 3614 5 2700 ));
DATA(insert ( 3626 3614 3614 1 3622 ));
DATA(insert ( 3683 3615 3615 1 3668 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index cc1d5db786b..21e094ad570 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.499 2008/05/15 00:17:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.500 2008/05/16 16:31:01 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -4018,7 +4018,7 @@ DESCR("gin(internal)");
/* GIN array support */
DATA(insert OID = 2743 ( ginarrayextract PGNSP PGUID 12 1 0 f f t f i 2 2281 "2277 2281" _null_ _null_ _null_ ginarrayextract - _null_ _null_ ));
DESCR("GIN array support");
-DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 3 2281 "2277 2281 21" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
+DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 4 2281 "2277 2281 21 2281" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ ));
DESCR("GIN array support");
DATA(insert OID = 2744 ( ginarrayconsistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 2281 2281" _null_ _null_ _null_ ginarrayconsistent - _null_ _null_ ));
DESCR("GIN array support");
@@ -4253,10 +4253,14 @@ DESCR("GiST tsvector support");
DATA(insert OID = 3656 ( gin_extract_tsvector PGNSP PGUID 12 1 0 f f t f i 2 2281 "3614 2281" _null_ _null_ _null_ gin_extract_tsvector - _null_ _null_ ));
DESCR("GIN tsvector support");
-DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 3 2281 "3615 2281 21" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
+DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 4 2281 "3615 2281 21 2281" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ ));
DESCR("GIN tsvector support");
DATA(insert OID = 3658 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 3615 2281" _null_ _null_ _null_ gin_tsquery_consistent - _null_ _null_ ));
DESCR("GIN tsvector support");
+DATA(insert OID = 3724 ( gin_cmp_tslexeme PGNSP PGUID 12 1 0 f f t f i 2 23 "25 25" _null_ _null_ _null_ gin_cmp_tslexeme - _null_ _null_ ));
+DESCR("GIN tsvector support");
+DATA(insert OID = 2700 ( gin_cmp_prefix PGNSP PGUID 12 1 0 f f t f i 3 23 "25 25 21" _null_ _null_ _null_ gin_cmp_prefix - _null_ _null_ ));
+DESCR("GIN tsvector support");
DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 f f t f i 2 16 "3615 3615" _null_ _null_ _null_ tsquery_lt - _null_ _null_ ));
DESCR("less-than");
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h
index 8f21abf032c..d08d35db193 100644
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.8 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -99,6 +99,7 @@ typedef struct
} TSLexeme;
#define TSL_ADDPOS 0x01
+#define TSL_PREFIX 0x02
/*
* Struct for supporting complex dictionaries like thesaurus.
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 9f5e63e2273..42680408a91 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.10 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.11 2008/05/16 16:31:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -179,6 +179,7 @@ typedef struct
* bitmask of allowed weights. if it =0 then
* any weight are allowed. Weights and bit
* map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */
+ bool prefix; /* true if it's a prefix search */
int32 valcrc; /* XXX: pg_crc32 would be a more appropriate
* data type, but we use comparisons to signed
* integers in the code. They would need to be
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index 0085b64951a..6afd4c99f7a 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
*
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.14 2008/04/21 00:26:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.15 2008/05/16 16:31:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -42,9 +42,10 @@ typedef struct TSQueryParserStateData *TSQueryParserState;
typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
char *token, int tokenlen,
- int2 tokenweights /* bitmap as described
+ int2 tokenweights, /* bitmap as described
* in QueryOperand
- struct */ );
+ * struct */
+ bool prefix);
extern TSQuery parse_tsquery(char *buf,
PushFunction pushval,
@@ -52,7 +53,7 @@ extern TSQuery parse_tsquery(char *buf,
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
- char *strval, int lenval, int2 weight);
+ char *strval, int lenval, int2 weight, bool prefix);
extern void pushStop(TSQueryParserState state);
extern void pushOperator(TSQueryParserState state, int8 operator);
@@ -74,6 +75,7 @@ typedef struct
*/
uint16 *apos;
} pos;
+ uint16 flags; /* currently, only TSL_PREFIX */
char *word;
uint32 alen;
} ParsedWord;
@@ -110,6 +112,7 @@ extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
* to_ts* - text transformation to tsvector, tsquery
*/
extern TSVector make_tsvector(ParsedText *prs);
+extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix);
extern Datum to_tsvector_byid(PG_FUNCTION_ARGS);
extern Datum to_tsvector(PG_FUNCTION_ARGS);
@@ -142,6 +145,8 @@ extern Datum gtsvectorout(PG_FUNCTION_ARGS);
*/
extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS);
+extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS);
+extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS);
extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS);
extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS);
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out
index ef787115742..efab1354fe3 100644
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -935,9 +935,11 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
-- Detect missing pg_amproc entries: should have as many support functions
-- as AM expects for each datatype combination supported by the opfamily.
+-- GIN is a special case because it has an optional support function.
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+ p1.amname <> 'gin' AND
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
WHERE p4.amprocfamily = p2.oid AND
p4.amproclefttype = p3.amproclefttype AND
@@ -946,18 +948,43 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
--------+---------+----------------+-----------------
(0 rows)
+-- Similar check for GIN, allowing one optional proc
+SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
+FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
+WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+ p1.amname = 'gin' AND
+ p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4
+ WHERE p4.amprocfamily = p2.oid AND
+ p4.amproclefttype = p3.amproclefttype AND
+ p4.amprocrighttype = p3.amprocrighttype);
+ amname | opfname | amproclefttype | amprocrighttype
+--------+---------+----------------+-----------------
+(0 rows)
+
-- Also, check if there are any pg_opclass entries that don't seem to have
--- pg_amproc support.
+-- pg_amproc support. Again, GIN has to be checked separately.
SELECT amname, opcname, count(*)
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname <> 'gin'
GROUP BY amname, amsupport, opcname, amprocfamily
HAVING count(*) != amsupport OR amprocfamily IS NULL;
amname | opcname | count
--------+---------+-------
(0 rows)
+SELECT amname, opcname, count(*)
+FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
+ LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
+ amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname = 'gin'
+GROUP BY amname, amsupport, opcname, amprocfamily
+HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
+ amname | opcname | count
+--------+---------+-------
+(0 rows)
+
-- Unfortunately, we can't check the amproc link very well because the
-- signature of the function may be different for different support routines
-- or different base data types.
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
index 4b8929361a8..3ae6a671dad 100644
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -232,7 +232,7 @@ ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR
SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
- 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('ispell_tst', 'footballklubber');
@@ -256,7 +256,7 @@ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot');
to_tsvector
----------------------------------------------------------------------------------------------------
- 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7
+ 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballklubber');
@@ -287,7 +287,7 @@ SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgs
SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google');
to_tsvector
----------------------------------------------------------
- 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8
+ 'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6
(1 row)
-- test thesaurus in configuration
@@ -307,12 +307,12 @@ SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one');
SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)');
to_tsvector
-------------------------------------------------------------
- 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10
+ 'abbrev':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7
(1 row)
SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets');
to_tsvector
-------------------------------------------------------
- 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8
+ 'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8
(1 row)
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index 1f7e6ffbafc..468a623e973 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -92,6 +92,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
39
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count
+-------
+ 494
+(1 row)
+
create index wowidx on test_tsvector using gist (a);
SET enable_seqscan=OFF;
SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh';
@@ -130,6 +136,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
39
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count
+-------
+ 494
+(1 row)
+
RESET enable_seqscan;
DROP INDEX wowidx;
CREATE INDEX wowidx ON test_tsvector USING gin (a);
@@ -170,6 +182,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
39
(1 row)
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
+ count
+-------
+ 494
+(1 row)
+
RESET enable_seqscan;
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
@@ -380,7 +398,7 @@ SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.
<i <b> wow < jqw <> qwerty');
to_tsvector
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
- 'ad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 '1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23
+ '+4.0e-10':26 '-4.2':58,60 '/?ad=qwe&dw':7,10,14,22 '/?ad=qwe&dw=%20%32':25 '/awdf/dwqe/4325':46 '/usr/local/fff':45 '/wqe-324/ewr':49 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':61 '234.435':30 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':54,55,56 '455':31 '4aew.werc.ewr':15 '5.005':32 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100':24 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 'ad':17 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':37 'dw':19 'efd.r':3 'ewr1':43 'ewri2':44 'gist.c':52 'gist.h':50 'gist.h.c':51 'hjwer':42 'jf':39 'jqw':64 'qwe':2,18,27,28,35 'qwe-wer':34 'qwer':38 'qwerti':65 'qwqwe':29 'readlin':53,57,59 'rewt/ewr':47 'sdjk':40 'teodor@stack.net':33 'wefjn':48 'wer':36 'wow':63 'www.com':4
(1 row)
SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>">
@@ -852,7 +870,7 @@ SET default_text_search_config=simple;
SELECT to_tsvector('SKIES My booKs');
to_tsvector
----------------------------
- 'my':2 'books':3 'skies':1
+ 'books':3 'my':2 'skies':1
(1 row)
SELECT plainto_tsquery('SKIES My booKs');
@@ -871,7 +889,7 @@ SET default_text_search_config=english;
SELECT to_tsvector('SKIES My booKs');
to_tsvector
------------------
- 'sky':1 'book':3
+ 'book':3 'sky':1
(1 row)
SELECT plainto_tsquery('SKIES My booKs');
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 4672f099e0a..6284fb61813 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -44,31 +44,31 @@ SELECT E'''1 \\''2'''::tsvector;
SELECT E'''1 \\''2''3'::tsvector;
tsvector
-------------
- '3' '1 ''2'
+ '1 ''2' '3'
(1 row)
SELECT E'''1 \\''2'' 3'::tsvector;
tsvector
-------------
- '3' '1 ''2'
+ '1 ''2' '3'
(1 row)
SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector;
tsvector
------------------
- '4' ' 3' '1 ''2'
+ ' 3' '1 ''2' '4'
(1 row)
SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
tsvector
----------------------------------------
- '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
+ 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
(1 row)
SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
tsvectorin
----------------------------------------
- '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c'
+ 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc'
(1 row)
SELECT '''w'':4A,3B,2C,1D,5 a:8';
@@ -86,13 +86,13 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
setweight
----------------------------------------------------------
- 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C
+ 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
(1 row)
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
---------------
- 'a' 'w' 'asd'
+ 'a' 'asd' 'w'
(1 row)
--Base tsquery test
@@ -336,6 +336,12 @@ SELECT $$'\\as'$$::tsquery;
'\\as'
(1 row)
+SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
+ tsquery
+------------------------------------------
+ ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo'
+(1 row)
+
SELECT 'a' < 'b & c'::tsquery as "true";
true
------
@@ -439,12 +445,96 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
t
(1 row)
+SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
+ false
+-------
+ f
+(1 row)
+
+SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
ts_rank
-----------
0.0911891
(1 row)
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
+ ts_rank
+-----------
+ 0.0303964
+(1 row)
+
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+ ts_rank
+-----------
+ 0.0911891
+(1 row)
+
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+ ts_rank
+-----------
+ 0.0911891
+(1 row)
+
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
ts_rank
----------
@@ -481,6 +571,30 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
0.3
(1 row)
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
+ ts_rank_cd
+------------
+ 0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+ ts_rank_cd
+------------
+ 0.3
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+ ts_rank_cd
+------------
+ 0.3
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
+ ts_rank_cd
+------------
+ 0.5
+(1 row)
+
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
ts_rank_cd
------------
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql
index deaa3714fef..84b2d800f78 100644
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -746,25 +746,47 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND
-- Detect missing pg_amproc entries: should have as many support functions
-- as AM expects for each datatype combination supported by the opfamily.
+-- GIN is a special case because it has an optional support function.
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+ p1.amname <> 'gin' AND
p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4
WHERE p4.amprocfamily = p2.oid AND
p4.amproclefttype = p3.amproclefttype AND
p4.amprocrighttype = p3.amprocrighttype);
+-- Similar check for GIN, allowing one optional proc
+
+SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype
+FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3
+WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND
+ p1.amname = 'gin' AND
+ p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4
+ WHERE p4.amprocfamily = p2.oid AND
+ p4.amproclefttype = p3.amproclefttype AND
+ p4.amprocrighttype = p3.amprocrighttype);
+
-- Also, check if there are any pg_opclass entries that don't seem to have
--- pg_amproc support.
+-- pg_amproc support. Again, GIN has to be checked separately.
SELECT amname, opcname, count(*)
FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname <> 'gin'
GROUP BY amname, amsupport, opcname, amprocfamily
HAVING count(*) != amsupport OR amprocfamily IS NULL;
+SELECT amname, opcname, count(*)
+FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid
+ LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND
+ amproclefttype = amprocrighttype AND amproclefttype = opcintype
+WHERE am.amname = 'gin'
+GROUP BY amname, amsupport, opcname, amprocfamily
+HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL;
+
-- Unfortunately, we can't check the amproc link very well because the
-- signature of the function may be different for different support routines
-- or different base data types.
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 3cf34524d34..dc7427d3b1d 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -47,6 +47,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
create index wowidx on test_tsvector using gist (a);
@@ -58,6 +59,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
RESET enable_seqscan;
@@ -73,6 +75,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt';
SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)';
SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)';
+SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*';
RESET enable_seqscan;
INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH');
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index 49afc3b23a2..fd7c7024f5e 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -58,6 +58,7 @@ SELECT '1&(2&(4&(5|6)))'::tsquery;
SELECT '1&(2&(4&(5|!6)))'::tsquery;
SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
SELECT $$'\\as'$$::tsquery;
+SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";
@@ -81,8 +82,23 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B' as "true";
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true";
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false";
SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true";
+SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false";
+SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true";
+SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true";
+SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true";
+
+SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false";
+SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
+SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s');
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s');
@@ -90,6 +106,10 @@ SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a & s');
SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s');
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*');
+SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*');
+SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*');
SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s');
SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');