diff options
Diffstat (limited to 'src')
28 files changed, 1139 insertions, 474 deletions
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 3bedcc99606..3d60d337df4 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.15 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.16 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -18,8 +18,13 @@ #include "catalog/index.h" #include "miscadmin.h" #include "storage/bufmgr.h" +#include "utils/datum.h" #include "utils/memutils.h" + +/* + * Tries to refind previously taken ItemPointer on page. + */ static bool findItemInPage(Page page, ItemPointer item, OffsetNumber *off) { @@ -46,8 +51,204 @@ findItemInPage(Page page, ItemPointer item, OffsetNumber *off) } /* - * Start* functions setup state of searches: find correct buffer and locks it, - * Stop* functions unlock buffer (but don't release!) 
+ * Goes to the next page if current offset is outside of bounds + */ +static bool +moveRightIfItNeeded( GinBtreeData *btree, GinBtreeStack *stack ) +{ + Page page = BufferGetPage(stack->buffer); + + if ( stack->off > PageGetMaxOffsetNumber(page) ) + { + /* + * We scanned the whole page, so we should take right page + */ + stack->blkno = GinPageGetOpaque(page)->rightlink; + + if ( GinPageRightMost(page) ) + return false; /* no more pages */ + + LockBuffer(stack->buffer, GIN_UNLOCK); + stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index, stack->blkno); + LockBuffer(stack->buffer, GIN_SHARE); + stack->off = FirstOffsetNumber; + } + + return true; +} + +/* + * Does fullscan of posting tree and saves ItemPointers + * in scanEntry->partialMatch TIDBitmap + */ +static void +scanForItems( Relation index, GinScanEntry scanEntry, BlockNumber rootPostingTree ) +{ + GinPostingTreeScan *gdi; + Buffer buffer; + Page page; + BlockNumber blkno; + + gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); + + buffer = scanBeginPostingTree(gdi); + IncrBufferRefCount(buffer); /* prevent unpin in freeGinBtreeStack */ + + freeGinBtreeStack(gdi->stack); + pfree(gdi); + + /* + * Goes through all leaves + */ + for(;;) + { + page = BufferGetPage(buffer); + + if ((GinPageGetOpaque(page)->flags & GIN_DELETED) == 0 && GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber ) + { + tbm_add_tuples( scanEntry->partialMatch, + (ItemPointer)GinDataPageGetItem(page, FirstOffsetNumber), + GinPageGetOpaque(page)->maxoff, false); + scanEntry->predictNumberResult += GinPageGetOpaque(page)->maxoff; + } + + blkno = GinPageGetOpaque(page)->rightlink; + if ( GinPageRightMost(page) ) + { + UnlockReleaseBuffer(buffer); + return; /* no more pages */ + } + + LockBuffer(buffer, GIN_UNLOCK); + buffer = ReleaseAndReadBuffer(buffer, index, blkno); + LockBuffer(buffer, GIN_SHARE); + } +} + +/* + * Collects all ItemPointer into the TIDBitmap struct + * for entries partially matched to search entry. 
+ * + * Returns true if done, false if it's needed to restart scan from scratch + */ +static bool +computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry scanEntry ) +{ + Page page; + IndexTuple itup; + Datum idatum; + bool isnull; + int32 cmp; + + scanEntry->partialMatch = tbm_create( work_mem * 1024L ); + + for(;;) + { + /* + * stack->off points to the interested entry, buffer is already locked + */ + if ( moveRightIfItNeeded(btree, stack) == false ) + return true; + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + idatum = index_getattr(itup, 1, btree->ginstate->tupdesc, &isnull); + Assert(!isnull); + + /*---------- + * Check of partial match. + * case cmp == 0 => match + * case cmp > 0 => not match and finish scan + * case cmp < 0 => not match and continue scan + *---------- + */ + cmp = DatumGetInt32(FunctionCall3(&btree->ginstate->comparePartialFn, + scanEntry->entry, + idatum, + UInt16GetDatum(scanEntry->strategy))); + + if ( cmp > 0 ) + return true; + else if ( cmp < 0 ) + { + stack->off++; + continue; + } + + if ( GinIsPostingTree(itup) ) + { + BlockNumber rootPostingTree = GinGetPostingTree(itup); + Datum newDatum, + savedDatum = datumCopy ( + idatum, + btree->ginstate->tupdesc->attrs[0]->attbyval, + btree->ginstate->tupdesc->attrs[0]->attlen + ); + /* + * We should unlock current page (but not unpin) during + * tree scan to prevent deadlock with vacuum processes. + * + * We save current entry value (savedDatum) to be able to refind + * our tuple after re-locking + */ + LockBuffer(stack->buffer, GIN_UNLOCK); + scanForItems( btree->index, scanEntry, rootPostingTree ); + + /* + * We lock again the entry page and while it was unlocked + * insert might occured, so we need to refind our position + */ + LockBuffer(stack->buffer, GIN_SHARE); + page = BufferGetPage(stack->buffer); + if ( !GinPageIsLeaf(page) ) + { + /* + * Root page becomes non-leaf while we unlock it. 
We + * will start again, this situation doesn't occur + * often - root can become a non-leaf only once per + * life of index. + */ + + return false; + } + + for(;;) + { + if ( moveRightIfItNeeded(btree, stack) == false ) + elog(ERROR, "lost saved point in index"); /* must not happen !!! */ + + page = BufferGetPage(stack->buffer); + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stack->off)); + newDatum = index_getattr(itup, FirstOffsetNumber, btree->ginstate->tupdesc, &isnull); + + if ( compareEntries(btree->ginstate, newDatum, savedDatum) == 0 ) + { + /* Found! */ + if ( btree->ginstate->tupdesc->attrs[0]->attbyval == false ) + pfree( DatumGetPointer(savedDatum) ); + break; + } + + stack->off++; + } + } + else + { + tbm_add_tuples( scanEntry->partialMatch, GinGetPosting(itup), GinGetNPosting(itup), false); + scanEntry->predictNumberResult += GinGetNPosting(itup); + } + + /* + * Ok, we save ItemPointers, go to the next entry + */ + stack->off++; + } + + return true; +} + +/* + * Start* functions setup beginning state of searches: finds correct buffer and pins it. */ static void startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) @@ -78,10 +279,45 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) entry->offset = InvalidOffsetNumber; entry->list = NULL; entry->nlist = 0; + entry->partialMatch = NULL; + entry->partialMatchResult = NULL; entry->reduceResult = FALSE; entry->predictNumberResult = 0; - if (btreeEntry.findItem(&btreeEntry, stackEntry)) + if ( entry->isPartialMatch ) + { + /* + * btreeEntry.findItem points to the first equal or greater value + * than needed. So we will scan further and collect all + * ItemPointers + */ + btreeEntry.findItem(&btreeEntry, stackEntry); + if ( computePartialMatchList( &btreeEntry, stackEntry, entry ) == false ) + { + /* + * GIN tree was seriously restructured, so we will + * cleanup all found data and rescan.
See comments near + * 'return false' in computePartialMatchList() + */ + if ( entry->partialMatch ) + { + tbm_free( entry->partialMatch ); + entry->partialMatch = NULL; + } + LockBuffer(stackEntry->buffer, GIN_UNLOCK); + freeGinBtreeStack(stackEntry); + + startScanEntry(index, ginstate, entry); + return; + } + + if ( entry->partialMatch && !tbm_is_empty(entry->partialMatch) ) + { + tbm_begin_iterate(entry->partialMatch); + entry->isFinished = FALSE; + } + } + else if (btreeEntry.findItem(&btreeEntry, stackEntry)) { IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, stackEntry->off)); @@ -91,6 +327,13 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) GinPostingTreeScan *gdi; Page page; + /* + * We should unlock entry page before make deal with + * posting tree to prevent deadlocks with vacuum processes. + * Because entry is never deleted from page and posting tree is + * never reduced to the posting list, we can unlock page after + * getting BlockNumber of root of posting tree. 
+ */ LockBuffer(stackEntry->buffer, GIN_UNLOCK); needUnlock = FALSE; gdi = prepareScanPostingTree(index, rootPostingTree, TRUE); @@ -111,7 +354,7 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry) */ entry->list = (ItemPointerData *) palloc( BLCKSZ ); entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); @@ -142,7 +385,14 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) return; for (i = 0; i < key->nentries; i++) + { startScanEntry(index, ginstate, key->scanEntry + i); + /* + * Copy strategy number to each entry of key to + * use in comparePartialFn call + */ + key->scanEntry[i].strategy = key->strategy; + } memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); key->isFinished = FALSE; @@ -233,12 +483,12 @@ entryGetNextItem(Relation index, GinScanEntry entry) * Found position equal to or greater than stored */ entry->nlist = GinPageGetOpaque(page)->maxoff; - memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), + memcpy( entry->list, GinDataPageGetItem(page, FirstOffsetNumber), GinPageGetOpaque(page)->maxoff * sizeof(ItemPointerData) ); LockBuffer(entry->buffer, GIN_UNLOCK); - if ( !ItemPointerIsValid(&entry->curItem) || + if ( !ItemPointerIsValid(&entry->curItem) || compareItemPointers( &entry->curItem, entry->list + entry->offset - 1 ) == 0 ) { /* @@ -248,7 +498,7 @@ entryGetNextItem(Relation index, GinScanEntry entry) break; } - + /* * Find greater than entry->curItem position, store it. 
*/ @@ -275,6 +525,38 @@ entryGetItem(Relation index, GinScanEntry entry) entry->isFinished = entry->master->isFinished; entry->curItem = entry->master->curItem; } + else if ( entry->partialMatch ) + { + do + { + if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples ) + { + entry->partialMatchResult = tbm_iterate( entry->partialMatch ); + + if ( entry->partialMatchResult == NULL ) + { + ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + entry->isFinished = TRUE; + break; + } + else if ( entry->partialMatchResult->ntuples < 0 ) + { + /* bitmap became lossy */ + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("not enough memory to store result of partial match operator" ), + errhint("Increase the \"work_mem\" parameter."))); + } + entry->offset = 0; + } + + ItemPointerSet(&entry->curItem, + entry->partialMatchResult->blockno, + entry->partialMatchResult->offsets[ entry->offset ]); + entry->offset ++; + + } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); + } else if (!BufferIsValid(entry->buffer)) { entry->offset++; @@ -298,6 +580,54 @@ entryGetItem(Relation index, GinScanEntry entry) } /* + * restart from saved position. Actually it's needed only for + * partial match. 
Function is called only by ginrestrpos() + */ +void +ginrestartentry(GinScanEntry entry) +{ + ItemPointerData stopItem = entry->curItem; + bool savedReduceResult; + + if ( entry->master || entry->partialMatch == NULL ) + return; /* entry is slave or not a partial match type*/ + + if ( entry->isFinished ) + return; /* entry was finished before ginmarkpos() call */ + + if ( ItemPointerGetBlockNumber(&stopItem) == InvalidBlockNumber ) + return; /* entry wasn't begun before ginmarkpos() call */ + + /* + * Reset iterator + */ + tbm_begin_iterate( entry->partialMatch ); + entry->partialMatchResult = NULL; + entry->offset = 0; + + /* + * Temporarily reset reduceResult flag to guarantee refinding + * of curItem + */ + savedReduceResult = entry->reduceResult; + entry->reduceResult = FALSE; + + do + { + /* + * We can use null instead of index because + * partial match doesn't use it + */ + if ( entryGetItem( NULL, entry ) == false ) + elog(ERROR, "cannot refind scan position"); /* must not be here! */ + } while( compareItemPointers( &stopItem, &entry->curItem ) != 0 ); + + Assert( entry->isFinished == FALSE ); + + entry->reduceResult = savedReduceResult; +} + +/* * Sets key->curItem to new found heap item pointer for one scan key * Returns isFinished, ie TRUE means we did NOT get a new item pointer! * Also, *keyrecheck is set true if recheck is needed for this scan key.
@@ -494,7 +824,7 @@ gingettuple(PG_FUNCTION_ARGS) bool res; if (dir != ForwardScanDirection) - elog(ERROR, "Gin doesn't support other scan directions than forward"); + elog(ERROR, "GIN doesn't support other scan directions than forward"); if (GinIsNewKey(scan)) newScanKey(scan); diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 10a528817e6..cec24fbfdbd 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.13 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.14 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -36,7 +36,8 @@ ginbeginscan(PG_FUNCTION_ARGS) static void fillScanKey(GinState *ginstate, GinScanKey key, Datum query, - Datum *entryValues, uint32 nEntryValues, StrategyNumber strategy) + Datum *entryValues, bool *partial_matches, uint32 nEntryValues, + StrategyNumber strategy) { uint32 i, j; @@ -58,6 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, Datum query, key->scanEntry[i].buffer = InvalidBuffer; key->scanEntry[i].list = NULL; key->scanEntry[i].nlist = 0; + key->scanEntry[i].isPartialMatch = ( ginstate->canPartialMatch && partial_matches ) + ? 
partial_matches[i] : false; /* link to the equals entry in current scan key */ key->scanEntry[i].master = NULL; @@ -98,6 +101,8 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].list = NULL; key->scanEntry[j].nlist = 0; + key->scanEntry[j].partialMatch = NULL; + key->scanEntry[j].partialMatchResult = NULL; } } } @@ -122,6 +127,8 @@ freeScanKeys(GinScanKey keys, uint32 nkeys, bool removeRes) ReleaseBuffer(key->scanEntry[j].buffer); if (removeRes && key->scanEntry[j].list) pfree(key->scanEntry[j].list); + if (removeRes && key->scanEntry[j].partialMatch) + tbm_free(key->scanEntry[j].partialMatch); } if (removeRes) @@ -153,19 +160,21 @@ newScanKey(IndexScanDesc scan) { Datum *entryValues; int32 nEntryValues; + bool *partial_matches = NULL; - if (scankey[i].sk_flags & SK_ISNULL) - elog(ERROR, "Gin doesn't support NULL as scan key"); Assert(scankey[i].sk_attno == 1); - entryValues = (Datum *) DatumGetPointer( - FunctionCall3( + /* XXX can't we treat nulls by just setting isVoidRes? */ + /* This would amount to assuming that all GIN operators are strict */ + if (scankey[i].sk_flags & SK_ISNULL) + elog(ERROR, "GIN doesn't support NULL as scan key"); + + entryValues = (Datum *) DatumGetPointer(FunctionCall4( &so->ginstate.extractQueryFn, scankey[i].sk_argument, PointerGetDatum(&nEntryValues), - UInt16GetDatum(scankey[i].sk_strategy) - ) - ); + UInt16GetDatum(scankey[i].sk_strategy), + PointerGetDatum(&partial_matches))); if (nEntryValues < 0) { /* @@ -175,12 +184,16 @@ newScanKey(IndexScanDesc scan) so->isVoidRes = true; break; } + + /* + * extractQueryFn signals that everything matches + */ if (entryValues == NULL || nEntryValues == 0) /* full scan... 
*/ continue; fillScanKey(&so->ginstate, &(so->keys[nkeys]), scankey[i].sk_argument, - entryValues, nEntryValues, scankey[i].sk_strategy); + entryValues, partial_matches, nEntryValues, scankey[i].sk_strategy); nkeys++; } @@ -253,7 +266,7 @@ ginendscan(PG_FUNCTION_ARGS) } static GinScanKey -copyScanKeys(GinScanKey keys, uint32 nkeys) +copyScanKeys(GinScanKey keys, uint32 nkeys, bool restart) { GinScanKey newkeys; uint32 i, @@ -277,6 +290,9 @@ copyScanKeys(GinScanKey keys, uint32 nkeys) newkeys[i].scanEntry[j].master = newkeys[i].scanEntry + masterN; } + + if ( restart ) + ginrestartentry( &keys[i].scanEntry[j] ); } } @@ -290,7 +306,7 @@ ginmarkpos(PG_FUNCTION_ARGS) GinScanOpaque so = (GinScanOpaque) scan->opaque; freeScanKeys(so->markPos, so->nkeys, FALSE); - so->markPos = copyScanKeys(so->keys, so->nkeys); + so->markPos = copyScanKeys(so->keys, so->nkeys, FALSE); PG_RETURN_VOID(); } @@ -302,7 +318,7 @@ ginrestrpos(PG_FUNCTION_ARGS) GinScanOpaque so = (GinScanOpaque) scan->opaque; freeScanKeys(so->keys, so->nkeys, FALSE); - so->keys = copyScanKeys(so->markPos, so->nkeys); + so->keys = copyScanKeys(so->markPos, so->nkeys, TRUE); PG_RETURN_VOID(); } diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index 7da7689f826..36105e20d2d 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.14 2008/05/12 00:00:44 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.15 2008/05/16 16:31:01 tgl Exp $ *------------------------------------------------------------------------- */ @@ -41,6 +41,22 @@ initGinState(GinState *state, Relation index) fmgr_info_copy(&(state->consistentFn), index_getprocinfo(index, 1, GIN_CONSISTENT_PROC), CurrentMemoryContext); + + /* + * Check opclass capability to do partial match. 
+ */ + if ( index_getprocid(index, 1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid ) + { + fmgr_info_copy(&(state->comparePartialFn), + index_getprocinfo(index, 1, GIN_COMPARE_PARTIAL_PROC), + CurrentMemoryContext); + + state->canPartialMatch = true; + } + else + { + state->canPartialMatch = false; + } } /* diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 908dbc481f2..4fc7c536548 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.229 2008/04/13 20:51:20 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.230 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -2364,7 +2364,10 @@ expand_boolean_index_clause(Node *clause, * expand_indexqual_opclause --- expand a single indexqual condition * that is an operator clause * - * The input is a single RestrictInfo, the output a list of RestrictInfos + * The input is a single RestrictInfo, the output a list of RestrictInfos. + * + * In the base case this is just list_make1(), but we have to be prepared to + * expand special cases that were accepted by match_special_index_operator(). */ static List * expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) @@ -2379,63 +2382,77 @@ expand_indexqual_opclause(RestrictInfo *rinfo, Oid opfamily) Const *prefix = NULL; Const *rest = NULL; Pattern_Prefix_Status pstatus; - List *result; + /* + * LIKE and regex operators are not members of any btree index opfamily, + * but they can be members of opfamilies for more exotic index types such + * as GIN. Therefore, we should only do expansion if the operator is + * actually not in the opfamily. But checking that requires a syscache + * lookup, so it's best to first see if the operator is one we are + * interested in. 
+ */ switch (expr_op) { - /* - * LIKE and regex operators are not members of any index opfamily, - * so if we find one in an indexqual list we can assume that it - * was accepted by match_special_index_operator(). - */ case OID_TEXT_LIKE_OP: case OID_BPCHAR_LIKE_OP: case OID_NAME_LIKE_OP: case OID_BYTEA_LIKE_OP: - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, - &prefix, &rest); - result = prefix_quals(leftop, opfamily, prefix, pstatus); + if (!op_in_opfamily(expr_op, opfamily)) + { + pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like, + &prefix, &rest); + return prefix_quals(leftop, opfamily, prefix, pstatus); + } break; case OID_TEXT_ICLIKE_OP: case OID_BPCHAR_ICLIKE_OP: case OID_NAME_ICLIKE_OP: - /* the right-hand const is type text for all of these */ - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, - &prefix, &rest); - result = prefix_quals(leftop, opfamily, prefix, pstatus); + if (!op_in_opfamily(expr_op, opfamily)) + { + /* the right-hand const is type text for all of these */ + pstatus = pattern_fixed_prefix(patt, Pattern_Type_Like_IC, + &prefix, &rest); + return prefix_quals(leftop, opfamily, prefix, pstatus); + } break; case OID_TEXT_REGEXEQ_OP: case OID_BPCHAR_REGEXEQ_OP: case OID_NAME_REGEXEQ_OP: - /* the right-hand const is type text for all of these */ - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, - &prefix, &rest); - result = prefix_quals(leftop, opfamily, prefix, pstatus); + if (!op_in_opfamily(expr_op, opfamily)) + { + /* the right-hand const is type text for all of these */ + pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex, + &prefix, &rest); + return prefix_quals(leftop, opfamily, prefix, pstatus); + } break; case OID_TEXT_ICREGEXEQ_OP: case OID_BPCHAR_ICREGEXEQ_OP: case OID_NAME_ICREGEXEQ_OP: - /* the right-hand const is type text for all of these */ - pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, - &prefix, &rest); - result = prefix_quals(leftop, opfamily, prefix, pstatus); + if 
(!op_in_opfamily(expr_op, opfamily)) + { + /* the right-hand const is type text for all of these */ + pstatus = pattern_fixed_prefix(patt, Pattern_Type_Regex_IC, + &prefix, &rest); + return prefix_quals(leftop, opfamily, prefix, pstatus); + } break; case OID_INET_SUB_OP: case OID_INET_SUBEQ_OP: - result = network_prefix_quals(leftop, expr_op, opfamily, - patt->constvalue); - break; - - default: - result = list_make1(rinfo); + if (!op_in_opfamily(expr_op, opfamily)) + { + return network_prefix_quals(leftop, expr_op, opfamily, + patt->constvalue); + } break; } - return result; + /* Default case: just make a list of the unmodified indexqual */ + return list_make1(rinfo); } /* diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index a217ff0c5bc..0e00252daae 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.11 2008/03/25 22:42:43 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.12 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -32,23 +32,22 @@ get_current_ts_config(PG_FUNCTION_ARGS) static int compareWORD(const void *a, const void *b) { - if (((ParsedWord *) a)->len == ((ParsedWord *) b)->len) + int res; + + res = tsCompareString( + ((ParsedWord *) a)->word, ((ParsedWord *) a)->len, + ((ParsedWord *) b)->word, ((ParsedWord *) b)->len, + false ); + + if (res == 0) { - int res = strncmp( - ((ParsedWord *) a)->word, - ((ParsedWord *) b)->word, - ((ParsedWord *) b)->len); + if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos) + return 0; - if (res == 0) - { - if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos) - return 0; - - return (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1; - } - return res; + res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 
1 : -1; } - return (((ParsedWord *) a)->len > ((ParsedWord *) b)->len) ? 1 : -1; + + return res; } static int @@ -268,7 +267,7 @@ to_tsvector(PG_FUNCTION_ARGS) * and different variants are ORred together. */ static void -pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight) +pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix) { int4 count = 0; ParsedText prs; @@ -302,7 +301,8 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant) { - pushValue(state, prs.words[count].word, prs.words[count].len, weight); + pushValue(state, prs.words[count].word, prs.words[count].len, weight, + ( (prs.words[count].flags & TSL_PREFIX) || prefix ) ? true : false ); pfree(prs.words[count].word); if (cnt) pushOperator(state, OP_AND); diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c index 22862bdb806..0634f54a71b 100644 --- a/src/backend/tsearch/ts_parse.c +++ b/src/backend/tsearch/ts_parse.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.7 2008/01/01 19:45:52 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.8 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -415,6 +415,7 @@ parsetext(Oid cfgId, ParsedText *prs, char *buf, int buflen) prs->words[prs->curwords].len = strlen(ptr->lexeme); prs->words[prs->curwords].word = ptr->lexeme; prs->words[prs->curwords].nvariant = ptr->nvariant; + prs->words[prs->curwords].flags = ptr->flags & TSL_PREFIX; prs->words[prs->curwords].alen = 0; prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); ptr++; @@ -463,8 +464,8 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen) for (i = 0; i < query->size; i++) { if (item->type == QI_VAL && - 
item->operand.length == buflen && - strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0) + tsCompareString( GETOPERAND(query) + item->operand.distance, item->operand.length, + buf, buflen, item->operand.prefix ) == 0 ) { if (word->item) { diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index 55518834ae9..a09c92eebea 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.11 2008/04/14 17:05:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.12 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,6 +20,46 @@ Datum +gin_cmp_tslexeme(PG_FUNCTION_ARGS) +{ + text *a = PG_GETARG_TEXT_P(0); + text *b = PG_GETARG_TEXT_P(1); + int cmp; + + cmp = tsCompareString( + VARDATA(a), VARSIZE(a) - VARHDRSZ, + VARDATA(b), VARSIZE(b) - VARHDRSZ, + false ); + + PG_FREE_IF_COPY(a,0); + PG_FREE_IF_COPY(b,1); + PG_RETURN_INT32( cmp ); +} + +Datum +gin_cmp_prefix(PG_FUNCTION_ARGS) +{ + text *a = PG_GETARG_TEXT_P(0); + text *b = PG_GETARG_TEXT_P(1); +#ifdef NOT_USED + StrategyNumber strategy = PG_GETARG_UINT16(2); +#endif + int cmp; + + cmp = tsCompareString( + VARDATA(a), VARSIZE(a) - VARHDRSZ, + VARDATA(b), VARSIZE(b) - VARHDRSZ, + true ); + + if ( cmp < 0 ) + cmp = 1; /* prevent continue scan */ + + PG_FREE_IF_COPY(a,0); + PG_FREE_IF_COPY(b,1); + PG_RETURN_INT32( cmp ); +} + +Datum gin_extract_tsvector(PG_FUNCTION_ARGS) { TSVector vector = PG_GETARG_TSVECTOR(0); @@ -55,7 +95,9 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) TSQuery query = PG_GETARG_TSQUERY(0); int32 *nentries = (int32 *) PG_GETARG_POINTER(1); /* StrategyNumber strategy = PG_GETARG_UINT16(2); */ + bool **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3); Datum *entries = NULL; + bool *partialmatch; *nentries = 0; @@ -65,12 +107,14 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) 
j = 0, len; QueryItem *item; + bool use_fullscan=false; item = clean_NOT(GETQUERY(query), &len); if (!item) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("query requires full scan, which is not supported by GIN indexes"))); + { + use_fullscan = true; + *nentries = 1; + } item = GETQUERY(query); @@ -79,6 +123,7 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) (*nentries)++; entries = (Datum *) palloc(sizeof(Datum) * (*nentries)); + partialmatch = *ptr_partialmatch = (bool*) palloc(sizeof(bool) * (*nentries)); for (i = 0; i < query->size; i++) if (item[i].type == QI_VAL) @@ -88,8 +133,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS) txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance, val->length); + partialmatch[j] = val->prefix; entries[j++] = PointerGetDatum(txt); } + + if ( use_fullscan ) + entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0)); } else *nentries = -1; /* nothing can be found */ diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index ecbac7b40f2..b18f7131545 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.8 2008/04/14 17:05:33 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.9 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -307,6 +307,12 @@ checkcondition_arr(void *checkval, QueryOperand *val) /* Loop invariant: StopLow <= val < StopHigh */ + /* + * we are not able to find a prefix by hash value + */ + if ( val->prefix ) + return true; + while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; @@ -324,6 +330,11 @@ checkcondition_arr(void *checkval, QueryOperand *val) static bool checkcondition_bit(void *checkval, QueryOperand *val) { + /* + * we are not able to find a prefix in signature tree + */ + if ( val->prefix ) + return true;
return GETBIT(checkval, HASHVAL(val->valcrc)); } diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index e4f1beba905..9a890d2ae68 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -56,12 +56,14 @@ struct TSQueryParserStateData #define WAITSINGLEOPERAND 4 /* - * subroutine to parse the weight part, like ':1AB' of a query. + * subroutine to parse the modifiers (weight and prefix flag currently) + * part, like ':1AB' of a query. */ static char * -get_weight(char *buf, int16 *weight) +get_modifiers(char *buf, int16 *weight, bool *prefix) { *weight = 0; + *prefix = false; if (!t_iseq(buf, ':')) return buf; @@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight) case 'D': *weight |= 1; break; + case '*': + *prefix = true; + break; default: return buf; } @@ -118,8 +123,11 @@ typedef enum static ts_tokentype gettoken_query(TSQueryParserState state, int8 *operator, - int *lenval, char **strval, int16 *weight) + int *lenval, char **strval, int16 *weight, bool *prefix) { + *weight = 0; + *prefix = false; + while (1) { switch (state->state) @@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state, reset_tsvector_parser(state->valstate, state->buf); if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf)) { - state->buf = get_weight(state->buf, weight); + state->buf = get_modifiers(state->buf, weight, prefix); state->state = WAITOPERATOR; return PT_VAL; } @@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper) } static void -pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight) +pushValue_internal(TSQueryParserState state, pg_crc32 
valcrc, int distance, int lenval, int weight, bool prefix) { QueryOperand *tmp; @@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int tmp = (QueryOperand *) palloc0(sizeof(QueryOperand)); tmp->type = QI_VAL; tmp->weight = weight; + tmp->prefix = prefix; tmp->valcrc = (int32) valcrc; tmp->length = lenval; tmp->distance = distance; @@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int * of the string. */ void -pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight) +pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix) { pg_crc32 valcrc; @@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight) INIT_CRC32(valcrc); COMP_CRC32(valcrc, strval, lenval); FIN_CRC32(valcrc); - pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight); + pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix); /* append the value string to state.op, enlarging buffer if needed first */ while (state->curop - state->op + lenval + 1 >= state->lenop) @@ -330,16 +339,17 @@ makepol(TSQueryParserState state, int8 opstack[STACKDEPTH]; int lenstack = 0; int16 weight = 0; + bool prefix; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); - while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END) + while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END) { switch (type) { case PT_VAL: - pushval(opaque, state, strval, lenval, weight); + pushval(opaque, state, strval, lenval, weight, prefix); while (lenstack && (opstack[lenstack - 1] == OP_AND || opstack[lenstack - 1] == OP_NOT)) { @@ -549,9 +559,9 @@ parse_tsquery(char *buf, static void pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval, - int16 weight) + int16 weight, bool prefix) { - 
pushValue(state, strval, lenval, weight); + pushValue(state, strval, lenval, weight, prefix); } /* @@ -605,7 +615,7 @@ infix(INFIX *in, bool first) char *op = in->op + curpol->distance; int clen; - RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5); + RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6); *(in->cur) = '\''; in->cur++; while (*op) @@ -628,10 +638,15 @@ infix(INFIX *in, bool first) } *(in->cur) = '\''; in->cur++; - if (curpol->weight) + if (curpol->weight || curpol->prefix) { *(in->cur) = ':'; in->cur++; + if ( curpol->prefix ) + { + *(in->cur) = '*'; + in->cur++; + } if (curpol->weight & (1 << 3)) { *(in->cur) = 'A'; @@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS) * uint8 type, QI_VAL * uint8 weight + * uint8 prefix * operand text in client encoding, null-terminated * * For each operator: * uint8 type, QI_OPR @@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS) { case QI_VAL: pq_sendint(&buf, item->operand.weight, sizeof(uint8)); + pq_sendint(&buf, item->operand.prefix, sizeof(uint8)); pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance); break; case QI_OPR: @@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS) { size_t val_len; /* length after recoding to server encoding */ uint8 weight; + uint8 prefix; const char *val; pg_crc32 valcrc; weight = (uint8) pq_getmsgint(buf, sizeof(uint8)); + prefix = (uint8) pq_getmsgint(buf, sizeof(uint8)); val = pq_getmsgstring(buf); val_len = strlen(val); @@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS) FIN_CRC32(valcrc); item->operand.weight = weight; + item->operand.prefix = (prefix) ? 
true : false; item->operand.valcrc = (int32) valcrc; item->operand.length = val_len; item->operand.distance = datalen; diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c index b81835c9692..4cc1a2a21eb 100644 --- a/src/backend/utils/adt/tsquery_util.c +++ b/src/backend/utils/adt/tsquery_util.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.8 2008/01/01 19:45:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.9 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -125,10 +125,7 @@ QTNodeCompare(QTNode *an, QTNode *bn) return (ao->valcrc > bo->valcrc) ? -1 : 1; } - if (ao->length == bo->length) - return strncmp(an->word, bn->word, ao->length); - else - return (ao->length > bo->length) ? -1 : 1; + return tsCompareString( an->word, ao->length, bn->word, bo->length, false); } } diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c index 065c94d2097..d23e05e9939 100644 --- a/src/backend/utils/adt/tsrank.c +++ b/src/backend/utils/adt/tsrank.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -71,45 +71,60 @@ cnt_length(TSVector t) return len; } -static int -WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item) -{ - if (ptr->len == item->length) - return strncmp( - eval + ptr->pos, - qval + item->distance, - item->length); - return (ptr->len > item->length) ? 1 : -1; -} +#define WordECompareQueryItem(e,q,p,i,m) \ + tsCompareString((q) + (i)->distance, (i)->length, \ + (e) + (p)->pos, (p)->len, (m)) + /* - * Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 
'q' - * is the TSQuery containing 'item'. Returns NULL if not found. + * Returns a pointer to a WordEntry's array corresponding to 'item' from + * tsvector 't'. 'q' is the TSQuery containing 'item'. + * Returns NULL if not found. */ static WordEntry * -find_wordentry(TSVector t, TSQuery q, QueryOperand *item) +find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem) { WordEntry *StopLow = ARRPTR(t); WordEntry *StopHigh = (WordEntry *) STRPTR(t); - WordEntry *StopMiddle; + WordEntry *StopMiddle = StopHigh; int difference; - /* Loop invariant: StopLow <= item < StopHigh */ + *nitem=0; + /* Loop invariant: StopLow <= item < StopHigh */ while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item); + difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false); if (difference == 0) - return StopMiddle; - else if (difference < 0) + { + StopHigh = StopMiddle; + *nitem=1; + break; + } + else if (difference > 0) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } - return NULL; + if ( item->prefix == true ) + { + if ( StopLow >= StopHigh ) + StopMiddle = StopHigh; + + *nitem=0; + + while( StopMiddle < (WordEntry *) STRPTR(t) && + WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 ) + { + (*nitem)++; + StopMiddle++; + } + } + + return ( *nitem > 0 ) ? StopHigh : NULL; } @@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg) QueryOperand *qa = (*(QueryOperand **) a); QueryOperand *qb = (*(QueryOperand **) b); - if (qa->length == qb->length) - return strncmp(operand + qa->distance, - operand + qb->distance, - qb->length); - - return (qa->length > qb->length) ? 
1 : -1; + return tsCompareString(operand + qa->distance, qa->length, + operand + qb->distance, qb->length, + false); } /* @@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q) k, l, p; - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post, *ct; int4 dimt, lenct, - dist; + dist, + nitem; float res = -1.0; QueryOperand **item; int size = q->size; @@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q) for (i = 0; i < size; i++) { - entry = find_wordentry(t, q, item[i]); + firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; - if (entry->haspos) - pos[i] = _POSVECPTR(t, entry); - else - pos[i] = &POSNULL; - - - dimt = pos[i]->npos; - post = pos[i]->pos; - for (k = 0; k < i; k++) + while( entry - firstentry < nitem ) { - if (!pos[k]) - continue; - lenct = pos[k]->npos; - ct = pos[k]->pos; - for (l = 0; l < dimt; l++) + if (entry->haspos) + pos[i] = _POSVECPTR(t, entry); + else + pos[i] = &POSNULL; + + dimt = pos[i]->npos; + post = pos[i]->pos; + for (k = 0; k < i; k++) { - for (p = 0; p < lenct; p++) + if (!pos[k]) + continue; + lenct = pos[k]->npos; + ct = pos[k]->pos; + for (l = 0; l < dimt; l++) { - dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); - if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) + for (p = 0; p < lenct; p++) { - float curw; - - if (!dist) - dist = MAXENTRYPOS; - curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); - res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw); + dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p])); + if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL))) + { + float curw; + + if (!dist) + dist = MAXENTRYPOS; + curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist)); + res = (res < 0) ? 
curw : 1.0 - (1.0 - res) * (1.0 - curw); + } } } } + + entry++; } } pfree(pos); @@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q) static float calc_rank_or(float *w, TSVector t, TSQuery q) { - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post; int4 dimt, j, - i; + i, + nitem; float res = 0.0; QueryOperand **item; int size = q->size; @@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q) wjm; int4 jm; - entry = find_wordentry(t, q, item[i]); + firstentry = entry = find_wordentry(t, q, item[i], &nitem); if (!entry) continue; - if (entry->haspos) - { - dimt = POSDATALEN(t, entry); - post = POSDATAPTR(t, entry); - } - else + while( entry - firstentry < nitem ) { - dimt = POSNULL.npos; - post = POSNULL.pos; - } + if (entry->haspos) + { + dimt = POSDATALEN(t, entry); + post = POSDATAPTR(t, entry); + } + else + { + dimt = POSNULL.npos; + post = POSNULL.pos; + } - resj = 0.0; - wjm = -1.0; - jm = 0; - for (j = 0; j < dimt; j++) - { - resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); - if (wpos(post[j]) > wjm) + resj = 0.0; + wjm = -1.0; + jm = 0; + for (j = 0; j < dimt; j++) { - wjm = wpos(post[j]); - jm = j; + resj = resj + wpos(post[j]) / ((j + 1) * (j + 1)); + if (wpos(post[j]) > wjm) + { + wjm = wpos(post[j]); + jm = j; + } } - } /* - limit (sum(i/i^2),i->inf) = pi^2/6 - resj = sum(wi/i^2),i=1,noccurence, - wi - should be sorted desc, - don't sort for now, just choose maximum weight. This should be corrected - Oleg Bartunov + limit (sum(i/i^2),i->inf) = pi^2/6 + resj = sum(wi/i^2),i=1,noccurence, + wi - should be sorted desc, + don't sort for now, just choose maximum weight. 
This should be corrected + Oleg Bartunov */ - res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685; + + entry++; + } } if (size > 0) res = res / size; @@ -594,11 +619,13 @@ static DocRepresentation * get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) { QueryItem *item = GETQUERY(qr->query); - WordEntry *entry; + WordEntry *entry, + *firstentry; WordEntryPos *post; int4 dimt, j, - i; + i, + nitem; int len = qr->query->size * 4, cur = 0; DocRepresentation *doc; @@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen) if (QR_GET_OPERAND_EXISTS(qr, &item[i])) continue; - entry = find_wordentry(txt, qr->query, curoperand); + firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem); if (!entry) continue; - if (entry->haspos) - { - dimt = POSDATALEN(txt, entry); - post = POSDATAPTR(txt, entry); - } - else - { - dimt = POSNULL.npos; - post = POSNULL.pos; - } - - while (cur + dimt >= len) + while( entry - firstentry < nitem ) { - len *= 2; - doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); - } - - for (j = 0; j < dimt; j++) - { - if (j == 0) + if (entry->haspos) + { + dimt = POSDATALEN(txt, entry); + post = POSDATAPTR(txt, entry); + } + else { - int k; + dimt = POSNULL.npos; + post = POSNULL.pos; + } - doc[cur].nitem = 0; - doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); + while (cur + dimt >= len) + { + len *= 2; + doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len); + } - for (k = 0; k < qr->query->size; k++) + for (j = 0; j < dimt; j++) + { + if (j == 0) { - QueryOperand *kptr = &item[k].operand; - QueryOperand *iptr = &item[i].operand; - - if (k == i || - (item[k].type == QI_VAL && - compareQueryOperand(&kptr, &iptr, operand) == 0)) + int k; + + doc[cur].nitem = 0; + doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size); + + 
for (k = 0; k < qr->query->size; k++) { - /* - * if k == i, we've already checked above that it's - * type == Q_VAL - */ - doc[cur].item[doc[cur].nitem] = item + k; - doc[cur].nitem++; - QR_SET_OPERAND_EXISTS(qr, item + k); + QueryOperand *kptr = &item[k].operand; + QueryOperand *iptr = &item[i].operand; + + if (k == i || + (item[k].type == QI_VAL && + compareQueryOperand(&kptr, &iptr, operand) == 0)) + { + /* + * if k == i, we've already checked above that it's + * type == Q_VAL + */ + doc[cur].item[doc[cur].nitem] = item + k; + doc[cur].nitem++; + QR_SET_OPERAND_EXISTS(qr, item + k); + } } } + else + { + doc[cur].nitem = doc[cur - 1].nitem; + doc[cur].item = doc[cur - 1].item; + } + doc[cur].pos = WEP_GETPOS(post[j]); + doc[cur].wclass = WEP_GETWEIGHT(post[j]); + cur++; } - else - { - doc[cur].nitem = doc[cur - 1].nitem; - doc[cur].item = doc[cur - 1].item; - } - doc[cur].pos = WEP_GETPOS(post[j]); - doc[cur].wclass = WEP_GETWEIGHT(post[j]); - cur++; + + entry++; } } diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c index 18342800cc0..7a8da86423f 100644 --- a/src/backend/utils/adt/tsvector.c +++ b/src/backend/utils/adt/tsvector.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.13 2008/03/10 12:57:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.14 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -85,14 +85,9 @@ compareentry(const void *va, const void *vb, void *arg) const WordEntryIN *b = (const WordEntryIN *) vb; char *BufferStr = (char *) arg; - if (a->entry.len == b->entry.len) - { - return strncmp(&BufferStr[a->entry.pos], - &BufferStr[b->entry.pos], - a->entry.len); - } - - return (a->entry.len > b->entry.len) ? 
1 : -1; + return tsCompareString( &BufferStr[a->entry.pos], a->entry.len, + &BufferStr[b->entry.pos], b->entry.len, + false ); } /* diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 539a9be9789..4e7d50b526a 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.15 2008/04/08 18:20:29 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.16 2008/05/16 16:31:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -127,11 +127,7 @@ silly_cmp_tsvector(const TSVector a, const TSVector b) { return (aptr->haspos > bptr->haspos) ? -1 : 1; } - else if (aptr->len != bptr->len) - { - return (aptr->len > bptr->len) ? -1 : 1; - } - else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0) + else if ( (res=tsCompareString( STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) !=0 ) { return res; } @@ -286,18 +282,10 @@ tsvector_setweight(PG_FUNCTION_ARGS) PG_RETURN_POINTER(out); } -static int -compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b) -{ - if (a->len == b->len) - { - return strncmp( - ptra + a->pos, - ptrb + b->pos, - a->len); - } - return (a->len > b->len) ? 1 : -1; -} +#define compareEntry(pa, a, pb, b) \ + tsCompareString((pa) + (a)->pos, (a)->len, \ + (pb) + (b)->pos, (b)->len, \ + false) /* * Add positions from src to dest after offsetting them by maxpos. @@ -534,18 +522,46 @@ tsvector_concat(PG_FUNCTION_ARGS) } /* - * compare 2 string values + * Compare two strings by tsvector rules. 
+ * if prefix = true then it returns zero value iff b has prefix a */ -static int4 -ValCompare(CHKVAL *chkval, WordEntry *ptr, QueryOperand *item) +int4 +tsCompareString(char *a, int lena, char *b, int lenb, bool prefix) { - if (ptr->len == item->length) - return strncmp( - &(chkval->values[ptr->pos]), - &(chkval->operand[item->distance]), - item->length); + int cmp; + + if ( lena == 0 ) + { + if ( prefix ) + cmp = 0; /* empty string is equal to any string in a prefix match */ + else + cmp = (lenb>0) ? -1 : 0; + } + else if ( lenb == 0 ) + { + cmp = (lena>0) ? 1 : 0; + } + else + { + cmp = memcmp(a, b, Min(lena, lenb)); - return (ptr->len > item->length) ? 1 : -1; + if ( prefix ) + { + if ( cmp == 0 && lena > lenb ) + { + /* + * argument b does not begin with argument a + */ + cmp=1; + } + } + else if ( (cmp == 0) && (lena != lenb) ) + { + cmp = (lena < lenb) ? -1 : 1; + } + } + + return cmp; } /* @@ -582,25 +598,52 @@ checkcondition_str(void *checkval, QueryOperand *val) CHKVAL *chkval = (CHKVAL *) checkval; WordEntry *StopLow = chkval->arrb; WordEntry *StopHigh = chkval->arre; - WordEntry *StopMiddle; - int difference; + WordEntry *StopMiddle = StopHigh; + int difference = -1; + bool res=false; /* Loop invariant: StopLow <= val < StopHigh */ - while (StopLow < StopHigh) { StopMiddle = StopLow + (StopHigh - StopLow) / 2; - difference = ValCompare(chkval, StopMiddle, val); + difference = tsCompareString( chkval->operand + val->distance, val->length, + chkval->values + StopMiddle->pos, StopMiddle->len, + false); + if (difference == 0) - return (val->weight && StopMiddle->haspos) ? + { + res = (val->weight && StopMiddle->haspos) ? checkclass_str(chkval, StopMiddle, val) : true; - else if (difference < 0) + break; + } + else if (difference > 0) StopLow = StopMiddle + 1; else StopHigh = StopMiddle; } - return (false); + if ( res == false && val->prefix == true ) + { + /* + * there was a failed exact search, so we should scan further to find + * a prefix match. 
+ */ + if ( StopLow >= StopHigh ) + StopMiddle = StopHigh; + + while( res == false && StopMiddle < chkval->arre && + tsCompareString( chkval->operand + val->distance, val->length, + chkval->values + StopMiddle->pos, StopMiddle->len, + true) == 0 ) + { + res = (val->weight && StopMiddle->haspos) ? + checkclass_str(chkval, StopMiddle, val) : true; + + StopMiddle++; + } + } + + return res; } /* @@ -758,50 +801,38 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight) return num; } -static WordEntry ** -SEI_realloc(WordEntry **in, uint32 *len) -{ - if (*len == 0 || in == NULL) - { - *len = 8; - in = palloc(sizeof(WordEntry *) * (*len)); - } - else - { - *len *= 2; - in = repalloc(in, sizeof(WordEntry *) * (*len)); - } - return in; -} +#define compareStatWord(a,e,s,t) \ + tsCompareString(STATSTRPTR(s) + (a)->pos, (a)->len, \ + STRPTR(t) + (e)->pos, (e)->len, \ + false) -static int -compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, TSVector txt) +typedef struct WordEntryMark { - if (a->len == b->len) - return strncmp( - STATSTRPTR(stat) + a->pos, - STRPTR(txt) + b->pos, - a->len - ); - return (a->len > b->len) ? 
1 : -1; -} + WordEntry *newentry; + StatEntry *pos; +} WordEntryMark; static tsstat * -formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len) +formstat(tsstat *stat, TSVector txt, List *entries) { - tsstat *newstat; - uint32 totallen, - nentry; - uint32 slen = 0; - WordEntry **ptr = entry; - char *curptr; - StatEntry *sptr, - *nptr; - - while (ptr - entry < len) + tsstat *newstat; + uint32 totallen, + nentry, + len = list_length(entries); + uint32 slen = 0; + WordEntry *ptr; + char *curptr; + StatEntry *sptr, + *nptr; + ListCell *entry; + StatEntry *PosSE = STATPTR(stat), + *prevPosSE; + WordEntryMark *mark; + + foreach( entry, entries ) { - slen += (*ptr)->len; - ptr++; + mark = (WordEntryMark*)lfirst(entry); + slen += mark->newentry->len; } nentry = stat->size + len; @@ -815,78 +846,46 @@ formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len) memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat); - ptr = entry; sptr = STATPTR(stat); nptr = STATPTR(newstat); - if (len == 1) + foreach(entry, entries) { - StatEntry *StopLow = STATPTR(stat); - StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); + prevPosSE = PosSE; - while (StopLow < StopHigh) + mark = (WordEntryMark*)lfirst(entry); + ptr = mark->newentry; + PosSE = mark->pos; + + /* + * Copy missed entries + */ + if ( PosSE > prevPosSE ) { - sptr = StopLow + (StopHigh - StopLow) / 2; - if (compareStatWord(sptr, *ptr, stat, txt) < 0) - StopLow = sptr + 1; - else - StopHigh = sptr; + memcpy( nptr, prevPosSE, sizeof(StatEntry) * (PosSE-prevPosSE) ); + nptr += PosSE-prevPosSE; } - nptr = STATPTR(newstat) + (StopLow - STATPTR(stat)); - memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat))); - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); + + /* + * Copy new entry + */ + if (ptr->haspos) + nptr->nentry = (stat->weight) ? 
check_weight(txt, ptr, stat->weight) : POSDATALEN(txt, ptr); else nptr->nentry = 1; nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->len = ptr->len; + memcpy(curptr, STRPTR(txt) + ptr->pos, nptr->len); nptr->pos = curptr - STATSTRPTR(newstat); - memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow)); - } - else - { - while (sptr - STATPTR(stat) < stat->size && ptr - entry < len) - { - if (compareStatWord(sptr, *ptr, stat, txt) < 0) - { - memcpy(nptr, sptr, sizeof(StatEntry)); - sptr++; - } - else - { - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); - else - nptr->nentry = 1; - nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); - nptr->pos = curptr - STATSTRPTR(newstat); - curptr += nptr->len; - ptr++; - } - nptr++; - } + curptr += nptr->len; + nptr++; - memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat)))); - - while (ptr - entry < len) - { - if ((*ptr)->haspos) - nptr->nentry = (stat->weight) ? 
check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr); - else - nptr->nentry = 1; - nptr->ndoc = 1; - nptr->len = (*ptr)->len; - memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); - nptr->pos = curptr - STATSTRPTR(newstat); - curptr += nptr->len; - ptr++; - nptr++; - } + pfree(mark); } + if ( PosSE < (StatEntry *) STATSTRPTR(stat) ) + memcpy(nptr, PosSE, sizeof(StatEntry) * (stat->size - (PosSE - STATPTR(stat)))); + return newstat; } @@ -907,12 +906,11 @@ ts_accum(tsstat *stat, Datum data) { tsstat *newstat; TSVector txt = DatumGetTSVector(data); - WordEntry **newentry = NULL; - uint32 len = 0, - cur = 0; StatEntry *sptr; WordEntry *wptr; int n = 0; + List *newentries=NIL; + StatEntry *StopLow; if (stat == NULL) { /* Init in first */ @@ -932,16 +930,23 @@ ts_accum(tsstat *stat, Datum data) sptr = STATPTR(stat); wptr = ARRPTR(txt); + StopLow = STATPTR(stat); - if (stat->size < 100 * txt->size) - { /* merge */ - while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size) - { - int cmp = compareStatWord(sptr, wptr, stat, txt); + while (wptr - ARRPTR(txt) < txt->size) + { + StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); + int cmp; + + /* + * We do not reset StopLow to the beginning of the array because tsvector is ordered + * by the same rule, so we can search from the last stopped position + */ - if (cmp < 0) - sptr++; - else if (cmp == 0) + while (StopLow < StopHigh) + { + sptr = StopLow + (StopHigh - StopLow) / 2; + cmp = compareStatWord(sptr, wptr, stat, txt); + if (cmp == 0) { if (stat->weight == 0) { @@ -953,90 +958,38 @@ ts_accum(tsstat *stat, Datum data) sptr->ndoc++; sptr->nentry += n; } - sptr++; - wptr++; + break; } + else if (cmp < 0) + StopLow = sptr + 1; else - { - if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) - { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } - wptr++; - } + StopHigh = sptr; } - while (wptr - ARRPTR(txt) < txt->size) - { + if 
(StopLow >= StopHigh) 
+ { /* not found */ if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } - wptr++; - } - } - else - { /* search */ - while (wptr - ARRPTR(txt) < txt->size) - { - StatEntry *StopLow = STATPTR(stat); - StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat); - int cmp; + WordEntryMark *mark = (WordEntryMark*)palloc(sizeof(WordEntryMark)); - while (StopLow < StopHigh) - { - sptr = StopLow + (StopHigh - StopLow) / 2; - cmp = compareStatWord(sptr, wptr, stat, txt); - if (cmp == 0) - { - if (stat->weight == 0) - { - sptr->ndoc++; - sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1; - } - else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0) - { - sptr->ndoc++; - sptr->nentry += n; - } - break; - } - else if (cmp < 0) - StopLow = sptr + 1; - else - StopHigh = sptr; - } + mark->newentry = wptr; + mark->pos = StopLow; + newentries = lappend( newentries, mark ); - if (StopLow >= StopHigh) - { /* not found */ - if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0) - { - if (cur == len) - newentry = SEI_realloc(newentry, &len); - newentry[cur] = wptr; - cur++; - } } - wptr++; } + wptr++; } - - if (cur == 0) + if (list_length(newentries) == 0) { /* no new words */ if (txt != (TSVector) DatumGetPointer(data)) pfree(txt); return stat; } - newstat = formstat(stat, txt, newentry, cur); - pfree(newentry); + newstat = formstat(stat, txt, newentries); + list_free(newentries); if (txt != (TSVector) DatumGetPointer(data)) pfree(txt); diff --git a/src/include/access/gin.h b/src/include/access/gin.h index add5df6ba11..353dd4e3f72 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.19 2008/05/12 00:00:53 alvherre Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.20 2008/05/16 
16:31:01 tgl Exp $ *-------------------------------------------------------------------------- */ @@ -15,6 +15,7 @@ #include "access/itup.h" #include "access/relscan.h" #include "fmgr.h" +#include "nodes/tidbitmap.h" #include "storage/block.h" #include "storage/buf.h" #include "storage/off.h" @@ -28,7 +29,8 @@ #define GIN_EXTRACTVALUE_PROC 2 #define GIN_EXTRACTQUERY_PROC 3 #define GIN_CONSISTENT_PROC 4 -#define GINNProcs 4 +#define GIN_COMPARE_PARTIAL_PROC 5 +#define GINNProcs 5 /* * Page opaque data in a inverted index page. @@ -141,7 +143,10 @@ typedef struct GinState FmgrInfo extractValueFn; FmgrInfo extractQueryFn; FmgrInfo consistentFn; + FmgrInfo comparePartialFn; /* optional method */ + bool canPartialMatch; /* can opclass perform partial + * match (prefix search)? */ TupleDesc tupdesc; } GinState; @@ -360,6 +365,12 @@ typedef struct GinScanEntryData /* current ItemPointer to heap */ ItemPointerData curItem; + /* partial match support */ + bool isPartialMatch; + TIDBitmap *partialMatch; + TBMIterateResult *partialMatchResult; + StrategyNumber strategy; + /* used for Posting list and one page in Posting tree */ ItemPointerData *list; uint32 nlist; @@ -424,6 +435,7 @@ extern PGDLLIMPORT int GinFuzzySearchLimit; extern Datum gingetbitmap(PG_FUNCTION_ARGS); extern Datum gingettuple(PG_FUNCTION_ARGS); +extern void ginrestartentry(GinScanEntry entry); /* ginvacuum.c */ extern Datum ginbulkdelete(PG_FUNCTION_ARGS); diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index e0bd027fdac..94d70c2c877 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -37,7 +37,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.459 2008/05/15 00:17:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.460 2008/05/16 16:31:01 tgl Exp $ * 
*------------------------------------------------------------------------- */ @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 200805141 +#define CATALOG_VERSION_NO 200805161 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index 9837a8c2003..0fe5d05e7c6 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.55 2008/04/10 22:25:25 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.56 2008/05/16 16:31:01 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -114,7 +114,7 @@ DESCR("hash index access method"); DATA(insert OID = 783 ( gist 0 7 f f t t t t t t gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 -DATA(insert OID = 2742 ( gin 0 4 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); +DATA(insert OID = 2742 ( gin 0 5 f f f f f f t f gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); DESCR("GIN index access method"); #define GIN_AM_OID 2742 diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index 8a804469538..36bd0f84096 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -22,7 +22,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: 
pgsql/src/include/catalog/pg_amproc.h,v 1.71 2008/03/27 03:57:34 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.72 2008/05/16 16:31:01 tgl Exp $ * * NOTES * the genbki.sh script reads this file and generates .bki @@ -321,10 +321,11 @@ DATA(insert ( 2745 1025 1025 1 381 )); DATA(insert ( 2745 1025 1025 2 2743 )); DATA(insert ( 2745 1025 1025 3 2774 )); DATA(insert ( 2745 1025 1025 4 2744 )); -DATA(insert ( 3659 3614 3614 1 360 )); +DATA(insert ( 3659 3614 3614 1 3724 )); DATA(insert ( 3659 3614 3614 2 3656 )); DATA(insert ( 3659 3614 3614 3 3657 )); DATA(insert ( 3659 3614 3614 4 3658 )); +DATA(insert ( 3659 3614 3614 5 2700 )); DATA(insert ( 3626 3614 3614 1 3622 )); DATA(insert ( 3683 3615 3615 1 3668 )); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index cc1d5db786b..21e094ad570 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.499 2008/05/15 00:17:40 tgl Exp $ + * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.500 2008/05/16 16:31:01 tgl Exp $ * * NOTES * The script catalog/genbki.sh reads this file and generates .bki @@ -4018,7 +4018,7 @@ DESCR("gin(internal)"); /* GIN array support */ DATA(insert OID = 2743 ( ginarrayextract PGNSP PGUID 12 1 0 f f t f i 2 2281 "2277 2281" _null_ _null_ _null_ ginarrayextract - _null_ _null_ )); DESCR("GIN array support"); -DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 3 2281 "2277 2281 21" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ )); +DATA(insert OID = 2774 ( ginqueryarrayextract PGNSP PGUID 12 1 0 f f t f i 4 2281 "2277 2281 21 2281" _null_ _null_ _null_ ginqueryarrayextract - _null_ _null_ )); DESCR("GIN array support"); DATA(insert OID = 2744 ( ginarrayconsistent PGNSP PGUID 12 1 
0 f f t f i 4 16 "2281 21 2281 2281" _null_ _null_ _null_ ginarrayconsistent - _null_ _null_ )); DESCR("GIN array support"); @@ -4253,10 +4253,14 @@ DESCR("GiST tsvector support"); DATA(insert OID = 3656 ( gin_extract_tsvector PGNSP PGUID 12 1 0 f f t f i 2 2281 "3614 2281" _null_ _null_ _null_ gin_extract_tsvector - _null_ _null_ )); DESCR("GIN tsvector support"); -DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 3 2281 "3615 2281 21" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ )); +DATA(insert OID = 3657 ( gin_extract_tsquery PGNSP PGUID 12 1 0 f f t f i 4 2281 "3615 2281 21 2281" _null_ _null_ _null_ gin_extract_tsquery - _null_ _null_ )); DESCR("GIN tsvector support"); DATA(insert OID = 3658 ( gin_tsquery_consistent PGNSP PGUID 12 1 0 f f t f i 4 16 "2281 21 3615 2281" _null_ _null_ _null_ gin_tsquery_consistent - _null_ _null_ )); DESCR("GIN tsvector support"); +DATA(insert OID = 3724 ( gin_cmp_tslexeme PGNSP PGUID 12 1 0 f f t f i 2 23 "25 25" _null_ _null_ _null_ gin_cmp_tslexeme - _null_ _null_ )); +DESCR("GIN tsvector support"); +DATA(insert OID = 2700 ( gin_cmp_prefix PGNSP PGUID 12 1 0 f f t f i 3 23 "25 25 21" _null_ _null_ _null_ gin_cmp_prefix - _null_ _null_ )); +DESCR("GIN tsvector support"); DATA(insert OID = 3662 ( tsquery_lt PGNSP PGUID 12 1 0 f f t f i 2 16 "3615 3615" _null_ _null_ _null_ tsquery_lt - _null_ _null_ )); DESCR("less-than"); diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h index 8f21abf032c..d08d35db193 100644 --- a/src/include/tsearch/ts_public.h +++ b/src/include/tsearch/ts_public.h @@ -6,7 +6,7 @@ * * Copyright (c) 1998-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.8 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -99,6 +99,7 @@ typedef struct } 
TSLexeme; #define TSL_ADDPOS 0x01 +#define TSL_PREFIX 0x02 /* * Struct for supporting complex dictionaries like thesaurus. diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h index 9f5e63e2273..42680408a91 100644 --- a/src/include/tsearch/ts_type.h +++ b/src/include/tsearch/ts_type.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.10 2008/01/01 19:45:59 momjian Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.11 2008/05/16 16:31:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -179,6 +179,7 @@ typedef struct * bitmask of allowed weights. if it =0 then * any weight are allowed. Weights and bit * map: A: 1<<3 B: 1<<2 C: 1<<1 D: 1<<0 */ + bool prefix; /* true if it's a prefix search */ int32 valcrc; /* XXX: pg_crc32 would be a more appropriate * data type, but we use comparisons to signed * integers in the code. They would need to be diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index 0085b64951a..6afd4c99f7a 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -5,7 +5,7 @@ * * Copyright (c) 1998-2008, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.14 2008/04/21 00:26:47 tgl Exp $ + * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.15 2008/05/16 16:31:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -42,9 +42,10 @@ typedef struct TSQueryParserStateData *TSQueryParserState; typedef void (*PushFunction) (Datum opaque, TSQueryParserState state, char *token, int tokenlen, - int2 tokenweights /* bitmap as described + int2 tokenweights, /* bitmap as described * in QueryOperand - struct */ ); + * struct */ + bool prefix); extern TSQuery parse_tsquery(char *buf, PushFunction pushval, @@ -52,7 +53,7 @@ extern TSQuery parse_tsquery(char *buf, 
/* Functions for use by PushFunction implementations */ extern void pushValue(TSQueryParserState state, - char *strval, int lenval, int2 weight); + char *strval, int lenval, int2 weight, bool prefix); extern void pushStop(TSQueryParserState state); extern void pushOperator(TSQueryParserState state, int8 operator); @@ -74,6 +75,7 @@ typedef struct */ uint16 *apos; } pos; + uint16 flags; /* currently, only TSL_PREFIX */ char *word; uint32 alen; } ParsedWord; @@ -110,6 +112,7 @@ extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot, * to_ts* - text transformation to tsvector, tsquery */ extern TSVector make_tsvector(ParsedText *prs); +extern int32 tsCompareString(char *a, int lena, char *b, int lenb, bool prefix); extern Datum to_tsvector_byid(PG_FUNCTION_ARGS); extern Datum to_tsvector(PG_FUNCTION_ARGS); @@ -142,6 +145,8 @@ extern Datum gtsvectorout(PG_FUNCTION_ARGS); */ extern Datum gin_extract_tsvector(PG_FUNCTION_ARGS); +extern Datum gin_cmp_tslexeme(PG_FUNCTION_ARGS); +extern Datum gin_cmp_prefix(PG_FUNCTION_ARGS); extern Datum gin_extract_tsquery(PG_FUNCTION_ARGS); extern Datum gin_tsquery_consistent(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index ef787115742..efab1354fe3 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -935,9 +935,11 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND -- Detect missing pg_amproc entries: should have as many support functions -- as AM expects for each datatype combination supported by the opfamily. +-- GIN is a special case because it has an optional support function. 
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND + p1.amname <> 'gin' AND p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4 WHERE p4.amprocfamily = p2.oid AND p4.amproclefttype = p3.amproclefttype AND @@ -946,18 +948,43 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND --------+---------+----------------+----------------- (0 rows) +-- Similar check for GIN, allowing one optional proc +SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype +FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 +WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND + p1.amname = 'gin' AND + p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4 + WHERE p4.amprocfamily = p2.oid AND + p4.amproclefttype = p3.amproclefttype AND + p4.amprocrighttype = p3.amprocrighttype); + amname | opfname | amproclefttype | amprocrighttype +--------+---------+----------------+----------------- +(0 rows) + -- Also, check if there are any pg_opclass entries that don't seem to have --- pg_amproc support. +-- pg_amproc support. Again, GIN has to be checked separately. 
SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype +WHERE am.amname <> 'gin' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; amname | opcname | count --------+---------+------- (0 rows) +SELECT amname, opcname, count(*) +FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid + LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND + amproclefttype = amprocrighttype AND amproclefttype = opcintype +WHERE am.amname = 'gin' +GROUP BY amname, amsupport, opcname, amprocfamily +HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL; + amname | opcname | count +--------+---------+------- +(0 rows) + -- Unfortunately, we can't check the amproc link very well because the -- signature of the function may be different for different support routines -- or different base data types. diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out index 4b8929361a8..3ae6a671dad 100644 --- a/src/test/regress/expected/tsdicts.out +++ b/src/test/regress/expected/tsdicts.out @@ -232,7 +232,7 @@ ALTER TEXT SEARCH CONFIGURATION ispell_tst ALTER MAPPING FOR SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); to_tsvector ---------------------------------------------------------------------------------------------------- - 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 + 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3 (1 row) SELECT to_tsquery('ispell_tst', 'footballklubber'); @@ -256,7 +256,7 @@ ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footballklubber from a foot'); to_tsvector 
---------------------------------------------------------------------------------------------------- - 'sky':3 'ball':7 'book':1,5 'foot':7,10 'booking':1,5 'klubber':7 'football':7 'footballklubber':7 + 'ball':7 'book':1,5 'booking':1,5 'foot':7,10 'football':7 'footballklubber':7 'klubber':7 'sky':3 (1 row) SELECT to_tsquery('hunspell_tst', 'footballklubber'); @@ -287,7 +287,7 @@ SELECT to_tsvector('synonym_tst', 'Postgresql is often called as postgres or pgs SELECT to_tsvector('synonym_tst', 'Most common mistake is to write Gogle instead of Google'); to_tsvector ---------------------------------------------------------- - 'googl':7,10 'write':6 'common':2 'mistak':3 'instead':8 + 'common':2 'googl':7,10 'instead':8 'mistak':3 'write':6 (1 row) -- test thesaurus in configuration @@ -307,12 +307,12 @@ SELECT to_tsvector('thesaurus_tst', 'one postgres one two one two three one'); SELECT to_tsvector('thesaurus_tst', 'Supernovae star is very new star and usually called supernovae (abbrevation SN)'); to_tsvector ------------------------------------------------------------- - 'sn':1,9,11 'new':4 'call':8 'star':5 'usual':7 'abbrev':10 + 'abbrev':10 'call':8 'new':4 'sn':1,9,11 'star':5 'usual':7 (1 row) SELECT to_tsvector('thesaurus_tst', 'Booking tickets is looking like a booking a tickets'); to_tsvector ------------------------------------------------------- - 'card':3,10 'like':6 'look':5 'invit':2,9 'order':1,8 + 'card':3,10 'invit':2,9 'like':6 'look':5 'order':1,8 (1 row) diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 1f7e6ffbafc..468a623e973 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -92,6 +92,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; 39 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; + count +------- + 494 +(1 row) + create index wowidx on test_tsvector using gist (a); SET enable_seqscan=OFF; SELECT count(*) 
FROM test_tsvector WHERE a @@ 'wr|qh'; @@ -130,6 +136,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; 39 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; + count +------- + 494 +(1 row) + RESET enable_seqscan; DROP INDEX wowidx; CREATE INDEX wowidx ON test_tsvector USING gin (a); @@ -170,6 +182,12 @@ SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; 39 (1 row) +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; + count +------- + 494 +(1 row) + RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); @@ -380,7 +398,7 @@ SELECT to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc. <i <b> wow < jqw <> qwerty'); to_tsvector -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 'ad':17 'dw':19 'jf':39 '234':61 '345':1 '4.2':54,55,56 '455':31 'jqw':64 'qwe':2,18,27,28,35 'wer':36 'wow':63 '-4.2':58,60 'asdf':37 'ewr1':43 'qwer':38 'sdjk':40 '5.005':32 'efd.r':3 'ewri2':44 'hjwer':42 'qwqwe':29 'wefjn':48 'gist.c':52 'gist.h':50 'qwerti':65 '234.435':30 'qwe-wer':34 'readlin':53,57,59 'www.com':4 '+4.0e-10':26 'gist.h.c':51 'rewt/ewr':47 '/?ad=qwe&dw':7,10,14,22 '/wqe-324/ewr':49 'aew.werc.ewr':6 
'1aew.werc.ewr':9 '2aew.werc.ewr':11 '3aew.werc.ewr':13 '4aew.werc.ewr':15 '/usr/local/fff':45 '/awdf/dwqe/4325':46 'teodor@stack.net':33 '/?ad=qwe&dw=%20%32':25 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '7aew.werc.ewr:8100':24 'aew.werc.ewr/?ad=qwe&dw':5 '1aew.werc.ewr/?ad=qwe&dw':8 '3aew.werc.ewr/?ad=qwe&dw':12 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 + '+4.0e-10':26 '-4.2':58,60 '/?ad=qwe&dw':7,10,14,22 '/?ad=qwe&dw=%20%32':25 '/awdf/dwqe/4325':46 '/usr/local/fff':45 '/wqe-324/ewr':49 '1aew.werc.ewr':9 '1aew.werc.ewr/?ad=qwe&dw':8 '234':61 '234.435':30 '2aew.werc.ewr':11 '345':1 '3aew.werc.ewr':13 '3aew.werc.ewr/?ad=qwe&dw':12 '4.2':54,55,56 '455':31 '4aew.werc.ewr':15 '5.005':32 '5aew.werc.ewr:8100':16 '6aew.werc.ewr:8100':21 '6aew.werc.ewr:8100/?ad=qwe&dw':20 '7aew.werc.ewr:8100':24 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':23 'ad':17 'aew.werc.ewr':6 'aew.werc.ewr/?ad=qwe&dw':5 'asdf':37 'dw':19 'efd.r':3 'ewr1':43 'ewri2':44 'gist.c':52 'gist.h':50 'gist.h.c':51 'hjwer':42 'jf':39 'jqw':64 'qwe':2,18,27,28,35 'qwe-wer':34 'qwer':38 'qwerti':65 'qwqwe':29 'readlin':53,57,59 'rewt/ewr':47 'sdjk':40 'teodor@stack.net':33 'wefjn':48 'wer':36 'wow':63 'www.com':4 (1 row) SELECT length(to_tsvector('english', '345 qwe@efd.r '' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? 
ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf <fr>qwer jf sdjk<we hjwer <werrwe> ewr1> ewri2 <a href="qwe<qwe>"> @@ -852,7 +870,7 @@ SET default_text_search_config=simple; SELECT to_tsvector('SKIES My booKs'); to_tsvector ---------------------------- - 'my':2 'books':3 'skies':1 + 'books':3 'my':2 'skies':1 (1 row) SELECT plainto_tsquery('SKIES My booKs'); @@ -871,7 +889,7 @@ SET default_text_search_config=english; SELECT to_tsvector('SKIES My booKs'); to_tsvector ------------------ - 'sky':1 'book':3 + 'book':3 'sky':1 (1 row) SELECT plainto_tsquery('SKIES My booKs'); diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out index 4672f099e0a..6284fb61813 100644 --- a/src/test/regress/expected/tstypes.out +++ b/src/test/regress/expected/tstypes.out @@ -44,31 +44,31 @@ SELECT E'''1 \\''2'''::tsvector; SELECT E'''1 \\''2''3'::tsvector; tsvector ------------- - '3' '1 ''2' + '1 ''2' '3' (1 row) SELECT E'''1 \\''2'' 3'::tsvector; tsvector ------------- - '3' '1 ''2' + '1 ''2' '3' (1 row) SELECT E'''1 \\''2'' '' 3'' 4 '::tsvector; tsvector ------------------ - '4' ' 3' '1 ''2' + ' 3' '1 ''2' '4' (1 row) SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector; tsvector ---------------------------------------- - '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c' + 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc' (1 row) SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector)); tsvectorin ---------------------------------------- - '\\as' 'abc' 'AB\\c' 'ab\\c' 'ab\\\\c' + 'AB\\c' '\\as' 'ab\\\\c' 'ab\\c' 'abc' (1 row) SELECT '''w'':4A,3B,2C,1D,5 a:8'; @@ -86,13 +86,13 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); setweight ---------------------------------------------------------- - 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C + 
'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C (1 row) SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); strip --------------- - 'a' 'w' 'asd' + 'a' 'asd' 'w' (1 row) --Base tsquery test @@ -336,6 +336,12 @@ SELECT $$'\\as'$$::tsquery; '\\as' (1 row) +SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; + tsquery +------------------------------------------ + ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo' +(1 row) + SELECT 'a' < 'b & c'::tsquery as "true"; true ------ @@ -439,12 +445,96 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true"; t (1 row) +SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false"; + false +------- + f +(1 row) + +SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true"; + true +------ + t +(1 row) + +SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true"; + true +------ + t +(1 row) + +SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true"; + true +------ + t +(1 row) + +SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true"; + true +------ + t +(1 row) + +SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false"; + false +------- + f +(1 row) + +SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; + true +------ + t +(1 row) + +SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; + true +------ + t +(1 row) + SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); ts_rank ----------- 0.0911891 (1 row) +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s'); + ts_rank +----------- + 0.0303964 +(1 row) + +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*'); + ts_rank 
+----------- + 0.0911891 +(1 row) + +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*'); + ts_rank +----------- + 0.0911891 +(1 row) + SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s'); ts_rank ---------- @@ -481,6 +571,30 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s'); 0.3 (1 row) +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s'); + ts_rank_cd +------------ + 0.1 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*'); + ts_rank_cd +------------ + 0.3 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*'); + ts_rank_cd +------------ + 0.3 +(1 row) + +SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*'); + ts_rank_cd +------------ + 0.5 +(1 row) + SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s'); ts_rank_cd ------------ diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index deaa3714fef..84b2d800f78 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -746,25 +746,47 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND -- Detect missing pg_amproc entries: should have as many support functions -- as AM expects for each datatype combination supported by the opfamily. +-- GIN is a special case because it has an optional support function. 
SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND + p1.amname <> 'gin' AND p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4 WHERE p4.amprocfamily = p2.oid AND p4.amproclefttype = p3.amproclefttype AND p4.amprocrighttype = p3.amprocrighttype); +-- Similar check for GIN, allowing one optional proc + +SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype +FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 +WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND + p1.amname = 'gin' AND + p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4 + WHERE p4.amprocfamily = p2.oid AND + p4.amproclefttype = p3.amproclefttype AND + p4.amprocrighttype = p3.amprocrighttype); + -- Also, check if there are any pg_opclass entries that don't seem to have --- pg_amproc support. +-- pg_amproc support. Again, GIN has to be checked separately. SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype +WHERE am.amname <> 'gin' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; +SELECT amname, opcname, count(*) +FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid + LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND + amproclefttype = amprocrighttype AND amproclefttype = opcintype +WHERE am.amname = 'gin' +GROUP BY amname, amsupport, opcname, amprocfamily +HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL; + -- Unfortunately, we can't check the amproc link very well because the -- signature of the function may be different for different support routines -- or different base data types. 
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index 3cf34524d34..dc7427d3b1d 100644 --- a/src/test/regress/sql/tsearch.sql +++ b/src/test/regress/sql/tsearch.sql @@ -47,6 +47,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; create index wowidx on test_tsvector using gist (a); @@ -58,6 +59,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; RESET enable_seqscan; @@ -73,6 +75,7 @@ SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'w:*|q:*'; RESET enable_seqscan; INSERT INTO test_tsvector VALUES ('???', 'DFG:1A,2B,6C,10 FGH'); diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql index 49afc3b23a2..fd7c7024f5e 100644 --- a/src/test/regress/sql/tstypes.sql +++ b/src/test/regress/sql/tstypes.sql @@ -58,6 +58,7 @@ SELECT '1&(2&(4&(5|6)))'::tsquery; SELECT '1&(2&(4&(5|!6)))'::tsquery; SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery; SELECT $$'\\as'$$::tsquery; +SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery; SELECT 'a' < 'b & c'::tsquery as "true"; SELECT 'a' > 'b & c'::tsquery as "false"; @@ -81,8 +82,23 @@ SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B' as "true"; SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A' as "true"; SELECT 'a b:89 
ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C' as "false"; SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB' as "true"; +SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*C' as "false"; +SELECT 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & c:*CB' as "true"; +SELECT 'a b:89 ca:23A,64b cb:80c d:34c'::tsvector @@ 'd:AC & c:*C' as "true"; +SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*C' as "true"; +SELECT 'a b:89 ca:23A,64c cb:80b d:34c'::tsvector @@ 'd:AC & c:*B' as "true"; + +SELECT 'supernova'::tsvector @@ 'super'::tsquery AS "false"; +SELECT 'supeanova supernova'::tsvector @@ 'super'::tsquery AS "false"; +SELECT 'supeznova supernova'::tsvector @@ 'super'::tsquery AS "false"; +SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true"; +SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; +SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true"; SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s'); +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s'); +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*'); +SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | sa:*'); SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a | s'); SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a & s'); @@ -90,6 +106,10 @@ SELECT ts_rank(' a:1 s:2B d g'::tsvector, 'a & s'); SELECT ts_rank(' a:1 s:2 d g'::tsvector, 'a & s'); SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a | s'); +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s'); +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | s:*'); +SELECT ts_rank_cd(' a:1 sa:2C d g'::tsvector, 'a | sa:*'); +SELECT ts_rank_cd(' a:1 sa:3C sab:2c d g'::tsvector, 'a | sa:*'); SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a | s'); SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s'); SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s'); |