diff options
Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r-- | src/backend/utils/adt/tsginidx.c | 28 | ||||
-rw-r--r-- | src/backend/utils/adt/tsgistidx.c | 17 | ||||
-rw-r--r-- | src/backend/utils/adt/tsquery.c | 25 | ||||
-rw-r--r-- | src/backend/utils/adt/tsquery_cleanup.c | 242 | ||||
-rw-r--r-- | src/backend/utils/adt/tsquery_op.c | 4 | ||||
-rw-r--r-- | src/backend/utils/adt/tsvector_op.c | 477 |
6 files changed, 404 insertions, 389 deletions
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c index efc111e379c..3e0a44459ac 100644 --- a/src/backend/utils/adt/tsginidx.c +++ b/src/backend/utils/adt/tsginidx.c @@ -212,7 +212,7 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data) * Evaluate tsquery boolean expression using ternary logic. */ static GinTernaryValue -TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) +TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase) { GinTernaryValue val1, val2, @@ -230,7 +230,10 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) switch (curitem->qoperator.oper) { case OP_NOT: - result = TS_execute_ternary(gcv, curitem + 1); + /* In phrase search, always return MAYBE since we lack positions */ + if (in_phrase) + return GIN_MAYBE; + result = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (result == GIN_MAYBE) return result; return !result; @@ -238,17 +241,21 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) case OP_PHRASE: /* - * GIN doesn't contain any information about positions, treat + * GIN doesn't contain any information about positions, so treat * OP_PHRASE as OP_AND with recheck requirement */ - *gcv->need_recheck = true; + *(gcv->need_recheck) = true; + /* Pass down in_phrase == true in case there's a NOT below */ + in_phrase = true; + /* FALL THRU */ case OP_AND: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, + in_phrase); if (val1 == GIN_FALSE) return GIN_FALSE; - val2 = TS_execute_ternary(gcv, curitem + 1); + val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (val2 == GIN_FALSE) return GIN_FALSE; if (val1 == GIN_TRUE && val2 == GIN_TRUE) @@ -257,10 +264,11 @@ TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem) return GIN_MAYBE; case OP_OR: - val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left); + val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left, + in_phrase); if (val1 == GIN_TRUE) return GIN_TRUE; - val2 = TS_execute_ternary(gcv, curitem + 1); + val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase); if (val2 == GIN_TRUE) return GIN_TRUE; if (val1 == GIN_FALSE && val2 == GIN_FALSE) @@ -307,7 +315,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS) res = TS_execute(GETQUERY(query), &gcv, - TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND, + TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS, checkcondition_gin); } @@ -343,7 +351,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS) gcv.map_item_operand = (int *) (extra_data[0]); gcv.need_recheck = &recheck; - res = TS_execute_ternary(&gcv, GETQUERY(query)); + res = TS_execute_ternary(&gcv, GETQUERY(query), false); if (res == GIN_TRUE && recheck) res = GIN_MAYBE; diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c index 6cdfb13f6d1..a4c2bb9cec4 100644 --- a/src/backend/utils/adt/tsgistidx.c +++ b/src/backend/utils/adt/tsgistidx.c @@ -359,12 +359,11 @@ gtsvector_consistent(PG_FUNCTION_ARGS) if (ISALLTRUE(key)) PG_RETURN_BOOL(true); - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), + /* since signature is lossy, cannot specify CALC_NOT here */ + PG_RETURN_BOOL(TS_execute(GETQUERY(query), (void *) GETSIGN(key), - TS_EXEC_PHRASE_AS_AND, - checkcondition_bit - )); + TS_EXEC_PHRASE_NO_POS, + checkcondition_bit)); } else { /* only leaf pages */ @@ -372,12 +371,10 @@ gtsvector_consistent(PG_FUNCTION_ARGS) chkval.arrb = GETARR(key); chkval.arre = chkval.arrb + ARRNELEM(key); - PG_RETURN_BOOL(TS_execute( - GETQUERY(query), + PG_RETURN_BOOL(TS_execute(GETQUERY(query), (void *) &chkval, - TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT, - checkcondition_arr - )); + TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT, + checkcondition_arr)); } } diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 3d11a1c2080..f0bd52877f3 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -557,13 +557,11 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) curitem->oper == OP_OR || curitem->oper == OP_PHRASE); - if (curitem->oper == OP_PHRASE) - *needcleanup = true; /* push OP_PHRASE down later */ - (*pos)++; /* process RIGHT argument */ findoprnd_recurse(ptr, pos, nnodes, needcleanup); + curitem->left = *pos - tmp; /* set LEFT arg's offset */ /* process LEFT argument */ @@ -574,8 +572,9 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup) /* - * Fills in the left-fields previously left unfilled. The input - * QueryItems must be in polish (prefix) notation. + * Fill in the left-fields previously left unfilled. + * The input QueryItems must be in polish (prefix) notation. + * Also, set *needcleanup to true if there are any QI_VALSTOP nodes. */ static void findoprnd(QueryItem *ptr, int size, bool *needcleanup) @@ -687,15 +686,17 @@ parse_tsquery(char *buf, memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); pfree(state.op); - /* Set left operand pointers for every operator. */ + /* + * Set left operand pointers for every operator. While we're at it, + * detect whether there are any QI_VALSTOP nodes. + */ findoprnd(ptr, query->size, &needcleanup); /* - * QI_VALSTOP nodes should be cleaned and OP_PHRASE should be pushed - * down + * If there are QI_VALSTOP nodes, delete them and simplify the tree. */ if (needcleanup) - return cleanup_fakeval_and_phrase(query); + query = cleanup_tsquery_stopwords(query); return query; } @@ -1088,6 +1089,9 @@ tsqueryrecv(PG_FUNCTION_ARGS) */ findoprnd(item, size, &needcleanup); + /* Can't have found any QI_VALSTOP nodes */ + Assert(!needcleanup); + /* Copy operands to output struct */ for (i = 0; i < size; i++) { @@ -1105,9 +1109,6 @@ tsqueryrecv(PG_FUNCTION_ARGS) SET_VARSIZE(query, len + datalen); - if (needcleanup) - PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query)); - PG_RETURN_TSQUERY(query); } diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 330664da639..c10c7ef0aa5 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -26,19 +26,6 @@ typedef struct NODE } NODE; /* - * To simplify walking on query tree and pushing down of phrase operator - * we define some fake priority here: phrase operator has highest priority - * of any other operators (and we believe here that OP_PHRASE is a highest - * code of operations) and value node has ever highest priority. - * Priority values of other operations don't matter until they are less than - * phrase operator and value node. - */ -#define VALUE_PRIORITY (OP_COUNT + 1) -#define NODE_PRIORITY(x) \ - ( ((x)->valnode->qoperator.type == QI_OPR) ? \ - (x)->valnode->qoperator.oper : VALUE_PRIORITY ) - -/* * make query tree from plain view of query */ static NODE * @@ -368,227 +355,6 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd) return node; } -static NODE * -copyNODE(NODE *node) -{ - NODE *cnode = palloc(sizeof(NODE)); - - /* since this function recurses, it could be driven to stack overflow. */ - check_stack_depth(); - - cnode->valnode = palloc(sizeof(QueryItem)); - *(cnode->valnode) = *(node->valnode); - - if (node->valnode->type == QI_OPR) - { - cnode->right = copyNODE(node->right); - if (node->valnode->qoperator.oper != OP_NOT) - cnode->left = copyNODE(node->left); - } - - return cnode; -} - -static NODE * -makeNODE(int8 op, NODE *left, NODE *right) -{ - NODE *node = palloc(sizeof(NODE)); - - /* zeroing allocation to prevent difference in unused bytes */ - node->valnode = palloc0(sizeof(QueryItem)); - - node->valnode->qoperator.type = QI_OPR; - node->valnode->qoperator.oper = op; - - node->left = left; - node->right = right; - - return node; -} - -/* - * Move operation with high priority to the leaves. This guarantees - * that the phrase operator will be near the bottom of the tree. - * An idea behind is do not store position of lexemes during execution - * of ordinary operations (AND, OR, NOT) because it could be expensive. - * Actual transformation will be performed only on subtrees under the - * <-> (<n>) operation since it's needed solely for the phrase operator. - * - * Rules: - * a <-> (b | c) => (a <-> b) | (a <-> c) - * (a | b) <-> c => (a <-> c) | (b <-> c) - * a <-> !b => a & !(a <-> b) - * !a <-> b => b & !(a <-> b) - * - * Warnings for readers: - * a <-> b != b <-> a - * - * a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are: - * n 2n-1 - */ -static NODE * -normalize_phrase_tree(NODE *node) -{ - /* there should be no stop words at this point */ - Assert(node->valnode->type != QI_VALSTOP); - - if (node->valnode->type == QI_VAL) - return node; - - /* since this function recurses, it could be driven to stack overflow. */ - check_stack_depth(); - - Assert(node->valnode->type == QI_OPR); - - if (node->valnode->qoperator.oper == OP_NOT) - { - NODE *orignode = node; - - /* eliminate NOT sequence */ - while (node->valnode->type == QI_OPR && - node->valnode->qoperator.oper == node->right->valnode->qoperator.oper) - { - node = node->right->right; - } - - if (orignode != node) - /* current node isn't checked yet */ - node = normalize_phrase_tree(node); - else - node->right = normalize_phrase_tree(node->right); - } - else if (node->valnode->qoperator.oper == OP_PHRASE) - { - int16 distance; - NODE *X; - - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - - /* - * if subtree contains only nodes with higher "priority" then we are - * done. See comment near NODE_PRIORITY() - */ - if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) && - NODE_PRIORITY(node) <= NODE_PRIORITY(node->left)) - return node; - - /* - * We can't swap left-right and works only with left child because of - * a <-> b != b <-> a - */ - - distance = node->valnode->qoperator.distance; - - if (node->right->valnode->type == QI_OPR) - { - switch (node->right->valnode->qoperator.oper) - { - case OP_AND: - /* a <-> (b & c) => (a <-> b) & (a <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* a <-> (b | c) => (a <-> b) | (a <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* a <-> !b => a & !(a <-> b) */ - X = node->right; - node->right = node->right->right; - X->right = node; - node = makeNODE(OP_AND, - copyNODE(node->left), - X); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->right->valnode->qoperator.oper); - } - } - - if (node->left->valnode->type == QI_OPR && - node->valnode->qoperator.oper == OP_PHRASE) - { - /* - * if the node is still OP_PHRASE, check the left subtree, - * otherwise the whole node will be transformed later. - */ - switch (node->left->valnode->qoperator.oper) - { - case OP_AND: - /* (a & b) <-> c => (a <-> c) & (b <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* (a | b) <-> c => (a <-> c) | (b <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* !a <-> b => b & !(a <-> b) */ - X = node->left; - node->left = node->left->right; - X->right = node; - node = makeNODE(OP_AND, - X, - copyNODE(node->right)); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->left->valnode->qoperator.oper); - } - } - - /* continue transformation */ - node = normalize_phrase_tree(node); - } - else /* AND or OR */ - { - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - } - - return node; -} - /* * Number of elements in query tree */ @@ -613,8 +379,11 @@ calcstrlen(NODE *node) return size; } +/* + * Remove QI_VALSTOP (stopword) nodes from TSQuery. + */ TSQuery -cleanup_fakeval_and_phrase(TSQuery in) +cleanup_tsquery_stopwords(TSQuery in) { int32 len, lenstr, @@ -642,9 +411,6 @@ cleanup_fakeval_and_phrase(TSQuery in) return out; } - /* push OP_PHRASE nodes down */ - root = normalize_phrase_tree(root); - /* * Build TSQuery from plain view */ diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c index a574b4b2573..8f90ce99e0d 100644 --- a/src/backend/utils/adt/tsquery_op.c +++ b/src/backend/utils/adt/tsquery_op.c @@ -104,7 +104,7 @@ tsquery_or(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(a, 0); PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(query); + PG_RETURN_TSQUERY(query); } Datum @@ -140,7 +140,7 @@ tsquery_phrase_distance(PG_FUNCTION_ARGS) PG_FREE_IF_COPY(a, 0); PG_FREE_IF_COPY(b, 1); - PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query)); + PG_RETURN_TSQUERY(query); } Datum diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 36cc10c9017..01c721f835e 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -11,9 +11,10 @@ * *------------------------------------------------------------------------- */ - #include "postgres.h" +#include <limits.h> + #include "access/htup_details.h" #include "catalog/namespace.h" #include "catalog/pg_type.h" @@ -1405,147 +1406,394 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data) } /* + * Compute output position list for a tsquery operator in phrase mode. + * + * Merge the position lists in Ldata and Rdata as specified by "emit", + * returning the result list into *data. The input position lists must be + * sorted and unique, and the output will be as well. + * + * data: pointer to initially-all-zeroes output struct, or NULL + * Ldata, Rdata: input position lists + * emit: bitmask of TSPO_XXX flags + * Loffset: offset to be added to Ldata positions before comparing/outputting + * Roffset: offset to be added to Rdata positions before comparing/outputting + * max_npos: maximum possible required size of output position array + * + * Loffset and Roffset should not be negative, else we risk trying to output + * negative positions, which won't fit into WordEntryPos. + * + * Returns true if any positions were emitted to *data; or if data is NULL, + * returns true if any positions would have been emitted. + */ +#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */ +#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */ +#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */ + +static bool +TS_phrase_output(ExecPhraseData *data, + ExecPhraseData *Ldata, + ExecPhraseData *Rdata, + int emit, + int Loffset, + int Roffset, + int max_npos) +{ + int Lindex, + Rindex; + + /* Loop until both inputs are exhausted */ + Lindex = Rindex = 0; + while (Lindex < Ldata->npos || Rindex < Rdata->npos) + { + int Lpos, + Rpos; + int output_pos = 0; + + /* + * Fetch current values to compare. WEP_GETPOS() is needed because + * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. + */ + if (Lindex < Ldata->npos) + Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset; + else + { + /* L array exhausted, so we're done if R_ONLY isn't set */ + if (!(emit & TSPO_R_ONLY)) + break; + Lpos = INT_MAX; + } + if (Rindex < Rdata->npos) + Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset; + else + { + /* R array exhausted, so we're done if L_ONLY isn't set */ + if (!(emit & TSPO_L_ONLY)) + break; + Rpos = INT_MAX; + } + + /* Merge-join the two input lists */ + if (Lpos < Rpos) + { + /* Lpos is not matched in Rdata, should we output it? */ + if (emit & TSPO_L_ONLY) + output_pos = Lpos; + Lindex++; + } + else if (Lpos == Rpos) + { + /* Lpos and Rpos match ... should we output it? */ + if (emit & TSPO_BOTH) + output_pos = Rpos; + Lindex++; + Rindex++; + } + else /* Lpos > Rpos */ + { + /* Rpos is not matched in Ldata, should we output it? */ + if (emit & TSPO_R_ONLY) + output_pos = Rpos; + Rindex++; + } + + if (output_pos > 0) + { + if (data) + { + /* Store position, first allocating output array if needed */ + if (data->pos == NULL) + { + data->pos = (WordEntryPos *) + palloc(max_npos * sizeof(WordEntryPos)); + data->allocated = true; + } + data->pos[data->npos++] = output_pos; + } + else + { + /* + * Exact positions not needed, so return true as soon as we + * know there is at least one. + */ + return true; + } + } + } + + if (data && data->npos > 0) + { + /* Let's assert we didn't overrun the array */ + Assert(data->npos <= max_npos); + return true; + } + return false; +} + +/* * Execute tsquery at or below an OP_PHRASE operator. * - * This handles the recursion at levels where we need to care about - * match locations. In addition to the same arguments used for TS_execute, - * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to - * be filled with lexeme match positions on success. data == NULL if no - * match data need be returned. (In practice, outside callers pass NULL, - * and only the internal recursion cases pass a data pointer.) + * This handles tsquery execution at recursion levels where we need to care + * about match locations. + * + * In addition to the same arguments used for TS_execute, the caller may pass + * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme + * match position info on success. data == NULL if no position data need be + * returned. (In practice, outside callers pass NULL, and only the internal + * recursion cases pass a data pointer.) + * Note: the function assumes data != NULL for operators other than OP_PHRASE. + * This is OK because an outside call always starts from an OP_PHRASE node. + * + * The detailed semantics of the match data, given that the function returned + * "true" (successful match, or possible match), are: + * + * npos > 0, negate = false: + * query is matched at specified position(s) (and only those positions) + * npos > 0, negate = true: + * query is matched at all positions *except* specified position(s) + * npos = 0, negate = false: + * query is possibly matched, matching position(s) are unknown + * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set) + * npos = 0, negate = true: + * query is matched at all positions + * + * Successful matches also return a "width" value which is the match width in + * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches, + * and is the sum of the phrase operator distances for phrase matches. Note + * that when width > 0, the listed positions represent the ends of matches not + * the starts. (This unintuitive rule is needed to avoid possibly generating + * negative positions, which wouldn't fit into the WordEntryPos arrays.) + * + * When the function returns "false" (no match), it must return npos = 0, + * negate = false (which is the state initialized by the caller); but the + * "width" output in such cases is undefined. */ static bool TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags, - ExecPhraseData *data, - TSExecuteCallback chkcond) + TSExecuteCallback chkcond, + ExecPhraseData *data) { + ExecPhraseData Ldata, + Rdata; + bool lmatch, + rmatch; + int Loffset, + Roffset, + maxwidth; + /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (curitem->type == QI_VAL) - { return chkcond(arg, (QueryOperand *) curitem, data); - } - else + + switch (curitem->qoperator.oper) { - ExecPhraseData Ldata = {0, false, NULL}, - Rdata = {0, false, NULL}; - WordEntryPos *Lpos, - *LposStart, - *Rpos, - *pos_iter = NULL; + case OP_NOT: - Assert(curitem->qoperator.oper == OP_PHRASE); + /* + * Because a "true" result with no specific positions is taken as + * uncertain, we need no special care here for !TS_EXEC_CALC_NOT. + * If it's a false positive, the right things happen anyway. + * + * Also, we need not touch data->width, since a NOT operation does + * not change the match width. + */ + if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data)) + { + if (data->npos > 0) + { + /* we have some positions, invert negate flag */ + data->negate = !data->negate; + return true; + } + else if (data->negate) + { + /* change "match everywhere" to "match nowhere" */ + data->negate = false; + return false; + } + /* match positions are, and remain, uncertain */ + return true; + } + else + { + /* change "match nowhere" to "match everywhere" */ + Assert(data->npos == 0 && !data->negate); + data->negate = true; + return true; + } - if (!TS_phrase_execute(curitem + curitem->qoperator.left, - arg, flags, &Ldata, chkcond)) - return false; + case OP_PHRASE: + case OP_AND: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); - if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond)) - return false; + if (!TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata)) + return false; - /* - * If either operand has no position information, then we normally - * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we - * return true, treating OP_PHRASE as if it were OP_AND. - */ - if (Ldata.npos == 0 || Rdata.npos == 0) - return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false; + if (!TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata)) + return false; - /* - * Prepare output position array if needed. - */ - if (data) - { /* - * We can recycle the righthand operand's result array if it was - * palloc'd, else must allocate our own. The number of matches - * couldn't be more than the smaller of the two operands' matches. + * If either operand has no position information, then we can't + * return position data, only a "possible match" result. "Possible + * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag + * is set, otherwise return false. */ - if (!Rdata.allocated) - data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos)); - else - data->pos = Rdata.pos; + if ((Ldata.npos == 0 && !Ldata.negate) || + (Rdata.npos == 0 && !Rdata.negate)) + return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false; - data->allocated = true; - data->npos = 0; - pos_iter = data->pos; - } + if (curitem->qoperator.oper == OP_PHRASE) + { + /* + * Compute Loffset and Roffset suitable for phrase match, and + * compute overall width of whole phrase match. + */ + Loffset = curitem->qoperator.distance + Rdata.width; + Roffset = 0; + if (data) + data->width = curitem->qoperator.distance + + Ldata.width + Rdata.width; + } + else + { + /* + * For OP_AND, set output width and alignment like OP_OR (see + * comment below) + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + if (data) + data->width = maxwidth; + } - /* - * Find matches by distance. WEP_GETPOS() is needed because - * ExecPhraseData->data can point to a tsvector's WordEntryPosVector. - * - * Note that the output positions are those of the matching RIGHT - * operands. - */ - Rpos = Rdata.pos; - LposStart = Ldata.pos; - while (Rpos < Rdata.pos + Rdata.npos) - { - /* - * We need to check all possible distances, so reset Lpos to - * guaranteed not yet satisfied position. - */ - Lpos = LposStart; - while (Lpos < Ldata.pos + Ldata.npos) + if (Ldata.negate && Rdata.negate) { - if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) == - curitem->qoperator.distance) - { - /* MATCH! */ - if (data) - { - /* Store position for upper phrase operator */ - *pos_iter = WEP_GETPOS(*Rpos); - pos_iter++; - - /* - * Set left start position to next, because current - * one could not satisfy distance for any other right - * position - */ - LposStart = Lpos + 1; - break; - } - else - { - /* - * We are at the root of the phrase tree and hence we - * don't have to identify all the match positions. - * Just report success. - */ - return true; - } + /* !L & !R: treat as !(L | R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + if (data) + data->negate = true; + return true; + } + else if (Ldata.negate) + { + /* !L & R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + } + else if (Rdata.negate) + { + /* L & !R */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + } + else + { + /* straight AND */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + } - } - else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) || - WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) < - curitem->qoperator.distance) - { - /* - * Go to the next Rpos, because Lpos is ahead or on less - * distance than required by current operator - */ - break; + case OP_OR: + memset(&Ldata, 0, sizeof(Ldata)); + memset(&Rdata, 0, sizeof(Rdata)); - } + lmatch = TS_phrase_execute(curitem + curitem->qoperator.left, + arg, flags, chkcond, &Ldata); + rmatch = TS_phrase_execute(curitem + 1, + arg, flags, chkcond, &Rdata); - Lpos++; - } + if (!lmatch && !rmatch) + return false; - Rpos++; - } + /* + * If a valid operand has no position information, then we can't + * return position data, only a "possible match" result. "Possible + * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag + * is set, otherwise return false. + */ + if ((lmatch && Ldata.npos == 0 && !Ldata.negate) || + (rmatch && Rdata.npos == 0 && !Rdata.negate)) + return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false; - if (data) - { - data->npos = pos_iter - data->pos; + /* + * Cope with undefined output width from failed submatch. (This + * takes less code than trying to ensure that all failure returns + * set data->width to zero.) + */ + if (!lmatch) + Ldata.width = 0; + if (!rmatch) + Rdata.width = 0; - if (data->npos > 0) + /* + * For OP_AND and OP_OR, report the width of the wider of the two + * inputs, and align the narrower input's positions to the right + * end of that width. This rule deals at least somewhat + * reasonably with cases like "x <-> (y | z <-> q)". + */ + maxwidth = Max(Ldata.width, Rdata.width); + Loffset = maxwidth - Ldata.width; + Roffset = maxwidth - Rdata.width; + data->width = maxwidth; + + if (Ldata.negate && Rdata.negate) + { + /* !L | !R: treat as !(L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH, + Loffset, Roffset, + Min(Ldata.npos, Rdata.npos)); + data->negate = true; return true; - } + } + else if (Ldata.negate) + { + /* !L | R: treat as !(L & !R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_L_ONLY, + Loffset, Roffset, + Ldata.npos); + data->negate = true; + return true; + } + else if (Rdata.negate) + { + /* L | !R: treat as !(!L & R) */ + (void) TS_phrase_output(data, &Ldata, &Rdata, + TSPO_R_ONLY, + Loffset, Roffset, + Rdata.npos); + data->negate = true; + return true; + } + else + { + /* straight OR */ + return TS_phrase_output(data, &Ldata, &Rdata, + TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY, + Loffset, Roffset, + Ldata.npos + Rdata.npos); + } + + default: + elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); } + /* not reachable, but keep compiler quiet */ return false; } @@ -1594,12 +1842,7 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags, return TS_execute(curitem + 1, arg, flags, chkcond); case OP_PHRASE: - - /* - * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could - * do something more if it's called from TS_phrase_execute() - */ - return TS_phrase_execute(curitem, arg, flags, NULL, chkcond); + return TS_phrase_execute(curitem, arg, flags, chkcond, NULL); default: elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper); |