diff options
Diffstat (limited to 'src/backend/utils/adt/tsquery_cleanup.c')
-rw-r--r-- | src/backend/utils/adt/tsquery_cleanup.c | 242 |
1 files changed, 4 insertions, 238 deletions
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c index 330664da639..c10c7ef0aa5 100644 --- a/src/backend/utils/adt/tsquery_cleanup.c +++ b/src/backend/utils/adt/tsquery_cleanup.c @@ -26,19 +26,6 @@ typedef struct NODE } NODE; /* - * To simplify walking on query tree and pushing down of phrase operator - * we define some fake priority here: phrase operator has highest priority - * of any other operators (and we believe here that OP_PHRASE is a highest - * code of operations) and value node has ever highest priority. - * Priority values of other operations don't matter until they are less than - * phrase operator and value node. - */ -#define VALUE_PRIORITY (OP_COUNT + 1) -#define NODE_PRIORITY(x) \ - ( ((x)->valnode->qoperator.type == QI_OPR) ? \ - (x)->valnode->qoperator.oper : VALUE_PRIORITY ) - -/* * make query tree from plain view of query */ static NODE * @@ -368,227 +355,6 @@ clean_stopword_intree(NODE *node, int *ladd, int *radd) return node; } -static NODE * -copyNODE(NODE *node) -{ - NODE *cnode = palloc(sizeof(NODE)); - - /* since this function recurses, it could be driven to stack overflow. */ - check_stack_depth(); - - cnode->valnode = palloc(sizeof(QueryItem)); - *(cnode->valnode) = *(node->valnode); - - if (node->valnode->type == QI_OPR) - { - cnode->right = copyNODE(node->right); - if (node->valnode->qoperator.oper != OP_NOT) - cnode->left = copyNODE(node->left); - } - - return cnode; -} - -static NODE * -makeNODE(int8 op, NODE *left, NODE *right) -{ - NODE *node = palloc(sizeof(NODE)); - - /* zeroing allocation to prevent difference in unused bytes */ - node->valnode = palloc0(sizeof(QueryItem)); - - node->valnode->qoperator.type = QI_OPR; - node->valnode->qoperator.oper = op; - - node->left = left; - node->right = right; - - return node; -} - -/* - * Move operation with high priority to the leaves. This guarantees - * that the phrase operator will be near the bottom of the tree. - * An idea behind is do not store position of lexemes during execution - * of ordinary operations (AND, OR, NOT) because it could be expensive. - * Actual transformation will be performed only on subtrees under the - * <-> (<n>) operation since it's needed solely for the phrase operator. - * - * Rules: - * a <-> (b | c) => (a <-> b) | (a <-> c) - * (a | b) <-> c => (a <-> c) | (b <-> c) - * a <-> !b => a & !(a <-> b) - * !a <-> b => b & !(a <-> b) - * - * Warnings for readers: - * a <-> b != b <-> a - * - * a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are: - * n 2n-1 - */ -static NODE * -normalize_phrase_tree(NODE *node) -{ - /* there should be no stop words at this point */ - Assert(node->valnode->type != QI_VALSTOP); - - if (node->valnode->type == QI_VAL) - return node; - - /* since this function recurses, it could be driven to stack overflow. */ - check_stack_depth(); - - Assert(node->valnode->type == QI_OPR); - - if (node->valnode->qoperator.oper == OP_NOT) - { - NODE *orignode = node; - - /* eliminate NOT sequence */ - while (node->valnode->type == QI_OPR && - node->valnode->qoperator.oper == node->right->valnode->qoperator.oper) - { - node = node->right->right; - } - - if (orignode != node) - /* current node isn't checked yet */ - node = normalize_phrase_tree(node); - else - node->right = normalize_phrase_tree(node->right); - } - else if (node->valnode->qoperator.oper == OP_PHRASE) - { - int16 distance; - NODE *X; - - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - - /* - * if subtree contains only nodes with higher "priority" then we are - * done. See comment near NODE_PRIORITY() - */ - if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) && - NODE_PRIORITY(node) <= NODE_PRIORITY(node->left)) - return node; - - /* - * We can't swap left-right and works only with left child because of - * a <-> b != b <-> a - */ - - distance = node->valnode->qoperator.distance; - - if (node->right->valnode->type == QI_OPR) - { - switch (node->right->valnode->qoperator.oper) - { - case OP_AND: - /* a <-> (b & c) => (a <-> b) & (a <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* a <-> (b | c) => (a <-> b) | (a <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left, - node->right->left), - makeNODE(OP_PHRASE, - copyNODE(node->left), - node->right->right)); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* a <-> !b => a & !(a <-> b) */ - X = node->right; - node->right = node->right->right; - X->right = node; - node = makeNODE(OP_AND, - copyNODE(node->left), - X); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->right->valnode->qoperator.oper); - } - } - - if (node->left->valnode->type == QI_OPR && - node->valnode->qoperator.oper == OP_PHRASE) - { - /* - * if the node is still OP_PHRASE, check the left subtree, - * otherwise the whole node will be transformed later. - */ - switch (node->left->valnode->qoperator.oper) - { - case OP_AND: - /* (a & b) <-> c => (a <-> c) & (b <-> c) */ - node = makeNODE(OP_AND, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_OR: - /* (a | b) <-> c => (a <-> c) | (b <-> c) */ - node = makeNODE(OP_OR, - makeNODE(OP_PHRASE, - node->left->left, - node->right), - makeNODE(OP_PHRASE, - node->left->right, - copyNODE(node->right))); - node->left->valnode->qoperator.distance = - node->right->valnode->qoperator.distance = distance; - break; - case OP_NOT: - /* !a <-> b => b & !(a <-> b) */ - X = node->left; - node->left = node->left->right; - X->right = node; - node = makeNODE(OP_AND, - X, - copyNODE(node->right)); - break; - case OP_PHRASE: - /* no-op */ - break; - default: - elog(ERROR, "Wrong type of tsquery node: %d", - node->left->valnode->qoperator.oper); - } - } - - /* continue transformation */ - node = normalize_phrase_tree(node); - } - else /* AND or OR */ - { - node->left = normalize_phrase_tree(node->left); - node->right = normalize_phrase_tree(node->right); - } - - return node; -} - /* * Number of elements in query tree */ @@ -613,8 +379,11 @@ calcstrlen(NODE *node) return size; } +/* + * Remove QI_VALSTOP (stopword) nodes from TSQuery. + */ TSQuery -cleanup_fakeval_and_phrase(TSQuery in) +cleanup_tsquery_stopwords(TSQuery in) { int32 len, lenstr, @@ -642,9 +411,6 @@ cleanup_fakeval_and_phrase(TSQuery in) return out; } - /* push OP_PHRASE nodes down */ - root = normalize_phrase_tree(root); - /* * Build TSQuery from plain view */ |