aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/tsquery.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/tsquery.c')
-rw-r--r--src/backend/utils/adt/tsquery.c767
1 files changed, 767 insertions, 0 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
new file mode 100644
index 00000000000..1f8abb3298e
--- /dev/null
+++ b/src/backend/utils/adt/tsquery.c
@@ -0,0 +1,767 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery.c
+ * I/O functions for tsquery
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+#include "utils/pg_crc.h"
+
+/* parser's states */
+#define WAITOPERAND 1
+#define WAITOPERATOR 2
+#define WAITFIRSTOPERAND 3
+#define WAITSINGLEOPERAND 4
+
+/*
+ * node of query tree, also used
+ * for storing polish notation in parser
+ */
+typedef struct ParseQueryNode
+{
+ int2 weight;
+ int2 type;
+ int4 val;
+ int2 distance;
+ int2 length;
+ struct ParseQueryNode *next;
+} ParseQueryNode;
+
+static char *
+get_weight(char *buf, int2 *weight)
+{
+ *weight = 0;
+
+ if (!t_iseq(buf, ':'))
+ return buf;
+
+ buf++;
+ while (*buf && pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'a':
+ case 'A':
+ *weight |= 1 << 3;
+ break;
+ case 'b':
+ case 'B':
+ *weight |= 1 << 2;
+ break;
+ case 'c':
+ case 'C':
+ *weight |= 1 << 1;
+ break;
+ case 'd':
+ case 'D':
+ *weight |= 1;
+ break;
+ default:
+ return buf;
+ }
+ buf++;
+ }
+
+ return buf;
+}
+
+/*
+ * get token from query string
+ */
+static int4
+gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+{
+ while (1)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '!'))
+ {
+ (state->buf)++; /* can safely ++, t_iseq guarantee
+ * that pg_mblen()==1 */
+ *val = (int4) '!';
+ state->state = WAITOPERAND;
+ return OPR;
+ }
+ else if (t_iseq(state->buf, '('))
+ {
+ state->count++;
+ (state->buf)++;
+ state->state = WAITOPERAND;
+ return OPEN;
+ }
+ else if (t_iseq(state->buf, ':'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error at start of operand in tsearch query: \"%s\"",
+ state->buffer)));
+ }
+ else if (!t_isspace(state->buf))
+ {
+ state->valstate.prsbuf = state->buf;
+ if (gettoken_tsvector(&(state->valstate)))
+ {
+ *strval = state->valstate.word;
+ *lenval = state->valstate.curpos - state->valstate.word;
+ state->buf = get_weight(state->valstate.prsbuf, weight);
+ state->state = WAITOPERATOR;
+ return VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ return END;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("no operand in tsearch query: \"%s\"",
+ state->buffer)));
+ }
+ break;
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+ {
+ state->state = WAITOPERAND;
+ *val = (int4) *(state->buf);
+ (state->buf)++;
+ return OPR;
+ }
+ else if (t_iseq(state->buf, ')'))
+ {
+ (state->buf)++;
+ state->count--;
+ return (state->count < 0) ? ERR : CLOSE;
+ }
+ else if (*(state->buf) == '\0')
+ return (state->count) ? ERR : END;
+ else if (!t_isspace(state->buf))
+ return ERR;
+ break;
+ case WAITSINGLEOPERAND:
+ if (*(state->buf) == '\0')
+ return END;
+ *strval = state->buf;
+ *lenval = strlen(state->buf);
+ state->buf += strlen(state->buf);
+ state->count++;
+ return VAL;
+ default:
+ return ERR;
+ break;
+ }
+ state->buf += pg_mblen(state->buf);
+ }
+ return END;
+}
+
+/*
+ * push new one in polish notation reverse view
+ */
+void
+pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+{
+ ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+
+ tmp->weight = weight;
+ tmp->type = type;
+ tmp->val = val;
+ if (distance >= MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("value is too big in tsearch query: \"%s\"",
+ state->buffer)));
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("operand is too long in tsearch query: \"%s\"",
+ state->buffer)));
+ tmp->distance = distance;
+ tmp->length = lenval;
+ tmp->next = state->str;
+ state->str = tmp;
+ state->num++;
+}
+
+/*
+ * This function is used for tsquery parsing
+ */
+void
+pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+{
+ pg_crc32 c;
+
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("word is too long in tsearch query: \"%s\"",
+ state->buffer)));
+
+ INIT_CRC32(c);
+ COMP_CRC32(c, strval, lenval);
+ FIN_CRC32(c);
+ pushquery(state, type, *(int4 *) &c,
+ state->curop - state->op, lenval, weight);
+
+ while (state->curop - state->op + lenval + 1 >= state->lenop)
+ {
+ int4 tmp = state->curop - state->op;
+
+ state->lenop *= 2;
+ state->op = (char *) repalloc((void *) state->op, state->lenop);
+ state->curop = state->op + tmp;
+ }
+ memcpy((void *) state->curop, (void *) strval, lenval);
+ state->curop += lenval;
+ *(state->curop) = '\0';
+ state->curop++;
+ state->sumlen += lenval + 1 /* \0 */ ;
+ return;
+}
+
+#define STACKDEPTH 32
+/*
+ * make polish notation of query
+ */
+static int4
+makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+{
+ int4 val = 0,
+ type;
+ int4 lenval = 0;
+ char *strval = NULL;
+ int4 stack[STACKDEPTH];
+ int4 lenstack = 0;
+ int2 weight = 0;
+
+ while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+ {
+ switch (type)
+ {
+ case VAL:
+ pushval(state, VAL, strval, lenval, weight);
+ while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case OPR:
+ if (lenstack && val == (int4) '|')
+ pushquery(state, OPR, val, 0, 0, 0);
+ else
+ {
+ if (lenstack == STACKDEPTH) /* internal error */
+ elog(ERROR, "tsquery stack too small");
+ stack[lenstack] = val;
+ lenstack++;
+ }
+ break;
+ case OPEN:
+ if (makepol(state, pushval) == ERR)
+ return ERR;
+ if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
+ stack[lenstack - 1] == (int4) '!'))
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ }
+ break;
+ case CLOSE:
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+ break;
+ case ERR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsearch query: \"%s\"",
+ state->buffer)));
+ return ERR;
+
+ }
+ }
+ while (lenstack)
+ {
+ lenstack--;
+ pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+ };
+ return END;
+}
+
+static void
+findoprnd(QueryItem * ptr, int4 *pos)
+{
+ if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
+ {
+ ptr[*pos].left = 0;
+ (*pos)++;
+ }
+ else if (ptr[*pos].val == (int4) '!')
+ {
+ ptr[*pos].left = 1;
+ (*pos)++;
+ findoprnd(ptr, pos);
+ }
+ else
+ {
+ QueryItem *curitem = &ptr[*pos];
+ int4 tmp = *pos;
+
+ (*pos)++;
+ findoprnd(ptr, pos);
+ curitem->left = *pos - tmp;
+ findoprnd(ptr, pos);
+ }
+}
+
+
+/*
+ * input
+ */
+TSQuery
+parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+{
+ TSQueryParserState state;
+ int4 i;
+ TSQuery query;
+ int4 commonlen;
+ QueryItem *ptr;
+ ParseQueryNode *tmp;
+ int4 pos = 0;
+
+ /* init state */
+ state.buffer = buf;
+ state.buf = buf;
+ state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
+ state.count = 0;
+ state.num = 0;
+ state.str = NULL;
+ state.cfg_id = cfg_id;
+
+ /* init value parser's state */
+ state.valstate.oprisdelim = true;
+ state.valstate.len = 32;
+ state.valstate.word = (char *) palloc(state.valstate.len);
+
+ /* init list of operand */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state, pushval);
+ pfree(state.valstate.word);
+ if (!state.num)
+ {
+ ereport(NOTICE,
+ (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
+ state.buffer)));
+ query = (TSQuery) palloc(HDRSIZETQ);
+ SET_VARSIZE(query, HDRSIZETQ);
+ query->size = 0;
+ return query;
+ }
+
+ /* make finish struct */
+ commonlen = COMPUTESIZE(state.num, state.sumlen);
+ query = (TSQuery) palloc(commonlen);
+ SET_VARSIZE(query, commonlen);
+ query->size = state.num;
+ ptr = GETQUERY(query);
+
+ /* set item in polish notation */
+ for (i = 0; i < state.num; i++)
+ {
+ ptr[i].weight = state.str->weight;
+ ptr[i].type = state.str->type;
+ ptr[i].val = state.str->val;
+ ptr[i].distance = state.str->distance;
+ ptr[i].length = state.str->length;
+ tmp = state.str->next;
+ pfree(state.str);
+ state.str = tmp;
+ }
+
+ /* set user friendly-operand view */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /* set left operand's position for every operator */
+ pos = 0;
+ findoprnd(ptr, &pos);
+
+ return query;
+}
+
+/*
+ * in without morphology
+ */
+Datum
+tsqueryin(PG_FUNCTION_ARGS)
+{
+ char *in = PG_GETARG_CSTRING(0);
+
+ pg_verifymbstr(in, strlen(in), false);
+
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+}
+
+/*
+ * out function
+ */
+typedef struct
+{
+ QueryItem *curpol;
+ char *buf;
+ char *cur;
+ char *op;
+ int4 buflen;
+} INFIX;
+
+#define RESIZEBUF(inf,addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int4 len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursive walk on tree and print it in
+ * infix (human-readable) view
+ */
+static void
+infix(INFIX * in, bool first)
+{
+ if (in->curpol->type == VAL)
+ {
+ char *op = in->op + in->curpol->distance;
+ int clen;
+
+ RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+ *(in->cur) = '\'';
+ in->cur++;
+ while (*op)
+ {
+ if (t_iseq(op, '\''))
+ {
+ *(in->cur) = '\'';
+ in->cur++;
+ }
+ COPYCHAR(in->cur, op);
+
+ clen = pg_mblen(op);
+ op += clen;
+ in->cur += clen;
+ }
+ *(in->cur) = '\'';
+ in->cur++;
+ if (in->curpol->weight)
+ {
+ *(in->cur) = ':';
+ in->cur++;
+ if (in->curpol->weight & (1 << 3))
+ {
+ *(in->cur) = 'A';
+ in->cur++;
+ }
+ if (in->curpol->weight & (1 << 2))
+ {
+ *(in->cur) = 'B';
+ in->cur++;
+ }
+ if (in->curpol->weight & (1 << 1))
+ {
+ *(in->cur) = 'C';
+ in->cur++;
+ }
+ if (in->curpol->weight & 1)
+ {
+ *(in->cur) = 'D';
+ in->cur++;
+ }
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->val == (int4) '!')
+ {
+ bool isopr = false;
+
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+ if (in->curpol->type == OPR)
+ {
+ isopr = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ infix(in, isopr);
+ if (isopr)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int4 op = in->curpol->val;
+ INFIX nrm;
+
+ in->curpol++;
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, false);
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+ sprintf(in->cur, " %c %s", op, nrm.buf);
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (op == (int4) '|' && !first)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+
+Datum
+tsqueryout(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+
+ if (query->size == 0)
+ {
+ char *b = palloc(1);
+
+ *b = '\0';
+ PG_RETURN_POINTER(b);
+ }
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_CSTRING(nrm.buf);
+}
+
+Datum
+tsquerysend(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ StringInfoData buf;
+ int i;
+ QueryItem *item = GETQUERY(query);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint(&buf, query->size, sizeof(int32));
+ for (i = 0; i < query->size; i++)
+ {
+ int tmp;
+
+ pq_sendint(&buf, item->type, sizeof(item->type));
+ pq_sendint(&buf, item->weight, sizeof(item->weight));
+ pq_sendint(&buf, item->left, sizeof(item->left));
+ pq_sendint(&buf, item->val, sizeof(item->val));
+
+ /*
+ * We are sure that sizeof(WordEntry) == sizeof(int32), and about
+ * layout of QueryItem
+ */
+ tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
+ pq_sendint(&buf, tmp, sizeof(tmp));
+
+ item++;
+ }
+
+ item = GETQUERY(query);
+ for (i = 0; i < query->size; i++)
+ {
+ if (item->type == VAL)
+ pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+ item++;
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsqueryrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSQuery query;
+ int i,
+ size,
+ tmp,
+ len = HDRSIZETQ;
+ QueryItem *item;
+ int datalen = 0;
+ char *ptr;
+
+ size = pq_getmsgint(buf, sizeof(uint32));
+ if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
+ elog(ERROR, "invalid size of tsquery");
+ len += sizeof(QueryItem) * size;
+
+ query = (TSQuery) palloc(len);
+ query->size = size;
+ item = GETQUERY(query);
+
+ for (i = 0; i < size; i++)
+ {
+ item->type = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
+ item->left = (int16) pq_getmsgint(buf, sizeof(int16));
+ item->val = (int32) pq_getmsgint(buf, sizeof(int32));
+ tmp = pq_getmsgint(buf, sizeof(int32));
+ memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
+
+ /*
+ * Sanity checks
+ */
+ if (item->type == VAL)
+ {
+ datalen += item->length + 1; /* \0 */
+ }
+ else if (item->type == OPR)
+ {
+ if (item->val == '|' || item->val == '&')
+ {
+ if (item->left <= 0 || i + item->left >= size)
+ elog(ERROR, "invalid pointer to left operand");
+ }
+
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+ }
+ else
+ elog(ERROR, "unknown tsquery node type");
+
+ item++;
+ }
+
+ query = (TSQuery) repalloc(query, len + datalen);
+
+ item = GETQUERY(query);
+ ptr = GETOPERAND(query);
+ for (i = 0; i < size; i++)
+ {
+ if (item->type == VAL)
+ {
+ item->distance = ptr - GETOPERAND(query);
+ memcpy(ptr,
+ pq_getmsgbytes(buf, item->length),
+ item->length);
+ ptr += item->length;
+ *ptr++ = '\0';
+ }
+ item++;
+ }
+
+ Assert(ptr - GETOPERAND(query) == datalen);
+
+ SET_VARSIZE(query, len + datalen);
+
+ PG_RETURN_TSVECTOR(query);
+}
+
+/*
+ * debug function, used only for view query
+ * which will be executed in non-leaf pages in index
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+ text *res;
+ QueryItem *q;
+ int4 len;
+
+ if (query->size == 0)
+ {
+ res = (text *) palloc(VARHDRSZ);
+ SET_VARSIZE(res, VARHDRSZ);
+ PG_RETURN_POINTER(res);
+ }
+
+ q = clean_NOT(GETQUERY(query), &len);
+
+ if (!q)
+ {
+ res = (text *) palloc(1 + VARHDRSZ);
+ SET_VARSIZE(res, 1 + VARHDRSZ);
+ *((char *) VARDATA(res)) = 'T';
+ }
+ else
+ {
+ nrm.curpol = q;
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, true);
+
+ res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
+ SET_VARSIZE(res, nrm.cur - nrm.buf + VARHDRSZ);
+ strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
+ pfree(q);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_POINTER(res);
+}