diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2022-12-27 12:00:31 -0500 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2022-12-27 12:00:31 -0500 |
commit | 78212f21011449f0374831323655baa7c00f3680 (patch) | |
tree | 6ff1d8ba1c89999920225d8f6bfa2d8567a24d77 /src/backend/utils/adt/tsquery.c | |
parent | eb8312a22a84c33fc405ae9b497113973f552f90 (diff) | |
download | postgresql-78212f21011449f0374831323655baa7c00f3680.tar.gz postgresql-78212f21011449f0374831323655baa7c00f3680.zip |
Convert tsqueryin and tsvectorin to report errors softly.

This is slightly tedious because the adjustments cascade through
a couple of levels of subroutines, but it's not very hard.
I chose to avoid changing function signatures more than absolutely
necessary, by passing the escontext pointer in existing structs
where possible.

tsquery's nuisance NOTICEs about empty queries are suppressed in
soft-error mode, since they're not errors and we surely don't want
them to be shown to the user anyway. Maybe that whole behavior
should be reconsidered.

Discussion: https://postgr.es/m/3824377.1672076822@sss.pgh.pa.us
Diffstat (limited to 'src/backend/utils/adt/tsquery.c')
-rw-r--r-- | src/backend/utils/adt/tsquery.c | 100 |
1 files changed, 76 insertions, 24 deletions
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index a206926042e..1097294d55f 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -16,6 +16,7 @@ #include "libpq/pqformat.h" #include "miscadmin.h" +#include "nodes/miscnodes.h" #include "tsearch/ts_locale.h" #include "tsearch/ts_type.h" #include "tsearch/ts_utils.h" @@ -58,10 +59,16 @@ typedef enum /* * get token from query string * - * *operator is filled in with OP_* when return values is PT_OPR, - * but *weight could contain a distance value in case of phrase operator. - * *strval, *lenval and *weight are filled in when return value is PT_VAL + * All arguments except "state" are output arguments. * + * If return value is PT_OPR, then *operator is filled with an OP_* code + * and *weight will contain a distance value in case of phrase operator. + * + * If return value is PT_VAL, then *lenval, *strval, *weight, and *prefix + * are filled. + * + * If PT_ERR is returned then a soft error has occurred. If state->escontext + * isn't already filled then this should be reported as a generic parse error. 
*/ typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator, int *lenval, char **strval, @@ -93,6 +100,9 @@ struct TSQueryParserStateData /* state for value's parser */ TSVectorParseState valstate; + + /* context object for soft errors - must match valstate's escontext */ + Node *escontext; }; /* @@ -194,7 +204,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance) if (ptr == endptr) return false; else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS) - ereport(ERROR, + ereturn(pstate->escontext, false, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("distance in phrase operator must be an integer value between zero and %d inclusive", MAXENTRYPOS))); @@ -301,10 +311,8 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, } else if (t_iseq(state->buf, ':')) { - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error in tsquery: \"%s\"", - state->buffer))); + /* generic syntax error message is fine */ + return PT_ERR; } else if (!t_isspace(state->buf)) { @@ -320,12 +328,17 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, state->state = WAITOPERATOR; return PT_VAL; } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + /* gettoken_tsvector reported a soft error */ + return PT_ERR; + } else if (state->state == WAITFIRSTOPERAND) { return PT_END; } else - ereport(ERROR, + ereturn(state->escontext, PT_ERR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("no operand in tsquery: \"%s\"", state->buffer))); @@ -354,6 +367,11 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, *operator = OP_PHRASE; return PT_OPR; } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + /* parse_phrase_operator reported a soft error */ + return PT_ERR; + } else if (t_iseq(state->buf, ')')) { state->buf++; @@ -438,6 +456,11 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->state = WAITOPERATOR; return PT_VAL; } + else if (SOFT_ERROR_OCCURRED(state->escontext)) + { + 
/* gettoken_tsvector reported a soft error */ + return PT_ERR; + } else if (state->state == WAITFIRSTOPERAND) { return PT_END; @@ -529,12 +552,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int QueryOperand *tmp; if (distance >= MAXSTRPOS) - ereport(ERROR, + ereturn(state->escontext,, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("value is too big in tsquery: \"%s\"", state->buffer))); if (lenval >= MAXSTRLEN) - ereport(ERROR, + ereturn(state->escontext,, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("operand is too long in tsquery: \"%s\"", state->buffer))); @@ -562,7 +585,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool pg_crc32 valcrc; if (lenval >= MAXSTRLEN) - ereport(ERROR, + ereturn(state->escontext,, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("word is too long in tsquery: \"%s\"", state->buffer))); @@ -686,11 +709,17 @@ makepol(TSQueryParserState state, return; case PT_ERR: default: - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("syntax error in tsquery: \"%s\"", - state->buffer))); + /* don't overwrite a soft error saved by gettoken function */ + if (!SOFT_ERROR_OCCURRED(state->escontext)) + errsave(state->escontext, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("syntax error in tsquery: \"%s\"", + state->buffer))); + return; } + /* detect soft error in pushval or recursion */ + if (SOFT_ERROR_OCCURRED(state->escontext)) + return; } cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ ); @@ -769,6 +798,8 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup) /* + * Parse the tsquery stored in "buf". + * * Each value (operand) in the query is passed to pushval. pushval can * transform the simple value to an arbitrarily complex expression using * pushValue and pushOperator. 
It must push a single value with pushValue, @@ -778,12 +809,19 @@ findoprnd(QueryItem *ptr, int size, bool *needcleanup) * * opaque is passed on to pushval as is, pushval can use it to store its * private state. + * + * The pushval function can record soft errors via escontext. + * Callers must check SOFT_ERROR_OCCURRED to detect that. + * + * A bitmask of flags (see ts_utils.h) and an error context object + * can be provided as well. If a soft error occurs, NULL is returned. */ TSQuery parse_tsquery(char *buf, PushFunction pushval, Datum opaque, - int flags) + int flags, + Node *escontext) { struct TSQueryParserStateData state; int i; @@ -791,6 +829,7 @@ parse_tsquery(char *buf, int commonlen; QueryItem *ptr; ListCell *cell; + bool noisy; bool needcleanup; int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY; @@ -808,15 +847,19 @@ parse_tsquery(char *buf, else state.gettoken = gettoken_query_standard; + /* emit nuisance NOTICEs only if not doing soft errors */ + noisy = !(escontext && IsA(escontext, ErrorSaveContext)); + /* init state */ state.buffer = buf; state.buf = buf; state.count = 0; state.state = WAITFIRSTOPERAND; state.polstr = NIL; + state.escontext = escontext; /* init value parser's state */ - state.valstate = init_tsvector_parser(state.buffer, tsv_flags); + state.valstate = init_tsvector_parser(state.buffer, tsv_flags, escontext); /* init list of operand */ state.sumlen = 0; @@ -829,11 +872,15 @@ parse_tsquery(char *buf, close_tsvector_parser(state.valstate); + if (SOFT_ERROR_OCCURRED(escontext)) + return NULL; + if (state.polstr == NIL) { - ereport(NOTICE, - (errmsg("text-search query doesn't contain lexemes: \"%s\"", - state.buffer))); + if (noisy) + ereport(NOTICE, + (errmsg("text-search query doesn't contain lexemes: \"%s\"", + state.buffer))); query = (TSQuery) palloc(HDRSIZETQ); SET_VARSIZE(query, HDRSIZETQ); query->size = 0; @@ -841,7 +888,7 @@ parse_tsquery(char *buf, } if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen)) - 
ereport(ERROR, + ereturn(escontext, NULL, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("tsquery is too large"))); commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen); @@ -889,7 +936,7 @@ parse_tsquery(char *buf, * If there are QI_VALSTOP nodes, delete them and simplify the tree. */ if (needcleanup) - query = cleanup_tsquery_stopwords(query); + query = cleanup_tsquery_stopwords(query, noisy); return query; } @@ -908,8 +955,13 @@ Datum tsqueryin(PG_FUNCTION_ARGS) { char *in = PG_GETARG_CSTRING(0); + Node *escontext = fcinfo->context; - PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0)); + PG_RETURN_TSQUERY(parse_tsquery(in, + pushval_asis, + PointerGetDatum(NULL), + 0, + escontext)); } /* |