diff options
-rw-r--r-- | src/backend/tsearch/to_tsany.c | 120 | ||||
-rw-r--r-- | src/include/tsearch/ts_type.h | 9 |
2 files changed, 58 insertions, 71 deletions
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index 6400440756d..b410a49908a 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -28,11 +28,11 @@ typedef struct MorphOpaque
 typedef struct TSVectorBuildState
 {
 	ParsedText *prs;
-	TSVector result;
 	Oid cfgId;
 } TSVectorBuildState;
 
-static void add_to_tsvector(void *state, char *elem_value, int elem_len);
+static void add_to_tsvector(void *_state, char *elem_value, int elem_len);
+
 
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
@@ -270,34 +270,33 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
 {
 	Oid cfgId = PG_GETARG_OID(0);
 	Jsonb *jb = PG_GETARG_JSONB(1);
+	TSVector result;
 	TSVectorBuildState state;
-	ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
+	ParsedText prs;
 
-	prs->words = NULL;
-	state.result = NULL;
+	prs.words = NULL;
+	prs.curwords = 0;
+	state.prs = &prs;
 	state.cfgId = cfgId;
-	state.prs = prs;
 
-	iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+	iterate_jsonb_string_values(jb, &state, add_to_tsvector);
 
-	PG_FREE_IF_COPY(jb, 1);
-
-	if (state.result == NULL)
+	if (prs.curwords > 0)
+		result = make_tsvector(&prs);
+	else
 	{
 		/*
-		 * There weren't any string elements in jsonb, so wee need to return
-		 * an empty vector
+		 * There weren't any string elements in jsonb, so we need to return an
+		 * empty vector
 		 */
-
-		if (prs->words != NULL)
-			pfree(prs->words);
-
-		state.result = palloc(CALCDATASIZE(0, 0));
-		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
-		state.result->size = 0;
+		result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(result, CALCDATASIZE(0, 0));
+		result->size = 0;
 	}
 
-	PG_RETURN_TSVECTOR(state.result);
+	PG_FREE_IF_COPY(jb, 1);
+
+	PG_RETURN_TSVECTOR(result);
 }
 
 Datum
@@ -317,33 +316,33 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
 {
 	Oid cfgId = PG_GETARG_OID(0);
 	text *json = PG_GETARG_TEXT_P(1);
+	TSVector result;
 	TSVectorBuildState state;
-	ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
+	ParsedText prs;
 
-	prs->words = NULL;
-	state.result = NULL;
+	prs.words = NULL;
+	prs.curwords = 0;
+	state.prs = &prs;
 	state.cfgId = cfgId;
-	state.prs = prs;
 
-	iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
+	iterate_json_string_values(json, &state, add_to_tsvector);
 
-	PG_FREE_IF_COPY(json, 1);
-	if (state.result == NULL)
+	if (prs.curwords > 0)
+		result = make_tsvector(&prs);
+	else
 	{
 		/*
-		 * There weren't any string elements in json, so wee need to return an
+		 * There weren't any string elements in json, so we need to return an
 		 * empty vector
 		 */
-
-		if (prs->words != NULL)
-			pfree(prs->words);
-
-		state.result = palloc(CALCDATASIZE(0, 0));
-		SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
-		state.result->size = 0;
+		result = palloc(CALCDATASIZE(0, 0));
+		SET_VARSIZE(result, CALCDATASIZE(0, 0));
+		result->size = 0;
 	}
 
-	PG_RETURN_TSVECTOR(state.result);
+	PG_FREE_IF_COPY(json, 1);
+
+	PG_RETURN_TSVECTOR(result);
 }
 
 Datum
@@ -359,45 +358,42 @@ json_to_tsvector(PG_FUNCTION_ARGS)
 }
 
 /*
- * Extend current TSVector from _state with a new one,
- * build over a json(b) element.
+ * Parse lexemes in an element of a json(b) value, add to TSVectorBuildState.
  */
 static void
 add_to_tsvector(void *_state, char *elem_value, int elem_len)
 {
 	TSVectorBuildState *state = (TSVectorBuildState *) _state;
 	ParsedText *prs = state->prs;
-	TSVector item_vector;
-	int i;
+	int32 prevwords;
 
-	prs->lenwords = elem_len / 6;
-	if (prs->lenwords == 0)
-		prs->lenwords = 2;
+	if (prs->words == NULL)
+	{
+		/*
+		 * First time through: initialize words array to a reasonable size.
+		 * (parsetext() will realloc it bigger as needed.)
+		 */
+		prs->lenwords = Max(elem_len / 6, 64);
+		prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
+		prs->curwords = 0;
+		prs->pos = 0;
+	}
 
-	prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
-	prs->curwords = 0;
-	prs->pos = 0;
+	prevwords = prs->curwords;
 
 	parsetext(state->cfgId, prs, elem_value, elem_len);
 
-	if (prs->curwords)
-	{
-		if (state->result != NULL)
-		{
-			for (i = 0; i < prs->curwords; i++)
-				prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
-
-			item_vector = make_tsvector(prs);
-
-			state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
-											TSVectorGetDatum(state->result),
-											PointerGetDatum(item_vector));
-		}
-		else
-			state->result = make_tsvector(prs);
-	}
+	/*
+	 * If we extracted any words from this JSON element, advance pos to create
+	 * an artificial break between elements. This is because we don't want
+	 * phrase searches to think that the last word in this element is adjacent
+	 * to the first word in the next one.
+	 */
+	if (prs->curwords > prevwords)
+		prs->pos += 1;
 }
 
+
 /*
  * to_tsquery
  */
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h
index 2885bc01532..30d7c4bccdb 100644
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -86,15 +86,6 @@ typedef struct
 #define MAXNUMPOS (256)
 #define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) )
 
-/*
- * In case if a TSVector contains several parts and we want to treat them as
- * separate, it's necessary to add an artificial increment to position of each
- * lexeme from every next part. It's required to avoid the situation when
- * tsquery can find a phrase consisting of lexemes from two of such parts.
- * TS_JUMP defined a value of this increment.
- */
-#define TS_JUMP 1
-
 /* This struct represents a complete tsvector datum */
 typedef struct
 {