about summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2016-08-05 16:09:06 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2016-08-05 16:09:06 -0400
commit f10eab73df2b94c860dea4a906c54e3c903f42e2 (patch)
tree 8fedfd12ef602529ca659e163e75e356c11653b2
parent c50d192ce33c10fa06411306f8644b4f47ce9a06 (diff)
download postgresql-f10eab73df2b94c860dea4a906c54e3c903f42e2.tar.gz
postgresql-f10eab73df2b94c860dea4a906c54e3c903f42e2.zip
Make array_to_tsvector() sort and de-duplicate the given strings.
This is required for the result to be a legal tsvector value.
Noted while fooling with Andreas Seltenreich's ts_delete() crash.

Discussion: <87invhoj6e.fsf@credativ.de>
-rw-r--r-- doc/src/sgml/func.sgml 2
-rw-r--r-- src/backend/utils/adt/tsvector_op.c 49
-rw-r--r-- src/test/regress/expected/tstypes.out 7
-rw-r--r-- src/test/regress/sql/tstypes.sql 2
4 files changed, 52 insertions, 8 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 971e642276c..783033403a4 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9294,7 +9294,7 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
<entry><type>tsvector</type></entry>
<entry>convert array of lexemes to <type>tsvector</type></entry>
<entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
- <entry><literal>'fat' 'cat' 'rat'</literal></entry>
+ <entry><literal>'cat' 'fat' 'rat'</literal></entry>
</row>
<row>
<entry>
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 29cc687643c..ad5a254c57e 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -416,17 +416,34 @@ tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
return -1;
}
+/*
+ * qsort comparator functions
+ */
+
static int
-compareint(const void *va, const void *vb)
+compare_int(const void *va, const void *vb)
{
- int32 a = *((const int32 *) va);
- int32 b = *((const int32 *) vb);
+ int a = *((const int *) va);
+ int b = *((const int *) vb);
if (a == b)
return 0;
return (a > b) ? 1 : -1;
}
+static int
+compare_text_lexemes(const void *va, const void *vb)
+{
+ Datum a = *((const Datum *) va);
+ Datum b = *((const Datum *) vb);
+ char *alex = VARDATA_ANY(a);
+ int alex_len = VARSIZE_ANY_EXHDR(a);
+ char *blex = VARDATA_ANY(b);
+ int blex_len = VARSIZE_ANY_EXHDR(b);
+
+ return tsCompareString(alex, alex_len, blex, blex_len, false);
+}
+
/*
* Internal routine to delete lexemes from TSVector by array of offsets.
*
@@ -459,7 +476,7 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
{
int kp;
- qsort(indices_to_delete, indices_count, sizeof(int), compareint);
+ qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
kp = 0;
for (k = 1; k < indices_count; k++)
{
@@ -743,32 +760,50 @@ array_to_tsvector(PG_FUNCTION_ARGS)
bool *nulls;
int nitems,
i,
+ j,
tslen,
datalen = 0;
char *cur;
deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
+ /* Reject nulls (maybe we should just ignore them, instead?) */
for (i = 0; i < nitems; i++)
{
if (nulls[i])
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("lexeme array may not contain nulls")));
+ }
- datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
+ /* Sort and de-dup, because this is required for a valid tsvector. */
+ if (nitems > 1)
+ {
+ qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
+ j = 0;
+ for (i = 1; i < nitems; i++)
+ {
+ if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
+ dlexemes[++j] = dlexemes[i];
+ }
+ nitems = ++j;
}
+ /* Calculate space needed for surviving lexemes. */
+ for (i = 0; i < nitems; i++)
+ datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
tslen = CALCDATASIZE(nitems, datalen);
+
+ /* Allocate and fill tsvector. */
tsout = (TSVector) palloc0(tslen);
SET_VARSIZE(tsout, tslen);
tsout->size = nitems;
+
arrout = ARRPTR(tsout);
cur = STRPTR(tsout);
-
for (i = 0; i < nitems; i++)
{
- char *lex = VARDATA(dlexemes[i]);
+ char *lex = VARDATA_ANY(dlexemes[i]);
int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
memcpy(cur, lex, lex_len);
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out
index 73f43c5ff02..8d9290cbac1 100644
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -1165,6 +1165,13 @@ SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
ERROR: lexeme array may not contain nulls
+-- array_to_tsvector must sort and de-dup
+SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
+ array_to_tsvector
+-------------------
+ 'bar' 'baz' 'foo'
+(1 row)
+
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
setweight
----------------------------------------------------------
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql
index f0c06ba5f5a..9ea93a29938 100644
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -226,6 +226,8 @@ SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
+-- array_to_tsvector must sort and de-dup
+SELECT array_to_tsvector(ARRAY['foo','bar','baz','bar']);
SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');