diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-07 20:58:03 +0300 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-07 20:58:03 +0300 |
commit | 1c1791e00065f6986f9d44a78ce7c28b2d1322dd (patch) | |
tree | 7287b278140f8fe30ceb289146afc4fdcc97a040 /src/test | |
parent | 529ab7bd1fb9c836fe5ccd96f79329d407522e20 (diff) | |
download | postgresql-1c1791e00065f6986f9d44a78ce7c28b2d1322dd.tar.gz postgresql-1c1791e00065f6986f9d44a78ce7c28b2d1322dd.zip |
Add json(b)_to_tsvector function
Jsonb has a complex nature so there isn't best-for-everything way to convert it
to tsvector for full text search. Current to_tsvector(json(b)) suggests to
convert only string values, but it's possible to index keys, numerics and even
booleans value. To solve that json(b)_to_tsvector has a second required
argument contained a list of desired types of json fields. Second argument is
a jsonb scalar or array right now with possibility to add new options in a
future.
Bump catalog version
Author: Dmitry Dolgov with some editorization by me
Reviewed by: Teodor Sigaev
Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
Diffstat (limited to 'src/test')
-rw-r--r-- | src/test/regress/expected/json.out | 122 | ||||
-rw-r--r-- | src/test/regress/expected/jsonb.out | 122 | ||||
-rw-r--r-- | src/test/regress/sql/json.sql | 30 | ||||
-rw-r--r-- | src/test/regress/sql/jsonb.sql | 30 |
4 files changed, 304 insertions, 0 deletions
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 06c728e363d..54d06e38f2d 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -2324,6 +2324,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 (1 row) +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); + to_tsvector +--------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 +(1 row) + +-- json_to_tsvector +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"'); + json_to_tsvector +---------------------------------------------------------------------------------------- + '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16 +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"'); + json_to_tsvector +-------------------------------- + 'b':2 'c':4 'd':6 'f':8 'g':10 +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"'); + json_to_tsvector +------------------ + 'aaa':1 'bbb':3 +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"'); + json_to_tsvector +------------------ + '123':1 '456':3 +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"'); + json_to_tsvector +------------------- + 'fals':3 'true':1 +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]'); + json_to_tsvector +--------------------------------- + '123':5 '456':7 'aaa':1 'bbb':3 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"'); + json_to_tsvector +---------------------------------------------------------------------------------------- + '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"'); + json_to_tsvector +-------------------------------- + 'b':2 'c':4 'd':6 'f':8 'g':10 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"'); + json_to_tsvector +------------------ + 'aaa':1 'bbb':3 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"'); + json_to_tsvector +------------------ + '123':1 '456':3 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"'); + json_to_tsvector +------------------- + 'fals':3 'true':1 +(1 row) + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]'); + json_to_tsvector +--------------------------------- + '123':5 '456':7 'aaa':1 'bbb':3 +(1 row) + -- ts_vector corner cases select to_tsvector('""'::json); to_tsvector @@ -2349,6 +2429,48 @@ select to_tsvector('null'::json); (1 row) +-- json_to_tsvector corner cases +select json_to_tsvector('""'::json, '"all"'); + json_to_tsvector +------------------ + +(1 row) + +select json_to_tsvector('{}'::json, '"all"'); + json_to_tsvector +------------------ + +(1 row) + +select json_to_tsvector('[]'::json, '"all"'); + json_to_tsvector +------------------ + +(1 row) + +select json_to_tsvector('null'::json, '"all"'); + json_to_tsvector +------------------ + +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""'); +ERROR: wrong flag in flag array: "" +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}'); +ERROR: wrong flag type, only arrays and scalars are allowed +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]'); + json_to_tsvector +------------------ + +(1 row) + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null'); +ERROR: flag array element is not a string +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]'); +ERROR: flag array element is not a string +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" -- ts_headline for json select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); ts_headline diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out index f8d6e6f7ccd..dd9ae4f7a09 100644 --- a/src/test/regress/expected/jsonb.out +++ b/src/test/regress/expected/jsonb.out @@ -4122,6 +4122,86 @@ select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff gg 'aaa':1 'bbb':3 'ccc':5 'ddd':4 'eee':8 'fff':9 'ggg':10 'hhh':12 'iii':13 (1 row) +-- jsonb to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); + to_tsvector +--------------------------------- + 'aaa':1 'bbb':3 'ccc':5 'ddd':4 +(1 row) + +-- jsonb_to_tsvector +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"'); + jsonb_to_tsvector +---------------------------------------------------------------------------------------- + '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16 +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"'); + jsonb_to_tsvector +-------------------------------- + 'b':2 'c':4 'd':6 'f':8 'g':10 +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"'); + jsonb_to_tsvector +------------------- + 'aaa':1 'bbb':3 +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"'); + jsonb_to_tsvector +------------------- + '123':1 '456':3 +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"'); + jsonb_to_tsvector +------------------- + 'fals':3 'true':1 +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]'); + jsonb_to_tsvector +--------------------------------- + '123':5 '456':7 'aaa':1 'bbb':3 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"'); + jsonb_to_tsvector +---------------------------------------------------------------------------------------- + '123':8 '456':12 'aaa':2 'b':6 'bbb':4 'c':10 'd':14 'f':18 'fals':20 'g':22 'true':16 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"'); + jsonb_to_tsvector +-------------------------------- + 'b':2 'c':4 'd':6 'f':8 'g':10 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"'); + jsonb_to_tsvector +------------------- + 'aaa':1 'bbb':3 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"'); + jsonb_to_tsvector +------------------- + '123':1 '456':3 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"'); + jsonb_to_tsvector +------------------- + 'fals':3 'true':1 +(1 row) + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]'); + jsonb_to_tsvector +--------------------------------- + '123':5 '456':7 'aaa':1 'bbb':3 +(1 row) + -- ts_vector corner cases select to_tsvector('""'::jsonb); to_tsvector @@ -4147,6 +4227,48 @@ select to_tsvector('null'::jsonb); (1 row) +-- jsonb_to_tsvector corner cases +select jsonb_to_tsvector('""'::jsonb, '"all"'); + jsonb_to_tsvector +------------------- + +(1 row) + +select jsonb_to_tsvector('{}'::jsonb, '"all"'); + jsonb_to_tsvector +------------------- + +(1 row) + +select jsonb_to_tsvector('[]'::jsonb, '"all"'); + jsonb_to_tsvector +------------------- + +(1 row) + +select jsonb_to_tsvector('null'::jsonb, '"all"'); + jsonb_to_tsvector +------------------- + +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""'); +ERROR: wrong flag in flag array: "" +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}'); +ERROR: wrong flag type, only arrays and scalars are allowed +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]'); + jsonb_to_tsvector +------------------- + +(1 row) + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null'); +ERROR: flag array element is not a string +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]'); +ERROR: flag array element is not a string +HINT: Possible values are: "string", "numeric", "boolean", "key" and "all" -- ts_headline for jsonb select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); ts_headline diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql index 256652c41f7..add1e01cbaa 100644 --- a/src/test/regress/sql/json.sql +++ b/src/test/regress/sql/json.sql @@ -763,12 +763,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c" -- json to tsvector with stop words select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::json); +-- json to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::json); + +-- json_to_tsvector +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]'); + +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"all"'); +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"key"'); +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"string"'); +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"numeric"'); +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '"boolean"'); +select json_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["string", "numeric"]'); + -- ts_vector corner cases select to_tsvector('""'::json); select to_tsvector('{}'::json); select to_tsvector('[]'::json); select to_tsvector('null'::json); +-- json_to_tsvector corner cases +select json_to_tsvector('""'::json, '"all"'); +select json_to_tsvector('{}'::json, '"all"'); +select json_to_tsvector('[]'::json, '"all"'); +select json_to_tsvector('null'::json, '"all"'); + +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '""'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '{}'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '[]'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, 'null'); +select json_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::json, '["all", null]'); + -- ts_headline for json select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::json, tsquery('bbb & ddd & hhh')); diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql index 2439f949bd5..f31dd2ac759 100644 --- a/src/test/regress/sql/jsonb.sql +++ b/src/test/regress/sql/jsonb.sql @@ -1089,12 +1089,42 @@ select to_tsvector('simple', '{"a": "aaa bbb ddd ccc", "b": ["eee fff ggg"], "c" -- jsonb to tsvector with stop words select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": ["the eee fff ggg"], "c": {"d": "hhh. iii"}}'::jsonb); +-- jsonb to tsvector with numeric values +select to_tsvector('english', '{"a": "aaa in bbb ddd ccc", "b": 123, "c": 456}'::jsonb); + +-- jsonb_to_tsvector +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]'); + +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"all"'); +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"key"'); +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"string"'); +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"numeric"'); +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '"boolean"'); +select jsonb_to_tsvector('english', '{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["string", "numeric"]'); + -- ts_vector corner cases select to_tsvector('""'::jsonb); select to_tsvector('{}'::jsonb); select to_tsvector('[]'::jsonb); select to_tsvector('null'::jsonb); +-- jsonb_to_tsvector corner cases +select jsonb_to_tsvector('""'::jsonb, '"all"'); +select jsonb_to_tsvector('{}'::jsonb, '"all"'); +select jsonb_to_tsvector('[]'::jsonb, '"all"'); +select jsonb_to_tsvector('null'::jsonb, '"all"'); + +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '""'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '{}'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '[]'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, 'null'); +select jsonb_to_tsvector('{"a": "aaa in bbb", "b": 123, "c": 456, "d": true, "f": false, "g": null}'::jsonb, '["all", null]'); + -- ts_headline for jsonb select ts_headline('{"a": "aaa bbb", "b": {"c": "ccc ddd fff", "c1": "ccc1 ddd1"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); select ts_headline('english', '{"a": "aaa bbb", "b": {"c": "ccc ddd fff"}, "d": ["ggg hhh", "iii jjj"]}'::jsonb, tsquery('bbb & ddd & hhh')); |