about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us>, 2020-03-10 12:29:59 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us>, 2020-03-10 12:30:02 -0400
commit: d01f03a495294f726001a90ec27a675428d9fc45 (patch)
tree: bc48d90e3c2cf5a8b3010a3f8a5a5e36fe1e45d8 /src
parent: 40b3e2c201af10c27a2c4c5bfcd029026b4cdff3 (diff)
download: postgresql-d01f03a495294f726001a90ec27a675428d9fc45.tar.gz
download: postgresql-d01f03a495294f726001a90ec27a675428d9fc45.zip
Preserve integer and float values accurately in (de)serialize_deflist.

Previously, this code just smashed all types of DefElem values to strings, cavalierly reasoning that nobody would care. But in point of fact, most of the defGetFoo functions do distinguish among different input syntaxes; for instance defGetBoolean will accept 1 as an integer but not "1" as a string. This led to CREATE/ALTER TEXT SEARCH DICTIONARY accepting 0 and 1 as values for boolean dictionary properties, only to have the dictionary fail at runtime.

We can upgrade this behavior by teaching serialize_deflist that it does not need to quote T_Integer or T_Float nodes' values on output, and then teaching deserialize_deflist to restore unquoted integer or float values as the appropriate node type. This should not break anything using pg_ts_dict.dictinitoption, since that field is just defined as being something valid to include in CREATE TEXT SEARCH DICTIONARY.

deserialize_deflist is also used to parse the options arguments for the ts_headline family of functions, but so far as I can see this won't cause any problems there either: the only consumer of that output is prsd_headline which always uses defGetString. (Really that's a bad idea, but I won't risk changing it here.)

This is surely a bug fix, but given the lack of field complaints I don't think it's necessary to back-patch.

Discussion: https://postgr.es/m/CAMkU=1xRcs_BUPzR0+V3WndaCAv0E_m3h6aUEJ8NF-sY1nnHsw@mail.gmail.com

Diffstat (limited to 'src')
-rw-r--r--  src/backend/commands/tsearchcmds.c     | 90
-rw-r--r--  src/test/regress/expected/tsdicts.out  | 35
-rw-r--r--  src/test/regress/sql/tsdicts.sql       | 13
3 files changed, 117 insertions, 21 deletions
diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c
index 9dca682e874..9da8f7fd579 100644
--- a/src/backend/commands/tsearchcmds.c
+++ b/src/backend/commands/tsearchcmds.c
@@ -36,6 +36,7 @@
#include "commands/alter.h"
#include "commands/defrem.h"
#include "commands/event_trigger.h"
+#include "common/string.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "parser/parse_func.h"
@@ -52,6 +53,8 @@ static void MakeConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
static void DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
HeapTuple tup, Relation relMap);
+static DefElem *buildDefItem(const char *name, const char *val,
+ bool was_quoted);
/* --------------------- TS Parser commands ------------------------ */
@@ -1519,9 +1522,6 @@ DropConfigurationMapping(AlterTSConfigurationStmt *stmt,
* For the convenience of pg_dump, the output is formatted exactly as it
* would need to appear in CREATE TEXT SEARCH DICTIONARY to reproduce the
* same options.
- *
- * Note that we assume that only the textual representation of an option's
- * value is interesting --- hence, non-string DefElems get forced to strings.
*/
text *
serialize_deflist(List *deflist)
@@ -1539,19 +1539,30 @@ serialize_deflist(List *deflist)
appendStringInfo(&buf, "%s = ",
quote_identifier(defel->defname));
- /* If backslashes appear, force E syntax to determine their handling */
- if (strchr(val, '\\'))
- appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
- appendStringInfoChar(&buf, '\'');
- while (*val)
+
+ /*
+ * If the value is a T_Integer or T_Float, emit it without quotes,
+ * otherwise with quotes. This is essential to allow correct
+ * reconstruction of the node type as well as the value.
+ */
+ if (IsA(defel->arg, Integer) || IsA(defel->arg, Float))
+ appendStringInfoString(&buf, val);
+ else
{
- char ch = *val++;
+ /* If backslashes appear, force E syntax to quote them safely */
+ if (strchr(val, '\\'))
+ appendStringInfoChar(&buf, ESCAPE_STRING_SYNTAX);
+ appendStringInfoChar(&buf, '\'');
+ while (*val)
+ {
+ char ch = *val++;
- if (SQL_STR_DOUBLE(ch, true))
+ if (SQL_STR_DOUBLE(ch, true))
+ appendStringInfoChar(&buf, ch);
appendStringInfoChar(&buf, ch);
- appendStringInfoChar(&buf, ch);
+ }
+ appendStringInfoChar(&buf, '\'');
}
- appendStringInfoChar(&buf, '\'');
if (lnext(deflist, l) != NULL)
appendStringInfoString(&buf, ", ");
}
@@ -1566,7 +1577,7 @@ serialize_deflist(List *deflist)
*
* This is also used for prsheadline options, so for backward compatibility
* we need to accept a few things serialize_deflist() will never emit:
- * in particular, unquoted and double-quoted values.
+ * in particular, unquoted and double-quoted strings.
*/
List *
deserialize_deflist(Datum txt)
@@ -1694,8 +1705,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
- makeDefElem(pstrdup(workspace),
- (Node *) makeString(pstrdup(startvalue)), -1));
+ buildDefItem(workspace,
+ startvalue,
+ true));
state = CS_WAITKEY;
}
}
@@ -1726,8 +1738,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
- makeDefElem(pstrdup(workspace),
- (Node *) makeString(pstrdup(startvalue)), -1));
+ buildDefItem(workspace,
+ startvalue,
+ true));
state = CS_WAITKEY;
}
}
@@ -1741,8 +1754,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
- makeDefElem(pstrdup(workspace),
- (Node *) makeString(pstrdup(startvalue)), -1));
+ buildDefItem(workspace,
+ startvalue,
+ false));
state = CS_WAITKEY;
}
else
@@ -1760,8 +1774,9 @@ deserialize_deflist(Datum txt)
{
*wsptr++ = '\0';
result = lappend(result,
- makeDefElem(pstrdup(workspace),
- (Node *) makeString(pstrdup(startvalue)), -1));
+ buildDefItem(workspace,
+ startvalue,
+ false));
}
else if (state != CS_WAITKEY)
ereport(ERROR,
@@ -1773,3 +1788,36 @@ deserialize_deflist(Datum txt)
return result;
}
+
+/*
+ * Build one DefElem for deserialize_deflist
+ */
+static DefElem *
+buildDefItem(const char *name, const char *val, bool was_quoted)
+{
+ /* If input was quoted, always emit as string */
+ if (!was_quoted && val[0] != '\0')
+ {
+ int v;
+ char *endptr;
+
+ /* Try to parse as an integer */
+ errno = 0;
+ v = strtoint(val, &endptr, 10);
+ if (errno == 0 && *endptr == '\0')
+ return makeDefElem(pstrdup(name),
+ (Node *) makeInteger(v),
+ -1);
+ /* Nope, how about as a float? */
+ errno = 0;
+ (void) strtod(val, &endptr);
+ if (errno == 0 && *endptr == '\0')
+ return makeDefElem(pstrdup(name),
+ (Node *) makeFloat(pstrdup(val)),
+ -1);
+ }
+ /* Just make it a string */
+ return makeDefElem(pstrdup(name),
+ (Node *) makeString(pstrdup(val)),
+ -1);
+}
diff --git a/src/test/regress/expected/tsdicts.out b/src/test/regress/expected/tsdicts.out
index 5a927be9485..c8042931429 100644
--- a/src/test/regress/expected/tsdicts.out
+++ b/src/test/regress/expected/tsdicts.out
@@ -470,6 +470,41 @@ SELECT ts_lexize('synonym', 'indices');
{index}
(1 row)
+-- test altering boolean parameters
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+ dictinitoption
+-----------------------------
+ synonyms = 'synonym_sample'
+(1 row)
+
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
+SELECT ts_lexize('synonym', 'PoStGrEs');
+ ts_lexize
+-----------
+
+(1 row)
+
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+ dictinitoption
+------------------------------------------------
+ synonyms = 'synonym_sample', casesensitive = 1
+(1 row)
+
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
+ERROR: casesensitive requires a Boolean value
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
+SELECT ts_lexize('synonym', 'PoStGrEs');
+ ts_lexize
+-----------
+ {pgsql}
+(1 row)
+
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+ dictinitoption
+----------------------------------------------------
+ synonyms = 'synonym_sample', casesensitive = 'off'
+(1 row)
+
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.
diff --git a/src/test/regress/sql/tsdicts.sql b/src/test/regress/sql/tsdicts.sql
index 908e6755018..ddc6c7f4453 100644
--- a/src/test/regress/sql/tsdicts.sql
+++ b/src/test/regress/sql/tsdicts.sql
@@ -148,6 +148,19 @@ SELECT ts_lexize('synonym', 'PoStGrEs');
SELECT ts_lexize('synonym', 'Gogle');
SELECT ts_lexize('synonym', 'indices');
+-- test altering boolean parameters
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 1);
+SELECT ts_lexize('synonym', 'PoStGrEs');
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = 2); -- fail
+
+ALTER TEXT SEARCH DICTIONARY synonym (CaseSensitive = off);
+SELECT ts_lexize('synonym', 'PoStGrEs');
+SELECT dictinitoption FROM pg_ts_dict WHERE dictname = 'synonym';
+
-- Create and simple test thesaurus dictionary
-- More tests in configuration checks because ts_lexize()
-- cannot pass more than one word to thesaurus.