diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/catalog/system_views.sql | 7 | ||||
-rw-r--r-- | src/backend/parser/scansup.c | 12 | ||||
-rw-r--r-- | src/backend/utils/adt/misc.c | 224 | ||||
-rw-r--r-- | src/include/catalog/catversion.h | 2 | ||||
-rw-r--r-- | src/include/catalog/pg_proc.h | 3 | ||||
-rw-r--r-- | src/include/parser/scansup.h | 3 | ||||
-rw-r--r-- | src/include/utils/builtins.h | 1 | ||||
-rw-r--r-- | src/test/regress/expected/name.out | 66 | ||||
-rw-r--r-- | src/test/regress/sql/name.sql | 33 |
9 files changed, 349 insertions, 2 deletions
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index fef67bdd4cd..9ae1ef4efa6 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -990,3 +990,10 @@ RETURNS jsonb LANGUAGE INTERNAL STRICT IMMUTABLE AS 'jsonb_set'; + +CREATE OR REPLACE FUNCTION + parse_ident(str text, strict boolean DEFAULT true) +RETURNS text[] +LANGUAGE INTERNAL +STRICT IMMUTABLE +AS 'parse_ident'; diff --git a/src/backend/parser/scansup.c b/src/backend/parser/scansup.c index 2b4ab202c34..7aa5b768411 100644 --- a/src/backend/parser/scansup.c +++ b/src/backend/parser/scansup.c @@ -130,6 +130,15 @@ scanstr(const char *s) char * downcase_truncate_identifier(const char *ident, int len, bool warn) { + return downcase_identifier(ident, len, warn, true); +} + +/* + * a workhorse for downcase_truncate_identifier + */ +char * +downcase_identifier(const char *ident, int len, bool warn, bool truncate) +{ char *result; int i; bool enc_is_single_byte; @@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn) } result[i] = '\0'; - if (i >= NAMEDATALEN) + if (i >= NAMEDATALEN && truncate) truncate_identifier(result, i, warn); return result; } + /* * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes. * diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c index 43f36db47bb..4dcc5a63be7 100644 --- a/src/backend/utils/adt/misc.c +++ b/src/backend/utils/adt/misc.c @@ -27,6 +27,7 @@ #include "commands/dbcommands.h" #include "funcapi.h" #include "miscadmin.h" +#include "parser/scansup.h" #include "parser/keywords.h" #include "postmaster/syslogger.h" #include "rewrite/rewriteHandler.h" @@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS) PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS); } + + +/* + * This simple parser utility are compatible with lexer implementation, + * used only in parse_ident function + */ +static bool +is_ident_start(unsigned char c) +{ + if (c == '_') + return true; + if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) + return true; + + if (c >= 0200 && c <= 0377) + return true; + + return false; +} + +static bool +is_ident_cont(unsigned char c) +{ + if (c >= '0' && c <= '9') + return true; + + return is_ident_start(c); +} + +/* + * Sanitize SQL string for using in error message. + */ +static char * +sanitize_text(text *t) +{ + int len = VARSIZE_ANY_EXHDR(t); + const char *p = VARDATA_ANY(t); + StringInfo dstr; + + dstr = makeStringInfo(); + + appendStringInfoChar(dstr, '"'); + + while (len--) + { + switch (*p) + { + case '\b': + appendStringInfoString(dstr, "\\b"); + break; + case '\f': + appendStringInfoString(dstr, "\\f"); + break; + case '\n': + appendStringInfoString(dstr, "\\n"); + break; + case '\r': + appendStringInfoString(dstr, "\\r"); + break; + case '\t': + appendStringInfoString(dstr, "\\t"); + break; + case '\'': + appendStringInfoString(dstr, "''"); + break; + case '\\': + appendStringInfoString(dstr, "\\\\"); + break; + default: + if ((unsigned char) *p < ' ') + appendStringInfo(dstr, "\\u%04x", (int) *p); + else + appendStringInfoCharMacro(dstr, *p); + break; + } + p++; + } + + appendStringInfoChar(dstr, '"'); + + return dstr->data; +} + +/* + * parse_ident - parse SQL composed identifier to separate identifiers. + * When strict mode is active (second parameter), then any chars after + * last identifiers are disallowed. + */ +Datum +parse_ident(PG_FUNCTION_ARGS) +{ + text *qualname; + char *qualname_str; + bool strict; + char *nextp; + bool after_dot = false; + ArrayBuildState *astate = NULL; + + qualname = PG_GETARG_TEXT_PP(0); + qualname_str = text_to_cstring(qualname); + strict = PG_GETARG_BOOL(1); + + nextp = qualname_str; + + /* skip leading whitespace */ + while (isspace((unsigned char) *nextp)) + nextp++; + + for (;;) + { + char *curname; + char *endp; + bool missing_ident; + + missing_ident = true; + + if (*nextp == '\"') + { + curname = nextp + 1; + for (;;) + { + endp = strchr(nextp + 1, '\"'); + if (endp == NULL) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("unclosed double quotes"), + errdetail("string %s is not valid identifier", + sanitize_text(qualname)))); + if (endp[1] != '\"') + break; + memmove(endp, endp + 1, strlen(endp)); + nextp = endp; + } + nextp = endp + 1; + *endp = '\0'; + + /* Show complete input string in this case. */ + if (endp - curname == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier should not be empty: %s", + sanitize_text(qualname)))); + + astate = accumArrayResult(astate, CStringGetTextDatum(curname), + false, TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + else + { + if (is_ident_start((unsigned char) *nextp)) + { + char *downname; + int len; + text *part; + + curname = nextp++; + while (is_ident_cont((unsigned char) *nextp)) + nextp++; + + len = nextp - curname; + + /* + * Unlike name, we don't implicitly truncate identifiers. This + * is useful for allowing the user to check for specific parts + * of the identifier being too long. It's easy enough for the + * user to get the truncated names by casting our output to + * name[]. + */ + downname = downcase_identifier(curname, len, false, false); + part = cstring_to_text_with_len(downname, len); + astate = accumArrayResult(astate, PointerGetDatum(part), false, + TEXTOID, CurrentMemoryContext); + missing_ident = false; + } + } + + if (missing_ident) + { + /* Different error messages based on where we failed. */ + if (*nextp == '.') + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier before \".\" symbol: %s", + sanitize_text(qualname)))); + else if (after_dot) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier after \".\" symbol: %s", + sanitize_text(qualname)))); + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("missing valid identifier: %s", + sanitize_text(qualname)))); + } + + while (isspace((unsigned char) *nextp)) + nextp++; + + if (*nextp == '.') + { + after_dot = true; + nextp++; + while (isspace((unsigned char) *nextp)) + nextp++; + } + else if (*nextp == '\0') + { + break; + } + else + { + if (strict) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("identifier contains disallowed characters: %s", + sanitize_text(qualname)))); + break; + } + } + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index 7f410c178c5..7c9e9eb19f6 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201603151 +#define CATALOG_VERSION_NO 201603181 #endif diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index ceb8129e9af..a59532732c0 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3463,6 +3463,9 @@ DESCR("I/O"); DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ )); DESCR("convert namespace name to regnamespace"); +DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ )); +DESCR("parse qualified identifier to array of identifiers"); + DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ )); DESCR("(internal)"); DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ )); diff --git a/src/include/parser/scansup.h b/src/include/parser/scansup.h index 4f4164bb8c4..4f95c81f7c9 100644 --- a/src/include/parser/scansup.h +++ b/src/include/parser/scansup.h @@ -20,6 +20,9 @@ extern char *scanstr(const char *s); extern char *downcase_truncate_identifier(const char *ident, int len, bool warn); +extern char *downcase_identifier(const char *ident, int len, + bool warn, bool truncate); + extern void truncate_identifier(char *ident, int len, bool warn); extern bool scanner_isspace(char ch); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 59a00bbcbcf..206288da810 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS); extern Datum pg_collation_for(PG_FUNCTION_ARGS); extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS); extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS); +extern Datum parse_ident(PG_FUNCTION_ARGS); /* oid.c */ extern Datum oidin(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/name.out b/src/test/regress/expected/name.out index b359d528225..56139d45efc 100644 --- a/src/test/regress/expected/name.out +++ b/src/test/regress/expected/name.out @@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; (2 rows) DROP TABLE NAME_TBL; +DO $$ +DECLARE r text[]; +BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); +END; +$$; +NOTICE: schemax.tabley +NOTICE: "SchemaX"."TableY" +SELECT parse_ident('foo.boo'); + parse_ident +------------- + {foo,boo} +(1 row) + +SELECT parse_ident('foo.boo[]'); -- should fail +ERROR: identifier contains disallowed characters: "foo.boo[]" +SELECT parse_ident('foo.boo[]', strict => false); -- ok + parse_ident +------------- + {foo,boo} +(1 row) + +-- should fail +SELECT parse_ident(' '); +ERROR: missing valid identifier: " " +SELECT parse_ident(' .aaa'); +ERROR: missing valid identifier before "." symbol: " .aaa" +SELECT parse_ident(' aaa . '); +ERROR: missing valid identifier after "." symbol: " aaa . " +SELECT parse_ident('aaa.a%b'); +ERROR: identifier contains disallowed characters: "aaa.a%b" +SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); +ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" +SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; + length | length +--------+-------- + 414 | 289 +(1 row) + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"'); + parse_ident +----------------------------------------------------------------------------------------------------------- + {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} +(1 row) + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[]; + parse_ident +------------------------------------------------------------------------------------------------------ + {first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"} +(1 row) + +SELECT parse_ident(E'"c".X XXXX\002XXXXXX'); +ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX" +SELECT parse_ident('1020'); +ERROR: missing valid identifier: "1020" +SELECT parse_ident('10.20'); +ERROR: missing valid identifier: "10.20" +SELECT parse_ident('.'); +ERROR: missing valid identifier before "." symbol: "." +SELECT parse_ident('.1020'); +ERROR: missing valid identifier before "." symbol: ".1020" +SELECT parse_ident('xxx.1020'); +ERROR: missing valid identifier after "." symbol: "xxx.1020" diff --git a/src/test/regress/sql/name.sql b/src/test/regress/sql/name.sql index 1c7a6716eea..602bf26a48c 100644 --- a/src/test/regress/sql/name.sql +++ b/src/test/regress/sql/name.sql @@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]'; SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*'; DROP TABLE NAME_TBL; + +DO $$ +DECLARE r text[]; +BEGIN + r := parse_ident('Schemax.Tabley'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); + r := parse_ident('"SchemaX"."TableY"'); + RAISE NOTICE '%', format('%I.%I', r[1], r[2]); +END; +$$; + +SELECT parse_ident('foo.boo'); +SELECT parse_ident('foo.boo[]'); -- should fail +SELECT parse_ident('foo.boo[]', strict => false); -- ok + +-- should fail +SELECT parse_ident(' '); +SELECT parse_ident(' .aaa'); +SELECT parse_ident(' aaa . '); +SELECT parse_ident('aaa.a%b'); +SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'); + +SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ; + +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"'); +SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[]; + +SELECT parse_ident(E'"c".X XXXX\002XXXXXX'); +SELECT parse_ident('1020'); +SELECT parse_ident('10.20'); +SELECT parse_ident('.'); +SELECT parse_ident('.1020'); +SELECT parse_ident('xxx.1020'); |