diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/parser/parser.c | 74 | ||||
-rw-r--r-- | src/backend/parser/scan.l | 131 | ||||
-rw-r--r-- | src/backend/utils/adt/jsonpath_scan.l | 45 | ||||
-rw-r--r-- | src/backend/utils/adt/xml.c | 24 | ||||
-rw-r--r-- | src/backend/utils/mb/mbutils.c | 105 | ||||
-rw-r--r-- | src/common/jsonapi.c | 58 | ||||
-rw-r--r-- | src/include/mb/pg_wchar.h | 17 | ||||
-rw-r--r-- | src/include/parser/scanner.h | 16 | ||||
-rw-r--r-- | src/test/regress/expected/json_encoding.out | 15 | ||||
-rw-r--r-- | src/test/regress/expected/json_encoding_1.out | 39 | ||||
-rw-r--r-- | src/test/regress/expected/json_encoding_2.out | 9 | ||||
-rw-r--r-- | src/test/regress/expected/jsonpath_encoding.out | 15 | ||||
-rw-r--r-- | src/test/regress/expected/jsonpath_encoding_1.out | 33 | ||||
-rw-r--r-- | src/test/regress/expected/jsonpath_encoding_2.out | 9 | ||||
-rw-r--r-- | src/test/regress/expected/strings.out | 83 | ||||
-rw-r--r-- | src/test/regress/sql/json_encoding.sql | 13 | ||||
-rw-r--r-- | src/test/regress/sql/jsonpath_encoding.sql | 13 | ||||
-rw-r--r-- | src/test/regress/sql/strings.sql | 20 |
18 files changed, 557 insertions, 162 deletions
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 1bf1144c4fd..be86eb37fef 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -292,22 +292,14 @@ hexval(unsigned char c) return 0; /* not reached */ } -/* is Unicode code point acceptable in database's encoding? */ +/* is Unicode code point acceptable? */ static void -check_unicode_value(pg_wchar c, int pos, core_yyscan_t yyscanner) +check_unicode_value(pg_wchar c) { - /* See also addunicode() in scan.l */ - if (c == 0 || c > 0x10FFFF) + if (!is_valid_unicode_codepoint(c)) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("invalid Unicode escape value"), - scanner_errposition(pos, yyscanner))); - - if (c > 0x7F && GetDatabaseEncoding() != PG_UTF8) - ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"), - scanner_errposition(pos, yyscanner))); + errmsg("invalid Unicode escape value"))); } /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */ @@ -338,20 +330,39 @@ str_udeescape(const char *str, char escape, const char *in; char *new, *out; + size_t new_len; pg_wchar pair_first = 0; + ScannerCallbackState scbstate; /* - * This relies on the subtle assumption that a UTF-8 expansion cannot be - * longer than its escaped representation. + * Guesstimate that result will be no longer than input, but allow enough + * padding for Unicode conversion. */ - new = palloc(strlen(str) + 1); + new_len = strlen(str) + MAX_UNICODE_EQUIVALENT_STRING + 1; + new = palloc(new_len); in = str; out = new; while (*in) { + /* Enlarge string if needed */ + size_t out_dist = out - new; + + if (out_dist > new_len - (MAX_UNICODE_EQUIVALENT_STRING + 1)) + { + new_len *= 2; + new = repalloc(new, new_len); + out = new + out_dist; + } + if (in[0] == escape) { + /* + * Any errors reported while processing this escape sequence will + * have an error cursor pointing at the escape. + */ + setup_scanner_errposition_callback(&scbstate, yyscanner, + in - str + position + 3); /* 3 for U&" */ if (in[1] == escape) { if (pair_first) @@ -370,9 +381,7 @@ str_udeescape(const char *str, char escape, (hexval(in[2]) << 8) + (hexval(in[3]) << 4) + hexval(in[4]); - check_unicode_value(unicode, - in - str + position + 3, /* 3 for U&" */ - yyscanner); + check_unicode_value(unicode); if (pair_first) { if (is_utf16_surrogate_second(unicode)) @@ -390,8 +399,8 @@ str_udeescape(const char *str, char escape, pair_first = unicode; else { - unicode_to_utf8(unicode, (unsigned char *) out); - out += pg_mblen(out); + pg_unicode_to_server(unicode, (unsigned char *) out); + out += strlen(out); } in += 5; } @@ -411,9 +420,7 @@ str_udeescape(const char *str, char escape, (hexval(in[5]) << 8) + (hexval(in[6]) << 4) + hexval(in[7]); - check_unicode_value(unicode, - in - str + position + 3, /* 3 for U&" */ - yyscanner); + check_unicode_value(unicode); if (pair_first) { if (is_utf16_surrogate_second(unicode)) @@ -431,17 +438,18 @@ str_udeescape(const char *str, char escape, pair_first = unicode; else { - unicode_to_utf8(unicode, (unsigned char *) out); - out += pg_mblen(out); + pg_unicode_to_server(unicode, (unsigned char *) out); + out += strlen(out); } in += 8; } else ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), - errmsg("invalid Unicode escape value"), - scanner_errposition(in - str + position + 3, /* 3 for U&" */ - yyscanner))); + errmsg("invalid Unicode escape"), + errhint("Unicode escapes must be \\XXXX or \\+XXXXXX."))); + + cancel_scanner_errposition_callback(&scbstate); } else { @@ -457,15 +465,13 @@ str_udeescape(const char *str, char escape, goto invalid_pair; *out = '\0'; + return new; /* - * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII - * codes; but it's probably not worth the trouble, since this isn't likely - * to be a performance-critical path. + * We might get here with the error callback active, or not. Call + * scanner_errposition to make sure an error cursor appears; if the + * callback is active, this is duplicative but harmless. */ - pg_verifymbstr(new, out - new, false); - return new; - invalid_pair: ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 84c73914a85..b1ea0cb5384 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -106,6 +106,18 @@ const uint16 ScanKeywordTokens[] = { */ #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) +/* + * Sometimes, we do want yylloc to point into the middle of a token; this is + * useful for instance to throw an error about an escape sequence within a + * string literal. But if we find no error there, we want to revert yylloc + * to the token start, so that that's the location reported to the parser. + * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code. + * (Currently the implied "stack" is just one location, but someday we might + * need to nest these.) + */ +#define PUSH_YYLLOC() (yyextra->save_yylloc = *(yylloc)) +#define POP_YYLLOC() (*(yylloc) = yyextra->save_yylloc) + #define startlit() ( yyextra->literallen = 0 ) static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); @@ -605,8 +617,18 @@ other . <xe>{xeunicode} { pg_wchar c = strtoul(yytext + 2, NULL, 16); + /* + * For consistency with other productions, issue any + * escape warning with cursor pointing to start of string. + * We might want to change that, someday. + */ check_escape_warning(yyscanner); + /* Remember start of overall string token ... */ + PUSH_YYLLOC(); + /* ... and set the error cursor to point at this esc seq */ + SET_YYLLOC(); + if (is_utf16_surrogate_first(c)) { yyextra->utf16_first_part = c; @@ -616,10 +638,18 @@ other . yyerror("invalid Unicode surrogate pair"); else addunicode(c, yyscanner); + + /* Restore yylloc to be start of string token */ + POP_YYLLOC(); } <xeu>{xeunicode} { pg_wchar c = strtoul(yytext + 2, NULL, 16); + /* Remember start of overall string token ... */ + PUSH_YYLLOC(); + /* ... and set the error cursor to point at this esc seq */ + SET_YYLLOC(); + if (!is_utf16_surrogate_second(c)) yyerror("invalid Unicode surrogate pair"); @@ -627,12 +657,21 @@ other . addunicode(c, yyscanner); + /* Restore yylloc to be start of string token */ + POP_YYLLOC(); + BEGIN(xe); } -<xeu>. { yyerror("invalid Unicode surrogate pair"); } -<xeu>\n { yyerror("invalid Unicode surrogate pair"); } -<xeu><<EOF>> { yyerror("invalid Unicode surrogate pair"); } +<xeu>. | +<xeu>\n | +<xeu><<EOF>> { + /* Set the error cursor to point at missing esc seq */ + SET_YYLLOC(); + yyerror("invalid Unicode surrogate pair"); + } <xe,xeu>{xeunicodefail} { + /* Set the error cursor to point at malformed esc seq */ + SET_YYLLOC(); ereport(ERROR, (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), errmsg("invalid Unicode escape"), @@ -1029,12 +1068,13 @@ other . * scanner_errposition * Report a lexer or grammar error cursor position, if possible. * - * This is expected to be used within an ereport() call. The return value + * This is expected to be used within an ereport() call, or via an error + * callback such as setup_scanner_errposition_callback(). The return value * is a dummy (always 0, in fact). * * Note that this can only be used for messages emitted during raw parsing - * (essentially, scan.l and gram.y), since it requires the yyscanner struct - * to still be available. + * (essentially, scan.l, parser.c, and gram.y), since it requires the + * yyscanner struct to still be available. */ int scanner_errposition(int location, core_yyscan_t yyscanner) @@ -1051,6 +1091,62 @@ scanner_errposition(int location, core_yyscan_t yyscanner) } /* + * Error context callback for inserting scanner error location. + * + * Note that this will be called for *any* error occurring while the + * callback is installed. We avoid inserting an irrelevant error location + * if the error is a query cancel --- are there any other important cases? + */ +static void +scb_error_callback(void *arg) +{ + ScannerCallbackState *scbstate = (ScannerCallbackState *) arg; + + if (geterrcode() != ERRCODE_QUERY_CANCELED) + (void) scanner_errposition(scbstate->location, scbstate->yyscanner); +} + +/* + * setup_scanner_errposition_callback + * Arrange for non-scanner errors to report an error position + * + * Sometimes the scanner calls functions that aren't part of the scanner + * subsystem and can't reasonably be passed the yyscanner pointer; yet + * we would like any errors thrown in those functions to be tagged with an + * error location. Use this function to set up an error context stack + * entry that will accomplish that. Usage pattern: + * + * declare a local variable "ScannerCallbackState scbstate" + * ... + * setup_scanner_errposition_callback(&scbstate, yyscanner, location); + * call function that might throw error; + * cancel_scanner_errposition_callback(&scbstate); + */ +void +setup_scanner_errposition_callback(ScannerCallbackState *scbstate, + core_yyscan_t yyscanner, + int location) +{ + /* Setup error traceback support for ereport() */ + scbstate->yyscanner = yyscanner; + scbstate->location = location; + scbstate->errcallback.callback = scb_error_callback; + scbstate->errcallback.arg = (void *) scbstate; + scbstate->errcallback.previous = error_context_stack; + error_context_stack = &scbstate->errcallback; +} + +/* + * Cancel a previously-set-up errposition callback. + */ +void +cancel_scanner_errposition_callback(ScannerCallbackState *scbstate) +{ + /* Pop the error context stack */ + error_context_stack = scbstate->errcallback.previous; +} + +/* * scanner_yyerror * Report a lexer or grammar error. * @@ -1226,19 +1322,20 @@ process_integer_literal(const char *token, YYSTYPE *lval) static void addunicode(pg_wchar c, core_yyscan_t yyscanner) { - char buf[8]; + ScannerCallbackState scbstate; + char buf[MAX_UNICODE_EQUIVALENT_STRING + 1]; - /* See also check_unicode_value() in parser.c */ - if (c == 0 || c > 0x10FFFF) + if (!is_valid_unicode_codepoint(c)) yyerror("invalid Unicode escape value"); - if (c > 0x7F) - { - if (GetDatabaseEncoding() != PG_UTF8) - yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); - yyextra->saw_non_ascii = true; - } - unicode_to_utf8(c, (unsigned char *) buf); - addlit(buf, pg_mblen(buf), yyscanner); + + /* + * We expect that pg_unicode_to_server() will complain about any + * unconvertible code point, so we don't have to set saw_non_ascii. + */ + setup_scanner_errposition_callback(&scbstate, yyscanner, *(yylloc)); + pg_unicode_to_server(c, (unsigned char *) buf); + cancel_scanner_errposition_callback(&scbstate); + addlit(buf, strlen(buf), yyscanner); } static unsigned char diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 70681b789d3..be0a2cfa2f7 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -486,13 +486,6 @@ hexval(char c) static void addUnicodeChar(int ch) { - /* - * For UTF8, replace the escape sequence by the actual - * utf8 character in lex->strval. Do this also for other - * encodings if the escape designates an ASCII character, - * otherwise raise an error. - */ - if (ch == 0) { /* We can't allow this, since our TEXT type doesn't */ @@ -501,40 +494,20 @@ addUnicodeChar(int ch) errmsg("unsupported Unicode escape sequence"), errdetail("\\u0000 cannot be converted to text."))); } - else if (GetDatabaseEncoding() == PG_UTF8) - { - char utf8str[5]; - int utf8len; - - unicode_to_utf8(ch, (unsigned char *) utf8str); - utf8len = pg_utf_mblen((unsigned char *) utf8str); - addstring(false, utf8str, utf8len); - } - else if (ch <= 0x007f) - { - /* - * This is the only way to designate things like a - * form feed character in JSON, so it's useful in all - * encodings. - */ - addchar(false, (char) ch); - } else { - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type %s", "jsonpath"), - errdetail("Unicode escape values cannot be used for code " - "point values above 007F when the server encoding " - "is not UTF8."))); + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + pg_unicode_to_server(ch, (unsigned char *) cbuf); + addstring(false, cbuf, strlen(cbuf)); } } -/* Add unicode character and process its hi surrogate */ +/* Add unicode character, processing any surrogate pairs */ static void addUnicode(int ch, int *hi_surrogate) { - if (ch >= 0xd800 && ch <= 0xdbff) + if (is_utf16_surrogate_first(ch)) { if (*hi_surrogate != -1) ereport(ERROR, @@ -542,10 +515,10 @@ addUnicode(int ch, int *hi_surrogate) errmsg("invalid input syntax for type %s", "jsonpath"), errdetail("Unicode high surrogate must not follow " "a high surrogate."))); - *hi_surrogate = (ch & 0x3ff) << 10; + *hi_surrogate = ch; return; } - else if (ch >= 0xdc00 && ch <= 0xdfff) + else if (is_utf16_surrogate_second(ch)) { if (*hi_surrogate == -1) ereport(ERROR, @@ -553,7 +526,7 @@ addUnicode(int ch, int *hi_surrogate) errmsg("invalid input syntax for type %s", "jsonpath"), errdetail("Unicode low surrogate must follow a high " "surrogate."))); - ch = 0x10000 + *hi_surrogate + (ch & 0x3ff); + ch = surrogate_pair_to_codepoint(*hi_surrogate, ch); *hi_surrogate = -1; } else if (*hi_surrogate != -1) diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index c7ae1eded80..4c299057a6f 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -2086,26 +2086,6 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, /* - * Map a Unicode codepoint into the current server encoding. - */ -static char * -unicode_to_sqlchar(pg_wchar c) -{ - char utf8string[8]; /* need room for trailing zero */ - char *result; - - memset(utf8string, 0, sizeof(utf8string)); - unicode_to_utf8(c, (unsigned char *) utf8string); - - result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8); - /* if pg_any_to_server didn't strdup, we must */ - if (result == utf8string) - result = pstrdup(result); - return result; -} - - -/* * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3. */ char * @@ -2125,10 +2105,12 @@ map_xml_name_to_sql_identifier(const char *name) && isxdigit((unsigned char) *(p + 5)) && *(p + 6) == '_') { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; unsigned int u; sscanf(p + 2, "%X", &u); - appendStringInfoString(&buf, unicode_to_sqlchar(u)); + pg_unicode_to_server(u, (unsigned char *) cbuf); + appendStringInfoString(&buf, cbuf); p += 6; } else diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 86787bcb319..a8e13cacfde 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -68,6 +68,13 @@ static FmgrInfo *ToServerConvProc = NULL; static FmgrInfo *ToClientConvProc = NULL; /* + * This variable stores the conversion function to convert from UTF-8 + * to the server encoding. It's NULL if the server encoding *is* UTF-8, + * or if we lack a conversion function for this. + */ +static FmgrInfo *Utf8ToServerConvProc = NULL; + +/* * These variables track the currently-selected encodings. */ static const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII]; @@ -273,6 +280,8 @@ SetClientEncoding(int encoding) void InitializeClientEncoding(void) { + int current_server_encoding; + Assert(!backend_startup_complete); backend_startup_complete = true; @@ -289,6 +298,35 @@ InitializeClientEncoding(void) pg_enc2name_tbl[pending_client_encoding].name, GetDatabaseEncodingName()))); } + + /* + * Also look up the UTF8-to-server conversion function if needed. Since + * the server encoding is fixed within any one backend process, we don't + * have to do this more than once. + */ + current_server_encoding = GetDatabaseEncoding(); + if (current_server_encoding != PG_UTF8 && + current_server_encoding != PG_SQL_ASCII) + { + Oid utf8_to_server_proc; + + Assert(IsTransactionState()); + utf8_to_server_proc = + FindDefaultConversionProc(PG_UTF8, + current_server_encoding); + /* If there's no such conversion, just leave the pointer as NULL */ + if (OidIsValid(utf8_to_server_proc)) + { + FmgrInfo *finfo; + + finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext, + sizeof(FmgrInfo)); + fmgr_info_cxt(utf8_to_server_proc, finfo, + TopMemoryContext); + /* Set Utf8ToServerConvProc only after data is fully valid */ + Utf8ToServerConvProc = finfo; + } + } } /* @@ -752,6 +790,73 @@ perform_default_encoding_conversion(const char *src, int len, return result; } +/* + * Convert a single Unicode code point into a string in the server encoding. + * + * The code point given by "c" is converted and stored at *s, which must + * have at least MAX_UNICODE_EQUIVALENT_STRING+1 bytes available. + * The output will have a trailing '\0'. Throws error if the conversion + * cannot be performed. + * + * Note that this relies on having previously looked up any required + * conversion function. That's partly for speed but mostly because the parser + * may call this outside any transaction, or in an aborted transaction. + */ +void +pg_unicode_to_server(pg_wchar c, unsigned char *s) +{ + unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1]; + int c_as_utf8_len; + int server_encoding; + + /* + * Complain if invalid Unicode code point. The choice of errcode here is + * debatable, but really our caller should have checked this anyway. + */ + if (!is_valid_unicode_codepoint(c)) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid Unicode code point"))); + + /* Otherwise, if it's in ASCII range, conversion is trivial */ + if (c <= 0x7F) + { + s[0] = (unsigned char) c; + s[1] = '\0'; + return; + } + + /* If the server encoding is UTF-8, we just need to reformat the code */ + server_encoding = GetDatabaseEncoding(); + if (server_encoding == PG_UTF8) + { + unicode_to_utf8(c, s); + s[pg_utf_mblen(s)] = '\0'; + return; + } + + /* For all other cases, we must have a conversion function available */ + if (Utf8ToServerConvProc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("conversion between %s and %s is not supported", + pg_enc2name_tbl[PG_UTF8].name, + GetDatabaseEncodingName()))); + + /* Construct UTF-8 source string */ + unicode_to_utf8(c, c_as_utf8); + c_as_utf8_len = pg_utf_mblen(c_as_utf8); + c_as_utf8[c_as_utf8_len] = '\0'; + + /* Convert, or throw error if we can't */ + FunctionCall5(Utf8ToServerConvProc, + Int32GetDatum(PG_UTF8), + Int32GetDatum(server_encoding), + CStringGetDatum(c_as_utf8), + CStringGetDatum(s), + Int32GetDatum(c_as_utf8_len)); +} + /* convert a multibyte string to a wchar */ int diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index f08a03c1690..7df231c3851 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -744,21 +744,21 @@ json_lex_string(JsonLexContext *lex) } if (lex->strval != NULL) { - char utf8str[5]; - int utf8len; - - if (ch >= 0xd800 && ch <= 0xdbff) + /* + * Combine surrogate pairs. + */ + if (is_utf16_surrogate_first(ch)) { if (hi_surrogate != -1) return JSON_UNICODE_HIGH_SURROGATE; - hi_surrogate = (ch & 0x3ff) << 10; + hi_surrogate = ch; continue; } - else if (ch >= 0xdc00 && ch <= 0xdfff) + else if (is_utf16_surrogate_second(ch)) { if (hi_surrogate == -1) return JSON_UNICODE_LOW_SURROGATE; - ch = 0x10000 + hi_surrogate + (ch & 0x3ff); + ch = surrogate_pair_to_codepoint(hi_surrogate, ch); hi_surrogate = -1; } @@ -766,35 +766,52 @@ json_lex_string(JsonLexContext *lex) return JSON_UNICODE_LOW_SURROGATE; /* - * For UTF8, replace the escape sequence by the actual - * utf8 character in lex->strval. Do this also for other - * encodings if the escape designates an ASCII character, - * otherwise raise an error. + * Reject invalid cases. We can't have a value above + * 0xFFFF here (since we only accepted 4 hex digits + * above), so no need to test for out-of-range chars. */ - if (ch == 0) { /* We can't allow this, since our TEXT type doesn't */ return JSON_UNICODE_CODE_POINT_ZERO; } - else if (lex->input_encoding == PG_UTF8) + + /* + * Add the represented character to lex->strval. In the + * backend, we can let pg_unicode_to_server() handle any + * required character set conversion; in frontend, we can + * only deal with trivial conversions. + * + * Note: pg_unicode_to_server() will throw an error for a + * conversion failure, rather than returning a failure + * indication. That seems OK. + */ +#ifndef FRONTEND + { + char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1]; + + pg_unicode_to_server(ch, (unsigned char *) cbuf); + appendStringInfoString(lex->strval, cbuf); + } +#else + if (lex->input_encoding == PG_UTF8) { + /* OK, we can map the code point to UTF8 easily */ + char utf8str[5]; + int utf8len; + unicode_to_utf8(ch, (unsigned char *) utf8str); utf8len = pg_utf_mblen((unsigned char *) utf8str); appendBinaryStringInfo(lex->strval, utf8str, utf8len); } else if (ch <= 0x007f) { - /* - * This is the only way to designate things like a - * form feed character in JSON, so it's useful in all - * encodings. - */ + /* The ASCII range is the same in all encodings */ appendStringInfoChar(lex->strval, (char) ch); } else return JSON_UNICODE_HIGH_ESCAPE; - +#endif /* FRONTEND */ } } else if (lex->strval != NULL) @@ -1083,7 +1100,8 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex) case JSON_UNICODE_ESCAPE_FORMAT: return _("\"\\u\" must be followed by four hexadecimal digits."); case JSON_UNICODE_HIGH_ESCAPE: - return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."); + /* note: this case is only reachable in frontend not backend */ + return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8."); case JSON_UNICODE_HIGH_SURROGATE: return _("Unicode high surrogate must not follow a high surrogate."); case JSON_UNICODE_LOW_SURROGATE: diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index b8892ef730e..494aefc7fab 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -316,6 +316,15 @@ typedef enum pg_enc #define MAX_CONVERSION_GROWTH 4 /* + * Maximum byte length of the string equivalent to any one Unicode code point, + * in any backend encoding. The current value assumes that a 4-byte UTF-8 + * character might expand by MAX_CONVERSION_GROWTH, which is a huge + * overestimate. But in current usage we don't allocate large multiples of + * this, so there's little point in being stingy. + */ +#define MAX_UNICODE_EQUIVALENT_STRING 16 + +/* * Table for mapping an encoding number to official encoding name and * possibly other subsidiary data. Be careful to check encoding number * before accessing a table entry! @@ -506,6 +515,12 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code); * Some handy functions for Unicode-specific tests. */ static inline bool +is_valid_unicode_codepoint(pg_wchar c) +{ + return (c > 0 && c <= 0x10FFFF); +} + +static inline bool is_utf16_surrogate_first(pg_wchar c) { return (c >= 0xD800 && c <= 0xDBFF); @@ -603,6 +618,8 @@ extern char *pg_server_to_client(const char *s, int len); extern char *pg_any_to_server(const char *s, int len, int encoding); extern char *pg_server_to_any(const char *s, int len, int encoding); +extern void pg_unicode_to_server(pg_wchar c, unsigned char *s); + extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc); extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc); diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h index 7a0e5e5d982..a27352afc14 100644 --- a/src/include/parser/scanner.h +++ b/src/include/parser/scanner.h @@ -99,9 +99,13 @@ typedef struct core_yy_extra_type int literallen; /* actual current string length */ int literalalloc; /* current allocated buffer size */ + /* + * Random assorted scanner state. + */ int state_before_str_stop; /* start cond. before end quote */ int xcdepth; /* depth of nesting in slash-star comments */ char *dolqstart; /* current $foo$ quote start string */ + YYLTYPE save_yylloc; /* one-element stack for PUSH_YYLLOC() */ /* first part of UTF16 surrogate pair for Unicode escapes */ int32 utf16_first_part; @@ -116,6 +120,14 @@ typedef struct core_yy_extra_type */ typedef void *core_yyscan_t; +/* Support for scanner_errposition_callback function */ +typedef struct ScannerCallbackState +{ + core_yyscan_t yyscanner; + int location; + ErrorContextCallback errcallback; +} ScannerCallbackState; + /* Constant data exported from parser/scan.l */ extern PGDLLIMPORT const uint16 ScanKeywordTokens[]; @@ -129,6 +141,10 @@ extern void scanner_finish(core_yyscan_t yyscanner); extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner); extern int scanner_errposition(int location, core_yyscan_t yyscanner); +extern void setup_scanner_errposition_callback(ScannerCallbackState *scbstate, + core_yyscan_t yyscanner, + int location); +extern void cancel_scanner_errposition_callback(ScannerCallbackState *scbstate); extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn(); #endif /* SCANNER_H */ diff --git a/src/test/regress/expected/json_encoding.out b/src/test/regress/expected/json_encoding.out index d8d34f4ff6a..f343f74fe18 100644 --- a/src/test/regress/expected/json_encoding.out +++ b/src/test/regress/expected/json_encoding.out @@ -1,4 +1,19 @@ +-- -- encoding-sensitive tests for json and jsonb +-- +-- We provide expected-results files for UTF8 (json_encoding.out) +-- and for SQL_ASCII (json_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif +SELECT getdatabaseencoding(); -- just to label the results files + getdatabaseencoding +--------------------- + UTF8 +(1 row) + -- first json -- basic unicode input SELECT '"\u"'::json; -- ERROR, incomplete escape diff --git a/src/test/regress/expected/json_encoding_1.out b/src/test/regress/expected/json_encoding_1.out index 79ed78e1c5f..e2fc131b0fa 100644 --- a/src/test/regress/expected/json_encoding_1.out +++ b/src/test/regress/expected/json_encoding_1.out @@ -1,4 +1,19 @@ +-- -- encoding-sensitive tests for json and jsonb +-- +-- We provide expected-results files for UTF8 (json_encoding.out) +-- and for SQL_ASCII (json_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif +SELECT getdatabaseencoding(); -- just to label the results files + getdatabaseencoding +--------------------- + SQL_ASCII +(1 row) + -- first json -- basic unicode input SELECT '"\u"'::json; -- ERROR, incomplete escape @@ -33,9 +48,7 @@ SELECT '"\uaBcD"'::json; -- OK, uppercase and lower case both OK -- handling of unicode surrogate pairs select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8; -ERROR: unsupported Unicode escape sequence -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: { "a":... +ERROR: conversion between UTF8 and SQL_ASCII is not supported select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row ERROR: invalid input syntax for type json DETAIL: Unicode high surrogate must not follow a high surrogate. @@ -84,9 +97,7 @@ select json '{ "a": "null \\u0000 escape" }' as not_an_escape; (1 row) select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; -ERROR: unsupported Unicode escape sequence -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: { "a":... +ERROR: conversion between UTF8 and SQL_ASCII is not supported select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- @@ -144,18 +155,14 @@ CONTEXT: JSON data, line 1: ... -- use octet_length here so we don't get an odd unicode char in the -- output SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK -ERROR: unsupported Unicode escape sequence +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text); ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: ... -- handling of unicode surrogate pairs SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8; -ERROR: unsupported Unicode escape sequence +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: { "a":... SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row ERROR: invalid input syntax for type json LINE 1: SELECT jsonb '{ "a": "\ud83d\ud83d" }' -> 'a'; @@ -182,11 +189,9 @@ DETAIL: Unicode low surrogate must follow a high surrogate. CONTEXT: JSON data, line 1: { "a":... -- handling of simple unicode escapes SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8; -ERROR: unsupported Unicode escape sequence +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: { "a":... SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere; correct_everywhere ----------------------------- @@ -212,11 +217,9 @@ SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape; (1 row) SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8; -ERROR: unsupported Unicode escape sequence +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -CONTEXT: JSON data, line 1: { "a":... SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere; correct_everywhere -------------------- diff --git a/src/test/regress/expected/json_encoding_2.out b/src/test/regress/expected/json_encoding_2.out new file mode 100644 index 00000000000..4fc8f0241ab --- /dev/null +++ b/src/test/regress/expected/json_encoding_2.out @@ -0,0 +1,9 @@ +-- +-- encoding-sensitive tests for json and jsonb +-- +-- We provide expected-results files for UTF8 (json_encoding.out) +-- and for SQL_ASCII (json_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out index ecffe095b59..7cbfb6abcf3 100644 --- a/src/test/regress/expected/jsonpath_encoding.out +++ b/src/test/regress/expected/jsonpath_encoding.out @@ -1,4 +1,19 @@ +-- -- encoding-sensitive tests for jsonpath +-- +-- We provide expected-results files for UTF8 (jsonpath_encoding.out) +-- and for SQL_ASCII (jsonpath_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif +SELECT getdatabaseencoding(); -- just to label the results files + getdatabaseencoding +--------------------- + UTF8 +(1 row) + -- checks for double-quoted values -- basic unicode input SELECT '"\u"'::jsonpath; -- ERROR, incomplete escape diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out index c8cc2173a8c..005136c9657 100644 --- a/src/test/regress/expected/jsonpath_encoding_1.out +++ b/src/test/regress/expected/jsonpath_encoding_1.out @@ -1,4 +1,19 @@ +-- -- encoding-sensitive tests for jsonpath +-- +-- We provide expected-results files for UTF8 (jsonpath_encoding.out) +-- and for SQL_ASCII (jsonpath_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif +SELECT getdatabaseencoding(); -- just to label the results files + getdatabaseencoding +--------------------- + SQL_ASCII +(1 row) + -- checks for double-quoted values -- basic unicode input SELECT '"\u"'::jsonpath; -- ERROR, incomplete escape @@ -19,16 +34,14 @@ LINE 1: SELECT '"\u0000"'::jsonpath; ^ DETAIL: \u0000 cannot be converted to text. SELECT '"\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT '"\uaBcD"'::jsonpath; ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -- handling of unicode surrogate pairs select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. select '"\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row ERROR: invalid input syntax for type jsonpath LINE 1: select '"\ud83d\ud83d"'::jsonpath; @@ -51,10 +64,9 @@ LINE 1: select '"\ude04X"'::jsonpath; DETAIL: Unicode low surrogate must follow a high surrogate. --handling of simple unicode escapes select '"the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: select '"the Copyright \u00a9 sign"'::jsonpath as correct_in... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. select '"dollar \u0024 character"'::jsonpath as correct_everywhere; correct_everywhere ---------------------- @@ -98,16 +110,14 @@ LINE 1: SELECT '$."\u0000"'::jsonpath; ^ DETAIL: \u0000 cannot be converted to text. SELECT '$."\uaBcD"'::jsonpath; -- OK, uppercase and lower case both OK -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: SELECT '$."\uaBcD"'::jsonpath; ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. -- handling of unicode surrogate pairs select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8; -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_i... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. select '$."\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row ERROR: invalid input syntax for type jsonpath LINE 1: select '$."\ud83d\ud83d"'::jsonpath; @@ -130,10 +140,9 @@ LINE 1: select '$."\ude04X"'::jsonpath; DETAIL: Unicode low surrogate must follow a high surrogate. --handling of simple unicode escapes select '$."the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8; -ERROR: invalid input syntax for type jsonpath +ERROR: conversion between UTF8 and SQL_ASCII is not supported LINE 1: select '$."the Copyright \u00a9 sign"'::jsonpath as correct_... ^ -DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8. select '$."dollar \u0024 character"'::jsonpath as correct_everywhere; correct_everywhere ------------------------ diff --git a/src/test/regress/expected/jsonpath_encoding_2.out b/src/test/regress/expected/jsonpath_encoding_2.out new file mode 100644 index 00000000000..bb71bfe72c4 --- /dev/null +++ b/src/test/regress/expected/jsonpath_encoding_2.out @@ -0,0 +1,9 @@ +-- +-- encoding-sensitive tests for jsonpath +-- +-- We provide expected-results files for UTF8 (jsonpath_encoding.out) +-- and for SQL_ASCII (jsonpath_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 60cb86193c7..6c4443afcf1 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -35,6 +35,12 @@ SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*'; dat\+000061 (1 row) +SELECT U&'a\\b' AS "a\b"; + a\b +----- + a\b +(1 row) + SELECT U&' \' UESCAPE '!' AS "tricky"; tricky -------- @@ -48,13 +54,15 @@ SELECT 'tricky' AS U&"\" UESCAPE '!'; (1 row) SELECT U&'wrong: \061'; -ERROR: invalid Unicode escape value +ERROR: invalid Unicode escape LINE 1: SELECT U&'wrong: \061'; ^ +HINT: Unicode escapes must be \XXXX or \+XXXXXX. SELECT U&'wrong: \+0061'; -ERROR: invalid Unicode escape value +ERROR: invalid Unicode escape LINE 1: SELECT U&'wrong: \+0061'; ^ +HINT: Unicode escapes must be \XXXX or \+XXXXXX. SELECT U&'wrong: +0061' UESCAPE +; ERROR: UESCAPE must be followed by a simple string literal at or near "+" LINE 1: SELECT U&'wrong: +0061' UESCAPE +; @@ -63,6 +71,77 @@ SELECT U&'wrong: +0061' UESCAPE '+'; ERROR: invalid Unicode escape character at or near "'+'" LINE 1: SELECT U&'wrong: +0061' UESCAPE '+'; ^ +SELECT U&'wrong: \db99'; +ERROR: invalid Unicode surrogate pair +LINE 1: SELECT U&'wrong: \db99'; + ^ +SELECT U&'wrong: \db99xy'; +ERROR: invalid Unicode surrogate pair +LINE 1: SELECT U&'wrong: \db99xy'; + ^ +SELECT U&'wrong: \db99\\'; +ERROR: invalid Unicode surrogate pair +LINE 1: SELECT U&'wrong: \db99\\'; + ^ +SELECT U&'wrong: \db99\0061'; +ERROR: invalid Unicode surrogate pair +LINE 1: SELECT U&'wrong: \db99\0061'; + ^ +SELECT U&'wrong: \+00db99\+000061'; +ERROR: invalid Unicode surrogate pair +LINE 1: SELECT U&'wrong: \+00db99\+000061'; + ^ +SELECT U&'wrong: \+2FFFFF'; +ERROR: invalid Unicode escape value +LINE 1: SELECT U&'wrong: \+2FFFFF'; + ^ +-- while we're here, check the same cases in E-style literals +SELECT E'd\u0061t\U00000061' AS "data"; + data +------ + data +(1 row) + +SELECT E'a\\b' AS "a\b"; + a\b +----- + a\b +(1 row) + +SELECT E'wrong: \u061'; +ERROR: invalid Unicode escape +LINE 1: SELECT E'wrong: \u061'; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'wrong: \U0061'; +ERROR: invalid Unicode escape +LINE 1: SELECT E'wrong: \U0061'; + ^ +HINT: Unicode escapes must be \uXXXX or \UXXXXXXXX. +SELECT E'wrong: \udb99'; +ERROR: invalid Unicode surrogate pair at or near "'" +LINE 1: SELECT E'wrong: \udb99'; + ^ +SELECT E'wrong: \udb99xy'; +ERROR: invalid Unicode surrogate pair at or near "x" +LINE 1: SELECT E'wrong: \udb99xy'; + ^ +SELECT E'wrong: \udb99\\'; +ERROR: invalid Unicode surrogate pair at or near "\" +LINE 1: SELECT E'wrong: \udb99\\'; + ^ +SELECT E'wrong: \udb99\u0061'; +ERROR: invalid Unicode surrogate pair at or near "\u0061" +LINE 1: SELECT E'wrong: \udb99\u0061'; + ^ +SELECT E'wrong: \U0000db99\U00000061'; +ERROR: invalid Unicode surrogate pair at or near "\U00000061" +LINE 1: SELECT E'wrong: \U0000db99\U00000061'; + ^ +SELECT E'wrong: \U002FFFFF'; +ERROR: invalid Unicode escape value at or near "\U002FFFFF" +LINE 1: SELECT E'wrong: \U002FFFFF'; + ^ SET standard_conforming_strings TO off; SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061"; ERROR: unsafe use of string constant with Unicode escapes diff --git a/src/test/regress/sql/json_encoding.sql b/src/test/regress/sql/json_encoding.sql index 87a2d564ff3..d7fac69733d 100644 --- a/src/test/regress/sql/json_encoding.sql +++ b/src/test/regress/sql/json_encoding.sql @@ -1,5 +1,16 @@ - +-- -- encoding-sensitive tests for json and jsonb +-- + +-- We provide expected-results files for UTF8 (json_encoding.out) +-- and for SQL_ASCII (json_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif + +SELECT getdatabaseencoding(); -- just to label the results files -- first json diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql index 3a23b728182..55d9e30b95c 100644 --- a/src/test/regress/sql/jsonpath_encoding.sql +++ b/src/test/regress/sql/jsonpath_encoding.sql @@ -1,5 +1,16 @@ - +-- -- encoding-sensitive tests for jsonpath +-- + +-- We provide expected-results files for UTF8 (jsonpath_encoding.out) +-- and for SQL_ASCII (jsonpath_encoding_1.out). Skip otherwise. +SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII') + AS skip_test \gset +\if :skip_test +\quit +\endif + +SELECT getdatabaseencoding(); -- just to label the results files -- checks for double-quoted values diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index c5cd15142a5..3e28cd198f4 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -21,6 +21,7 @@ SET standard_conforming_strings TO on; SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061"; SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*'; +SELECT U&'a\\b' AS "a\b"; SELECT U&' \' UESCAPE '!' AS "tricky"; SELECT 'tricky' AS U&"\" UESCAPE '!'; @@ -30,6 +31,25 @@ SELECT U&'wrong: \+0061'; SELECT U&'wrong: +0061' UESCAPE +; SELECT U&'wrong: +0061' UESCAPE '+'; +SELECT U&'wrong: \db99'; +SELECT U&'wrong: \db99xy'; +SELECT U&'wrong: \db99\\'; +SELECT U&'wrong: \db99\0061'; +SELECT U&'wrong: \+00db99\+000061'; +SELECT U&'wrong: \+2FFFFF'; + +-- while we're here, check the same cases in E-style literals +SELECT E'd\u0061t\U00000061' AS "data"; +SELECT E'a\\b' AS "a\b"; +SELECT E'wrong: \u061'; +SELECT E'wrong: \U0061'; +SELECT E'wrong: \udb99'; +SELECT E'wrong: \udb99xy'; +SELECT E'wrong: \udb99\\'; +SELECT E'wrong: \udb99\u0061'; +SELECT E'wrong: \U0000db99\U00000061'; +SELECT E'wrong: \U002FFFFF'; + SET standard_conforming_strings TO off; SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061"; |