author     Tom Lane <tgl@sss.pgh.pa.us>    2015-01-30 14:44:46 -0500
committer  Tom Lane <tgl@sss.pgh.pa.us>    2015-01-30 14:44:56 -0500
commit     451d2808151e56c2c70893b8c3ee72af393a9f1d (patch)
tree       3380b5cfd00c81235c609a805a02b773ecf77d79 /src
parent     e40d43f88eb8617e7c1b3a03dec595efe6066f72 (diff)
Fix jsonb Unicode escape processing, and in consequence disallow \u0000.
We've been trying to support \u0000 in JSON values since commit 78ed8e03c67d7333, and have introduced increasingly worse hacks to try to make it work, such as commit 0ad1a816320a2b53. However, it fundamentally can't work in the way envisioned, because the stored representation looks the same as for \\u0000 which is not the same thing at all. It's also entirely bogus to output \u0000 when de-escaped output is called for.

The right way to do this would be to store an actual 0x00 byte, and then throw error only if asked to produce de-escaped textual output. However, getting to that point seems likely to take considerable work and may well never be practical in the 9.4.x series.

To preserve our options for better behavior while getting rid of the nasty side-effects of 0ad1a816320a2b53, revert that commit in toto and instead throw error if \u0000 is used in a context where it needs to be de-escaped. (These are the same contexts where non-ASCII Unicode escapes throw error if the database encoding isn't UTF8, so this behavior is by no means without precedent.)

In passing, make both the \u0000 case and the non-ASCII Unicode case report ERRCODE_UNTRANSLATABLE_CHARACTER / "unsupported Unicode escape sequence" rather than claiming there's something wrong with the input syntax.

Back-patch to 9.4, where we have to do something because 0ad1a816320a2b53 broke things for many cases having nothing to do with \u0000. 9.3 also has bogus behavior, but only for that specific escape value, so given the lack of field complaints it seems better to leave 9.3 alone.
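For context, a minimal SQL sketch of the resulting user-visible behavior, drawn from the regression tests updated in this patch (exact error wording is taken from the new expected output; nothing here goes beyond what the tests below exercise): \u0000 remains legal inside a json value, but any operation that must de-escape it to text now raises the new error.

    -- still accepted: json stores the text as given, no de-escaping needed
    select json '{ "a": "null \u0000 escape" }';

    -- now fails: ->> must produce de-escaped text
    select json '{ "a": "null \u0000 escape" }' ->> 'a';
    -- ERROR:  unsupported Unicode escape sequence
    -- DETAIL:  \u0000 cannot be converted to text.

    -- now fails on input: jsonb de-escapes while parsing
    SELECT '"\u0000"'::jsonb;

A doubled backslash, as in '{ "a": "null \\u0000 escape" }', is not a Unicode escape at all and continues to work in every context.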
Diffstat (limited to 'src')
-rw-r--r--  src/backend/utils/adt/json.c            49
-rw-r--r--  src/test/regress/expected/json.out      58
-rw-r--r--  src/test/regress/expected/json_1.out    62
-rw-r--r--  src/test/regress/expected/jsonb.out     78
-rw-r--r--  src/test/regress/expected/jsonb_1.out   84
-rw-r--r--  src/test/regress/sql/json.sql           18
-rw-r--r--  src/test/regress/sql/jsonb.sql          26
7 files changed, 239 insertions(+), 136 deletions(-)
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
index 3c137ead1d0..951b6554007 100644
--- a/src/backend/utils/adt/json.c
+++ b/src/backend/utils/adt/json.c
@@ -806,14 +806,17 @@ json_lex_string(JsonLexContext *lex)
* For UTF8, replace the escape sequence by the actual
* utf8 character in lex->strval. Do this also for other
* encodings if the escape designates an ASCII character,
- * otherwise raise an error. We don't ever unescape a
- * \u0000, since that would result in an impermissible nul
- * byte.
+ * otherwise raise an error.
*/
if (ch == 0)
{
- appendStringInfoString(lex->strval, "\\u0000");
+ /* We can't allow this, since our TEXT type doesn't */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
+ errdetail("\\u0000 cannot be converted to text."),
+ report_json_context(lex)));
}
else if (GetDatabaseEncoding() == PG_UTF8)
{
@@ -833,8 +836,8 @@ json_lex_string(JsonLexContext *lex)
else
{
ereport(ERROR,
- (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
- errmsg("invalid input syntax for type json"),
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
errdetail("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8."),
report_json_context(lex)));
}
@@ -1284,8 +1287,8 @@ json_categorize_type(Oid typoid,
/*
* We need to get the output function for everything except date and
- * timestamp types, array and composite types, booleans,
- * and non-builtin types where there's a cast to json.
+ * timestamp types, array and composite types, booleans, and non-builtin
+ * types where there's a cast to json.
*/
switch (typoid)
@@ -1335,11 +1338,12 @@ json_categorize_type(Oid typoid,
/* but let's look for a cast to json, if it's not built-in */
if (typoid >= FirstNormalObjectId)
{
- Oid castfunc;
+ Oid castfunc;
CoercionPathType ctype;
ctype = find_coercion_pathway(JSONOID, typoid,
- COERCION_EXPLICIT, &castfunc);
+ COERCION_EXPLICIT,
+ &castfunc);
if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
{
*tcategory = JSONTYPE_CAST;
@@ -2382,30 +2386,7 @@ escape_json(StringInfo buf, const char *str)
appendStringInfoString(buf, "\\\"");
break;
case '\\':
-
- /*
- * Unicode escapes are passed through as is. There is no
- * requirement that they denote a valid character in the
- * server encoding - indeed that is a big part of their
- * usefulness.
- *
- * All we require is that they consist of \uXXXX where the Xs
- * are hexadecimal digits. It is the responsibility of the
- * caller of, say, to_json() to make sure that the unicode
- * escape is valid.
- *
- * In the case of a jsonb string value being escaped, the only
- * unicode escape that should be present is \u0000, all the
- * other unicode escapes will have been resolved.
- */
- if (p[1] == 'u' &&
- isxdigit((unsigned char) p[2]) &&
- isxdigit((unsigned char) p[3]) &&
- isxdigit((unsigned char) p[4]) &&
- isxdigit((unsigned char) p[5]))
- appendStringInfoCharMacro(buf, *p);
- else
- appendStringInfoString(buf, "\\\\");
+ appendStringInfoString(buf, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out
index e435d3e1650..16704363dc6 100644
--- a/src/test/regress/expected/json.out
+++ b/src/test/regress/expected/json.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json
-----------
- "\\abcd"
-(1 row)
-
--json_agg
SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1400,6 +1386,36 @@ ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+---------------------------------------
+ { "a": "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-------------------------------------
+ { "a": "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+--------------------------------------
+ { "a": "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+ not_unescaped
+--------------------------------
+ { "a": "null \u0000 escape" }
+(1 row)
+
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+---------------------------------
+ { "a": "null \\u0000 escape" }
+(1 row)
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
@@ -1412,8 +1428,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
dollar $ character
(1 row)
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
- not_unescaped
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/json_1.out b/src/test/regress/expected/json_1.out
index 106b481fab9..807814641dd 100644
--- a/src/test/regress/expected/json_1.out
+++ b/src/test/regress/expected/json_1.out
@@ -426,20 +426,6 @@ select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_json(text '\uabcd');
- to_json
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_json(text '\abcd');
- to_json
-----------
- "\\abcd"
-(1 row)
-
--json_agg
SELECT json_agg(q)
FROM ( SELECT $$a$$ || x AS b, y AS c,
@@ -1378,7 +1364,7 @@ select * from json_populate_recordset(row('def',99,null)::jpop,'[{"a":[100,200,3
-- handling of unicode surrogate pairs
select json '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
@@ -1398,8 +1384,38 @@ ERROR: invalid input syntax for type json
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+---------------------------------------
+ { "a": "the Copyright \u00a9 sign" }
+(1 row)
+
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-------------------------------------
+ { "a": "dollar \u0024 character" }
+(1 row)
+
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+--------------------------------------
+ { "a": "dollar \\u0024 character" }
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+ not_unescaped
+--------------------------------
+ { "a": "null \u0000 escape" }
+(1 row)
+
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+---------------------------------
+ { "a": "null \\u0000 escape" }
+(1 row)
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
@@ -1408,8 +1424,18 @@ select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
dollar $ character
(1 row)
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
- not_unescaped
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/jsonb.out b/src/test/regress/expected/jsonb.out
index aa5686ffb69..6c6ed950f08 100644
--- a/src/test/regress/expected/jsonb.out
+++ b/src/test/regress/expected/jsonb.out
@@ -60,12 +60,18 @@ LINE 1: SELECT '"\u000g"'::jsonb;
^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
- jsonb
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+ jsonb
+-------
+ "E"
(1 row)
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb
-----------
- "\\abcd"
-(1 row)
-
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ correct_in_utf8
+-------------------------------
+ {"a": "the Copyright © sign"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
correct_in_utf8
----------------------
the Copyright © sign
(1 row)
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
- not_unescaped
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/expected/jsonb_1.out b/src/test/regress/expected/jsonb_1.out
index 687ae63b707..f30148d51c1 100644
--- a/src/test/regress/expected/jsonb_1.out
+++ b/src/test/regress/expected/jsonb_1.out
@@ -60,16 +60,22 @@ LINE 1: SELECT '"\u000g"'::jsonb;
^
DETAIL: "\u" must be followed by four hexadecimal digits.
CONTEXT: JSON data, line 1: "\u000g...
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
- jsonb
-----------
- "\u0000"
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+ jsonb
+-------
+ "E"
(1 row)
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT '"\u0000"'::jsonb;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: ...
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -324,20 +330,6 @@ select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
(1 row)
COMMIT;
--- unicode escape - backslash is not escaped
-select to_jsonb(text '\uabcd');
- to_jsonb
-----------
- "\uabcd"
-(1 row)
-
--- any other backslash is escaped
-select to_jsonb(text '\abcd');
- to_jsonb
-----------
- "\\abcd"
-(1 row)
-
--jsonb_agg
CREATE TEMP TABLE rows AS
SELECT x, 'txt' || x as y
@@ -1941,7 +1933,7 @@ SELECT * FROM jsonb_populate_recordset(row('def',99,NULL)::jbpop,'[{"a":[100,200
-- handling of unicode surrogate pairs
SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR: invalid input syntax for type json
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT octet_length((jsonb '{ "a": "\ud83d\ude04\ud83d\udc3...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
@@ -1971,20 +1963,62 @@ LINE 1: SELECT jsonb '{ "a": "\ude04X" }' -> 'a';
DETAIL: Unicode low surrogate must follow a high surrogate.
CONTEXT: JSON data, line 1: { "a":...
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-ERROR: invalid input syntax for type json
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as corr...
+ ^
+DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+ correct_everywhere
+-----------------------------
+ {"a": "dollar $ character"}
+(1 row)
+
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+ not_an_escape
+-----------------------------------
+ {"a": "dollar \\u0024 character"}
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+ not_an_escape
+------------------------------
+ {"a": "null \\u0000 escape"}
+(1 row)
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+ERROR: unsupported Unicode escape sequence
LINE 1: SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a'...
^
DETAIL: Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
CONTEXT: JSON data, line 1: { "a":...
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
correct_everywhere
--------------------
dollar $ character
(1 row)
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
- not_unescaped
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+ not_an_escape
+-------------------------
+ dollar \u0024 character
+(1 row)
+
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+ERROR: unsupported Unicode escape sequence
+LINE 1: SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fai...
+ ^
+DETAIL: \u0000 cannot be converted to text.
+CONTEXT: JSON data, line 1: { "a":...
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
+ not_an_escape
--------------------
null \u0000 escape
(1 row)
diff --git a/src/test/regress/sql/json.sql b/src/test/regress/sql/json.sql
index 36a6674ff91..53a37a88439 100644
--- a/src/test/regress/sql/json.sql
+++ b/src/test/regress/sql/json.sql
@@ -111,14 +111,6 @@ SET LOCAL TIME ZONE -8;
select to_json(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT;
--- unicode escape - backslash is not escaped
-
-select to_json(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_json(text '\abcd');
-
--json_agg
SELECT json_agg(q)
@@ -401,9 +393,17 @@ select json '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
--handling of simple unicode escapes
+select json '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+select json '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+select json '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+select json '{ "a": "null \u0000 escape" }' as not_unescaped;
+select json '{ "a": "null \\u0000 escape" }' as not_an_escape;
+
select json '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
select json '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
-select json '{ "a": "null \u0000 escape" }' ->> 'a' as not_unescaped;
+select json '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+select json '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+select json '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
--json_typeof() function
select value, json_typeof(value)
diff --git a/src/test/regress/sql/jsonb.sql b/src/test/regress/sql/jsonb.sql
index a8461039330..53cc2393c62 100644
--- a/src/test/regress/sql/jsonb.sql
+++ b/src/test/regress/sql/jsonb.sql
@@ -10,7 +10,8 @@ SELECT '"\v"'::jsonb; -- ERROR, not a valid JSON escape
SELECT '"\u"'::jsonb; -- ERROR, incomplete escape
SELECT '"\u00"'::jsonb; -- ERROR, incomplete escape
SELECT '"\u000g"'::jsonb; -- ERROR, g is not a hex digit
-SELECT '"\u0000"'::jsonb; -- OK, legal escape
+SELECT '"\u0045"'::jsonb; -- OK, legal escape
+SELECT '"\u0000"'::jsonb; -- ERROR, we don't support U+0000
-- use octet_length here so we don't get an odd unicode char in the
-- output
SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
@@ -73,14 +74,6 @@ SET LOCAL TIME ZONE -8;
select to_jsonb(timestamptz '2014-05-28 12:22:35.614298-04');
COMMIT;
--- unicode escape - backslash is not escaped
-
-select to_jsonb(text '\uabcd');
-
--- any other backslash is escaped
-
-select to_jsonb(text '\abcd');
-
--jsonb_agg
CREATE TEMP TABLE rows AS
@@ -488,9 +481,18 @@ SELECT jsonb '{ "a": "\ud83dX" }' -> 'a'; -- orphan high surrogate
SELECT jsonb '{ "a": "\ude04X" }' -> 'a'; -- orphan low surrogate
-- handling of simple unicode escapes
-SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' AS correct_in_utf8;
-SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' AS correct_everyWHERE;
-SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' AS not_unescaped;
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' as correct_in_utf8;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' as correct_everywhere;
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' as not_an_escape;
+SELECT jsonb '{ "a": "null \u0000 escape" }' as fails;
+SELECT jsonb '{ "a": "null \\u0000 escape" }' as not_an_escape;
+
+SELECT jsonb '{ "a": "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
+SELECT jsonb '{ "a": "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
+SELECT jsonb '{ "a": "dollar \\u0024 character" }' ->> 'a' as not_an_escape;
+SELECT jsonb '{ "a": "null \u0000 escape" }' ->> 'a' as fails;
+SELECT jsonb '{ "a": "null \\u0000 escape" }' ->> 'a' as not_an_escape;
-- jsonb_to_record and jsonb_to_recordset