1 files changed, 9 insertions, 36 deletions
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 70681b789d3..be0a2cfa2f7 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -486,13 +486,6 @@ hexval(char c)
 static void
 addUnicodeChar(int ch)
 {
-	/*
-	 * For UTF8, replace the escape sequence by the actual
-	 * utf8 character in lex->strval. Do this also for other
-	 * encodings if the escape designates an ASCII character,
-	 * otherwise raise an error.
-	 */
-
 	if (ch == 0)
 	{
 		/* We can't allow this, since our TEXT type doesn't */
@@ -501,40 +494,20 @@ addUnicodeChar(int ch)
 				 errmsg("unsupported Unicode escape sequence"),
 				  errdetail("\\u0000 cannot be converted to text.")));
 	}
-	else if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		char utf8str[5];
-		int utf8len;
-
-		unicode_to_utf8(ch, (unsigned char *) utf8str);
-		utf8len = pg_utf_mblen((unsigned char *) utf8str);
-		addstring(false, utf8str, utf8len);
-	}
-	else if (ch <= 0x007f)
-	{
-		/*
-		 * This is the only way to designate things like a
-		 * form feed character in JSON, so it's useful in all
-		 * encodings.
-		 */
-		addchar(false, (char) ch);
-	}
 	else
 	{
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type %s", "jsonpath"),
-				 errdetail("Unicode escape values cannot be used for code "
-						   "point values above 007F when the server encoding "
-						   "is not UTF8.")));
+		char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+		pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		addstring(false, cbuf, strlen(cbuf));
 	}
 }
 
-/* Add unicode character and process its hi surrogate */
+/* Add unicode character, processing any surrogate pairs */
 static void
 addUnicode(int ch, int *hi_surrogate)
 {
-	if (ch >= 0xd800 && ch <= 0xdbff)
+	if (is_utf16_surrogate_first(ch))
 	{
 		if (*hi_surrogate != -1)
 			ereport(ERROR,
@@ -542,10 +515,10 @@ addUnicode(int ch, int *hi_surrogate)
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode high surrogate must not follow "
 							   "a high surrogate.")));
-		*hi_surrogate = (ch & 0x3ff) << 10;
+		*hi_surrogate = ch;
 		return;
 	}
-	else if (ch >= 0xdc00 && ch <= 0xdfff)
+	else if (is_utf16_surrogate_second(ch))
 	{
 		if (*hi_surrogate == -1)
 			ereport(ERROR,
@@ -553,7 +526,7 @@ addUnicode(int ch, int *hi_surrogate)
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode low surrogate must follow a high "
 							   "surrogate.")));
-		ch = 0x10000 + *hi_surrogate + (ch & 0x3ff);
+		ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
 		*hi_surrogate = -1;
 	}
 	else if (*hi_surrogate != -1)