18 files changed, 557 insertions, 162 deletions
diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c
index 1bf1144c4fd..be86eb37fef 100644
--- a/src/backend/parser/parser.c
+++ b/src/backend/parser/parser.c
@@ -292,22 +292,14 @@ hexval(unsigned char c)
 	return 0;					/* not reached */
 }
 
-/* is Unicode code point acceptable in database's encoding? */
+/* is Unicode code point acceptable? */
 static void
-check_unicode_value(pg_wchar c, int pos, core_yyscan_t yyscanner)
+check_unicode_value(pg_wchar c)
 {
-	/* See also addunicode() in scan.l */
-	if (c == 0 || c > 0x10FFFF)
+	if (!is_valid_unicode_codepoint(c))
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("invalid Unicode escape value"),
-				 scanner_errposition(pos, yyscanner)));
-
-	if (c > 0x7F && GetDatabaseEncoding() != PG_UTF8)
-		ereport(ERROR,
-				(errcode(ERRCODE_SYNTAX_ERROR),
-				 errmsg("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"),
-				 scanner_errposition(pos, yyscanner)));
+				 errmsg("invalid Unicode escape value")));
 }
 
 /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
@@ -338,20 +330,39 @@ str_udeescape(const char *str, char escape,
 	const char *in;
 	char	   *new,
 			   *out;
+	size_t		new_len;
 	pg_wchar	pair_first = 0;
+	ScannerCallbackState scbstate;
 
 	/*
-	 * This relies on the subtle assumption that a UTF-8 expansion cannot be
-	 * longer than its escaped representation.
+	 * Guesstimate that result will be no longer than input, but allow enough
+	 * padding for Unicode conversion.
 	 */
-	new = palloc(strlen(str) + 1);
+	new_len = strlen(str) + MAX_UNICODE_EQUIVALENT_STRING + 1;
+	new = palloc(new_len);
 
 	in = str;
 	out = new;
 	while (*in)
 	{
+		/* Enlarge string if needed */
+		size_t		out_dist = out - new;
+
+		if (out_dist > new_len - (MAX_UNICODE_EQUIVALENT_STRING + 1))
+		{
+			new_len *= 2;
+			new = repalloc(new, new_len);
+			out = new + out_dist;
+		}
+
 		if (in[0] == escape)
 		{
+			/*
+			 * Any errors reported while processing this escape sequence will
+			 * have an error cursor pointing at the escape.
+			 */
+			setup_scanner_errposition_callback(&scbstate, yyscanner,
+											   in - str + position + 3);	/* 3 for U&" */
 			if (in[1] == escape)
 			{
 				if (pair_first)
@@ -370,9 +381,7 @@ str_udeescape(const char *str, char escape,
 					(hexval(in[2]) << 8) +
 					(hexval(in[3]) << 4) +
 					hexval(in[4]);
-				check_unicode_value(unicode,
-									in - str + position + 3,	/* 3 for U&" */
-									yyscanner);
+				check_unicode_value(unicode);
 				if (pair_first)
 				{
 					if (is_utf16_surrogate_second(unicode))
@@ -390,8 +399,8 @@ str_udeescape(const char *str, char escape,
 					pair_first = unicode;
 				else
 				{
-					unicode_to_utf8(unicode, (unsigned char *) out);
-					out += pg_mblen(out);
+					pg_unicode_to_server(unicode, (unsigned char *) out);
+					out += strlen(out);
 				}
 				in += 5;
 			}
@@ -411,9 +420,7 @@ str_udeescape(const char *str, char escape,
 					(hexval(in[5]) << 8) +
 					(hexval(in[6]) << 4) +
 					hexval(in[7]);
-				check_unicode_value(unicode,
-									in - str + position + 3,	/* 3 for U&" */
-									yyscanner);
+				check_unicode_value(unicode);
 				if (pair_first)
 				{
 					if (is_utf16_surrogate_second(unicode))
@@ -431,17 +438,18 @@ str_udeescape(const char *str, char escape,
 					pair_first = unicode;
 				else
 				{
-					unicode_to_utf8(unicode, (unsigned char *) out);
-					out += pg_mblen(out);
+					pg_unicode_to_server(unicode, (unsigned char *) out);
+					out += strlen(out);
 				}
 				in += 8;
 			}
 			else
 				ereport(ERROR,
 						(errcode(ERRCODE_SYNTAX_ERROR),
-						 errmsg("invalid Unicode escape value"),
-						 scanner_errposition(in - str + position + 3,	/* 3 for U&" */
-											 yyscanner)));
+						 errmsg("invalid Unicode escape"),
+						 errhint("Unicode escapes must be \\XXXX or \\+XXXXXX.")));
+
+			cancel_scanner_errposition_callback(&scbstate);
 		}
 		else
 		{
@@ -457,15 +465,13 @@ str_udeescape(const char *str, char escape,
 		goto invalid_pair;
 
 	*out = '\0';
+	return new;
 
 	/*
-	 * We could skip pg_verifymbstr if we didn't process any non-7-bit-ASCII
-	 * codes; but it's probably not worth the trouble, since this isn't likely
-	 * to be a performance-critical path.
+	 * We might get here with the error callback active, or not.  Call
+	 * scanner_errposition to make sure an error cursor appears; if the
+	 * callback is active, this is duplicative but harmless.
 	 */
-	pg_verifymbstr(new, out - new, false);
-	return new;
-
 invalid_pair:
 	ereport(ERROR,
 			(errcode(ERRCODE_SYNTAX_ERROR),
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 84c73914a85..b1ea0cb5384 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -106,6 +106,18 @@ const uint16 ScanKeywordTokens[] = {
  */
 #define ADVANCE_YYLLOC(delta)  ( *(yylloc) += (delta) )
 
+/*
+ * Sometimes, we do want yylloc to point into the middle of a token; this is
+ * useful for instance to throw an error about an escape sequence within a
+ * string literal.  But if we find no error there, we want to revert yylloc
+ * to the token start, so that that's the location reported to the parser.
+ * Use PUSH_YYLLOC/POP_YYLLOC to save/restore yylloc around such code.
+ * (Currently the implied "stack" is just one location, but someday we might
+ * need to nest these.)
+ */
+#define PUSH_YYLLOC()	(yyextra->save_yylloc = *(yylloc))
+#define POP_YYLLOC()	(*(yylloc) = yyextra->save_yylloc)
+
 #define startlit()	( yyextra->literallen = 0 )
 static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
@@ -605,8 +617,18 @@ other			.
 <xe>{xeunicode} {
 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 
+					/*
+					 * For consistency with other productions, issue any
+					 * escape warning with cursor pointing to start of string.
+					 * We might want to change that, someday.
+					 */
 					check_escape_warning(yyscanner);
 
+					/* Remember start of overall string token ... */
+					PUSH_YYLLOC();
+					/* ... and set the error cursor to point at this esc seq */
+					SET_YYLLOC();
+
 					if (is_utf16_surrogate_first(c))
 					{
 						yyextra->utf16_first_part = c;
@@ -616,10 +638,18 @@ other			.
 						yyerror("invalid Unicode surrogate pair");
 					else
 						addunicode(c, yyscanner);
+
+					/* Restore yylloc to be start of string token */
+					POP_YYLLOC();
 				}
 <xeu>{xeunicode} {
 					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 
+					/* Remember start of overall string token ... */
+					PUSH_YYLLOC();
+					/* ... and set the error cursor to point at this esc seq */
+					SET_YYLLOC();
+
 					if (!is_utf16_surrogate_second(c))
 						yyerror("invalid Unicode surrogate pair");
 
@@ -627,12 +657,21 @@ other			.
 
 					addunicode(c, yyscanner);
 
+					/* Restore yylloc to be start of string token */
+					POP_YYLLOC();
+
 					BEGIN(xe);
 				}
-<xeu>.			{ yyerror("invalid Unicode surrogate pair"); }
-<xeu>\n			{ yyerror("invalid Unicode surrogate pair"); }
-<xeu><<EOF>>	{ yyerror("invalid Unicode surrogate pair"); }
+<xeu>. |
+<xeu>\n |
+<xeu><<EOF>>	{
+					/* Set the error cursor to point at missing esc seq */
+					SET_YYLLOC();
+					yyerror("invalid Unicode surrogate pair");
+				}
 <xe,xeu>{xeunicodefail}	{
+					/* Set the error cursor to point at malformed esc seq */
+					SET_YYLLOC();
 					ereport(ERROR,
 							(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
 							 errmsg("invalid Unicode escape"),
@@ -1029,12 +1068,13 @@ other			.
  * scanner_errposition
  *		Report a lexer or grammar error cursor position, if possible.
  *
- * This is expected to be used within an ereport() call.  The return value
+ * This is expected to be used within an ereport() call, or via an error
+ * callback such as setup_scanner_errposition_callback().  The return value
  * is a dummy (always 0, in fact).
  *
  * Note that this can only be used for messages emitted during raw parsing
- * (essentially, scan.l and gram.y), since it requires the yyscanner struct
- * to still be available.
+ * (essentially, scan.l, parser.c, and gram.y), since it requires the
+ * yyscanner struct to still be available.
  */
 int
 scanner_errposition(int location, core_yyscan_t yyscanner)
@@ -1051,6 +1091,62 @@ scanner_errposition(int location, core_yyscan_t yyscanner)
 }
 
 /*
+ * Error context callback for inserting scanner error location.
+ *
+ * Note that this will be called for *any* error occurring while the
+ * callback is installed.  We avoid inserting an irrelevant error location
+ * if the error is a query cancel --- are there any other important cases?
+ */
+static void
+scb_error_callback(void *arg)
+{
+	ScannerCallbackState *scbstate = (ScannerCallbackState *) arg;
+
+	if (geterrcode() != ERRCODE_QUERY_CANCELED)
+		(void) scanner_errposition(scbstate->location, scbstate->yyscanner);
+}
+
+/*
+ * setup_scanner_errposition_callback
+ *		Arrange for non-scanner errors to report an error position
+ *
+ * Sometimes the scanner calls functions that aren't part of the scanner
+ * subsystem and can't reasonably be passed the yyscanner pointer; yet
+ * we would like any errors thrown in those functions to be tagged with an
+ * error location.  Use this function to set up an error context stack
+ * entry that will accomplish that.  Usage pattern:
+ *
+ *		declare a local variable "ScannerCallbackState scbstate"
+ *		...
+ *		setup_scanner_errposition_callback(&scbstate, yyscanner, location);
+ *		call function that might throw error;
+ *		cancel_scanner_errposition_callback(&scbstate);
+ */
+void
+setup_scanner_errposition_callback(ScannerCallbackState *scbstate,
+								   core_yyscan_t yyscanner,
+								   int location)
+{
+	/* Setup error traceback support for ereport() */
+	scbstate->yyscanner = yyscanner;
+	scbstate->location = location;
+	scbstate->errcallback.callback = scb_error_callback;
+	scbstate->errcallback.arg = (void *) scbstate;
+	scbstate->errcallback.previous = error_context_stack;
+	error_context_stack = &scbstate->errcallback;
+}
+
+/*
+ * Cancel a previously-set-up errposition callback.
+ */
+void
+cancel_scanner_errposition_callback(ScannerCallbackState *scbstate)
+{
+	/* Pop the error context stack */
+	error_context_stack = scbstate->errcallback.previous;
+}
+
+/*
  * scanner_yyerror
  *		Report a lexer or grammar error.
  *
@@ -1226,19 +1322,20 @@ process_integer_literal(const char *token, YYSTYPE *lval)
 static void
 addunicode(pg_wchar c, core_yyscan_t yyscanner)
 {
-	char		buf[8];
+	ScannerCallbackState scbstate;
+	char		buf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 
-	/* See also check_unicode_value() in parser.c */
-	if (c == 0 || c > 0x10FFFF)
+	if (!is_valid_unicode_codepoint(c))
 		yyerror("invalid Unicode escape value");
-	if (c > 0x7F)
-	{
-		if (GetDatabaseEncoding() != PG_UTF8)
-			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
-		yyextra->saw_non_ascii = true;
-	}
-	unicode_to_utf8(c, (unsigned char *) buf);
-	addlit(buf, pg_mblen(buf), yyscanner);
+
+	/*
+	 * We expect that pg_unicode_to_server() will complain about any
+	 * unconvertible code point, so we don't have to set saw_non_ascii.
+	 */
+	setup_scanner_errposition_callback(&scbstate, yyscanner, *(yylloc));
+	pg_unicode_to_server(c, (unsigned char *) buf);
+	cancel_scanner_errposition_callback(&scbstate);
+	addlit(buf, strlen(buf), yyscanner);
 }
 
 static unsigned char
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 70681b789d3..be0a2cfa2f7 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -486,13 +486,6 @@ hexval(char c)
 static void
 addUnicodeChar(int ch)
 {
-	/*
-	 * For UTF8, replace the escape sequence by the actual
-	 * utf8 character in lex->strval. Do this also for other
-	 * encodings if the escape designates an ASCII character,
-	 * otherwise raise an error.
-	 */
-
 	if (ch == 0)
 	{
 		/* We can't allow this, since our TEXT type doesn't */
@@ -501,40 +494,20 @@ addUnicodeChar(int ch)
 				 errmsg("unsupported Unicode escape sequence"),
 				  errdetail("\\u0000 cannot be converted to text.")));
 	}
-	else if (GetDatabaseEncoding() == PG_UTF8)
-	{
-		char utf8str[5];
-		int utf8len;
-
-		unicode_to_utf8(ch, (unsigned char *) utf8str);
-		utf8len = pg_utf_mblen((unsigned char *) utf8str);
-		addstring(false, utf8str, utf8len);
-	}
-	else if (ch <= 0x007f)
-	{
-		/*
-		 * This is the only way to designate things like a
-		 * form feed character in JSON, so it's useful in all
-		 * encodings.
-		 */
-		addchar(false, (char) ch);
-	}
 	else
 	{
-		ereport(ERROR,
-				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
-				 errmsg("invalid input syntax for type %s", "jsonpath"),
-				 errdetail("Unicode escape values cannot be used for code "
-						   "point values above 007F when the server encoding "
-						   "is not UTF8.")));
+		char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+		pg_unicode_to_server(ch, (unsigned char *) cbuf);
+		addstring(false, cbuf, strlen(cbuf));
 	}
 }
 
-/* Add unicode character and process its hi surrogate */
+/* Add unicode character, processing any surrogate pairs */
 static void
 addUnicode(int ch, int *hi_surrogate)
 {
-	if (ch >= 0xd800 && ch <= 0xdbff)
+	if (is_utf16_surrogate_first(ch))
 	{
 		if (*hi_surrogate != -1)
 			ereport(ERROR,
@@ -542,10 +515,10 @@ addUnicode(int ch, int *hi_surrogate)
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode high surrogate must not follow "
 							   "a high surrogate.")));
-		*hi_surrogate = (ch & 0x3ff) << 10;
+		*hi_surrogate = ch;
 		return;
 	}
-	else if (ch >= 0xdc00 && ch <= 0xdfff)
+	else if (is_utf16_surrogate_second(ch))
 	{
 		if (*hi_surrogate == -1)
 			ereport(ERROR,
@@ -553,7 +526,7 @@ addUnicode(int ch, int *hi_surrogate)
 					 errmsg("invalid input syntax for type %s", "jsonpath"),
 					 errdetail("Unicode low surrogate must follow a high "
 							   "surrogate.")));
-		ch = 0x10000 + *hi_surrogate + (ch & 0x3ff);
+		ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
 		*hi_surrogate = -1;
 	}
 	else if (*hi_surrogate != -1)
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index c7ae1eded80..4c299057a6f 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -2086,26 +2086,6 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
 
 
 /*
- * Map a Unicode codepoint into the current server encoding.
- */
-static char *
-unicode_to_sqlchar(pg_wchar c)
-{
-	char		utf8string[8];	/* need room for trailing zero */
-	char	   *result;
-
-	memset(utf8string, 0, sizeof(utf8string));
-	unicode_to_utf8(c, (unsigned char *) utf8string);
-
-	result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
-	/* if pg_any_to_server didn't strdup, we must */
-	if (result == utf8string)
-		result = pstrdup(result);
-	return result;
-}
-
-
-/*
  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
  */
 char *
@@ -2125,10 +2105,12 @@ map_xml_name_to_sql_identifier(const char *name)
 			&& isxdigit((unsigned char) *(p + 5))
 			&& *(p + 6) == '_')
 		{
+			char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 			unsigned int u;
 
 			sscanf(p + 2, "%X", &u);
-			appendStringInfoString(&buf, unicode_to_sqlchar(u));
+			pg_unicode_to_server(u, (unsigned char *) cbuf);
+			appendStringInfoString(&buf, cbuf);
 			p += 6;
 		}
 		else
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
index 86787bcb319..a8e13cacfde 100644
--- a/src/backend/utils/mb/mbutils.c
+++ b/src/backend/utils/mb/mbutils.c
@@ -68,6 +68,13 @@ static FmgrInfo *ToServerConvProc = NULL;
 static FmgrInfo *ToClientConvProc = NULL;
 
 /*
+ * This variable stores the conversion function to convert from UTF-8
+ * to the server encoding.  It's NULL if the server encoding *is* UTF-8,
+ * or if we lack a conversion function for this.
+ */
+static FmgrInfo *Utf8ToServerConvProc = NULL;
+
+/*
  * These variables track the currently-selected encodings.
  */
 static const pg_enc2name *ClientEncoding = &pg_enc2name_tbl[PG_SQL_ASCII];
@@ -273,6 +280,8 @@ SetClientEncoding(int encoding)
 void
 InitializeClientEncoding(void)
 {
+	int			current_server_encoding;
+
 	Assert(!backend_startup_complete);
 	backend_startup_complete = true;
 
@@ -289,6 +298,35 @@ InitializeClientEncoding(void)
 						pg_enc2name_tbl[pending_client_encoding].name,
 						GetDatabaseEncodingName())));
 	}
+
+	/*
+	 * Also look up the UTF8-to-server conversion function if needed.  Since
+	 * the server encoding is fixed within any one backend process, we don't
+	 * have to do this more than once.
+	 */
+	current_server_encoding = GetDatabaseEncoding();
+	if (current_server_encoding != PG_UTF8 &&
+		current_server_encoding != PG_SQL_ASCII)
+	{
+		Oid			utf8_to_server_proc;
+
+		Assert(IsTransactionState());
+		utf8_to_server_proc =
+			FindDefaultConversionProc(PG_UTF8,
+									  current_server_encoding);
+		/* If there's no such conversion, just leave the pointer as NULL */
+		if (OidIsValid(utf8_to_server_proc))
+		{
+			FmgrInfo   *finfo;
+
+			finfo = (FmgrInfo *) MemoryContextAlloc(TopMemoryContext,
+													sizeof(FmgrInfo));
+			fmgr_info_cxt(utf8_to_server_proc, finfo,
+						  TopMemoryContext);
+			/* Set Utf8ToServerConvProc only after data is fully valid */
+			Utf8ToServerConvProc = finfo;
+		}
+	}
 }
 
 /*
@@ -752,6 +790,73 @@ perform_default_encoding_conversion(const char *src, int len,
 	return result;
 }
 
+/*
+ * Convert a single Unicode code point into a string in the server encoding.
+ *
+ * The code point given by "c" is converted and stored at *s, which must
+ * have at least MAX_UNICODE_EQUIVALENT_STRING+1 bytes available.
+ * The output will have a trailing '\0'.  Throws error if the conversion
+ * cannot be performed.
+ *
+ * Note that this relies on having previously looked up any required
+ * conversion function.  That's partly for speed but mostly because the parser
+ * may call this outside any transaction, or in an aborted transaction.
+ */
+void
+pg_unicode_to_server(pg_wchar c, unsigned char *s)
+{
+	unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
+	int			c_as_utf8_len;
+	int			server_encoding;
+
+	/*
+	 * Complain if invalid Unicode code point.  The choice of errcode here is
+	 * debatable, but really our caller should have checked this anyway.
+	 */
+	if (!is_valid_unicode_codepoint(c))
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("invalid Unicode code point")));
+
+	/* Otherwise, if it's in ASCII range, conversion is trivial */
+	if (c <= 0x7F)
+	{
+		s[0] = (unsigned char) c;
+		s[1] = '\0';
+		return;
+	}
+
+	/* If the server encoding is UTF-8, we just need to reformat the code */
+	server_encoding = GetDatabaseEncoding();
+	if (server_encoding == PG_UTF8)
+	{
+		unicode_to_utf8(c, s);
+		s[pg_utf_mblen(s)] = '\0';
+		return;
+	}
+
+	/* For all other cases, we must have a conversion function available */
+	if (Utf8ToServerConvProc == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("conversion between %s and %s is not supported",
+						pg_enc2name_tbl[PG_UTF8].name,
+						GetDatabaseEncodingName())));
+
+	/* Construct UTF-8 source string */
+	unicode_to_utf8(c, c_as_utf8);
+	c_as_utf8_len = pg_utf_mblen(c_as_utf8);
+	c_as_utf8[c_as_utf8_len] = '\0';
+
+	/* Convert, or throw error if we can't */
+	FunctionCall5(Utf8ToServerConvProc,
+				  Int32GetDatum(PG_UTF8),
+				  Int32GetDatum(server_encoding),
+				  CStringGetDatum(c_as_utf8),
+				  CStringGetDatum(s),
+				  Int32GetDatum(c_as_utf8_len));
+}
+
 
 /* convert a multibyte string to a wchar */
 int
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index f08a03c1690..7df231c3851 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -744,21 +744,21 @@ json_lex_string(JsonLexContext *lex)
 				}
 				if (lex->strval != NULL)
 				{
-					char		utf8str[5];
-					int			utf8len;
-
-					if (ch >= 0xd800 && ch <= 0xdbff)
+					/*
+					 * Combine surrogate pairs.
+					 */
+					if (is_utf16_surrogate_first(ch))
 					{
 						if (hi_surrogate != -1)
 							return JSON_UNICODE_HIGH_SURROGATE;
-						hi_surrogate = (ch & 0x3ff) << 10;
+						hi_surrogate = ch;
 						continue;
 					}
-					else if (ch >= 0xdc00 && ch <= 0xdfff)
+					else if (is_utf16_surrogate_second(ch))
 					{
 						if (hi_surrogate == -1)
 							return JSON_UNICODE_LOW_SURROGATE;
-						ch = 0x10000 + hi_surrogate + (ch & 0x3ff);
+						ch = surrogate_pair_to_codepoint(hi_surrogate, ch);
 						hi_surrogate = -1;
 					}
 
@@ -766,35 +766,52 @@ json_lex_string(JsonLexContext *lex)
 						return JSON_UNICODE_LOW_SURROGATE;
 
 					/*
-					 * For UTF8, replace the escape sequence by the actual
-					 * utf8 character in lex->strval. Do this also for other
-					 * encodings if the escape designates an ASCII character,
-					 * otherwise raise an error.
+					 * Reject invalid cases.  We can't have a value above
+					 * 0xFFFF here (since we only accepted 4 hex digits
+					 * above), so no need to test for out-of-range chars.
 					 */
-
 					if (ch == 0)
 					{
 						/* We can't allow this, since our TEXT type doesn't */
 						return JSON_UNICODE_CODE_POINT_ZERO;
 					}
-					else if (lex->input_encoding == PG_UTF8)
+
+					/*
+					 * Add the represented character to lex->strval.  In the
+					 * backend, we can let pg_unicode_to_server() handle any
+					 * required character set conversion; in frontend, we can
+					 * only deal with trivial conversions.
+					 *
+					 * Note: pg_unicode_to_server() will throw an error for a
+					 * conversion failure, rather than returning a failure
+					 * indication.  That seems OK.
+					 */
+#ifndef FRONTEND
+					{
+						char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+						pg_unicode_to_server(ch, (unsigned char *) cbuf);
+						appendStringInfoString(lex->strval, cbuf);
+					}
+#else
+					if (lex->input_encoding == PG_UTF8)
 					{
+						/* OK, we can map the code point to UTF8 easily */
+						char		utf8str[5];
+						int			utf8len;
+
 						unicode_to_utf8(ch, (unsigned char *) utf8str);
 						utf8len = pg_utf_mblen((unsigned char *) utf8str);
 						appendBinaryStringInfo(lex->strval, utf8str, utf8len);
 					}
 					else if (ch <= 0x007f)
 					{
-						/*
-						 * This is the only way to designate things like a
-						 * form feed character in JSON, so it's useful in all
-						 * encodings.
-						 */
+						/* The ASCII range is the same in all encodings */
 						appendStringInfoChar(lex->strval, (char) ch);
 					}
 					else
 						return JSON_UNICODE_HIGH_ESCAPE;
-
+#endif							/* FRONTEND */
 				}
 			}
 			else if (lex->strval != NULL)
@@ -1083,7 +1100,8 @@ json_errdetail(JsonParseErrorType error, JsonLexContext *lex)
 		case JSON_UNICODE_ESCAPE_FORMAT:
 			return _("\"\\u\" must be followed by four hexadecimal digits.");
 		case JSON_UNICODE_HIGH_ESCAPE:
-			return _("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.");
+			/* note: this case is reachable only in frontend code, not backend */
+			return _("Unicode escape values cannot be used for code point values above 007F when the encoding is not UTF8.");
 		case JSON_UNICODE_HIGH_SURROGATE:
 			return _("Unicode high surrogate must not follow a high surrogate.");
 		case JSON_UNICODE_LOW_SURROGATE:
diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h
index b8892ef730e..494aefc7fab 100644
--- a/src/include/mb/pg_wchar.h
+++ b/src/include/mb/pg_wchar.h
@@ -316,6 +316,15 @@ typedef enum pg_enc
 #define MAX_CONVERSION_GROWTH  4
 
 /*
+ * Maximum byte length of the string equivalent to any one Unicode code point,
+ * in any backend encoding.  The current value assumes that a 4-byte UTF-8
+ * character might expand by MAX_CONVERSION_GROWTH, which is a huge
+ * overestimate.  But in current usage we don't allocate large multiples of
+ * this, so there's little point in being stingy.
+ */
+#define MAX_UNICODE_EQUIVALENT_STRING	16
+
+/*
  * Table for mapping an encoding number to official encoding name and
  * possibly other subsidiary data.  Be careful to check encoding number
  * before accessing a table entry!
@@ -506,6 +515,12 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code);
  * Some handy functions for Unicode-specific tests.
  */
 static inline bool
+is_valid_unicode_codepoint(pg_wchar c)
+{
+	return (c > 0 && c <= 0x10FFFF);
+}
+
+static inline bool
 is_utf16_surrogate_first(pg_wchar c)
 {
 	return (c >= 0xD800 && c <= 0xDBFF);
@@ -603,6 +618,8 @@ extern char *pg_server_to_client(const char *s, int len);
 extern char *pg_any_to_server(const char *s, int len, int encoding);
 extern char *pg_server_to_any(const char *s, int len, int encoding);
 
+extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
+
 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 
diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h
index 7a0e5e5d982..a27352afc14 100644
--- a/src/include/parser/scanner.h
+++ b/src/include/parser/scanner.h
@@ -99,9 +99,13 @@ typedef struct core_yy_extra_type
 	int			literallen;		/* actual current string length */
 	int			literalalloc;	/* current allocated buffer size */
 
+	/*
+	 * Random assorted scanner state.
+	 */
 	int			state_before_str_stop;	/* start cond. before end quote */
 	int			xcdepth;		/* depth of nesting in slash-star comments */
 	char	   *dolqstart;		/* current $foo$ quote start string */
+	YYLTYPE		save_yylloc;	/* one-element stack for PUSH_YYLLOC() */
 
 	/* first part of UTF16 surrogate pair for Unicode escapes */
 	int32		utf16_first_part;
@@ -116,6 +120,14 @@ typedef struct core_yy_extra_type
  */
 typedef void *core_yyscan_t;
 
+/* State for setup_scanner_errposition_callback() and its cancel function */
+typedef struct ScannerCallbackState
+{
+	core_yyscan_t yyscanner;
+	int			location;
+	ErrorContextCallback errcallback;
+} ScannerCallbackState;
+
 
 /* Constant data exported from parser/scan.l */
 extern PGDLLIMPORT const uint16 ScanKeywordTokens[];
@@ -129,6 +141,10 @@ extern void scanner_finish(core_yyscan_t yyscanner);
 extern int	core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp,
 					   core_yyscan_t yyscanner);
 extern int	scanner_errposition(int location, core_yyscan_t yyscanner);
+extern void setup_scanner_errposition_callback(ScannerCallbackState *scbstate,
+											   core_yyscan_t yyscanner,
+											   int location);
+extern void cancel_scanner_errposition_callback(ScannerCallbackState *scbstate);
 extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner) pg_attribute_noreturn();
 
 #endif							/* SCANNER_H */
diff --git a/src/test/regress/expected/json_encoding.out b/src/test/regress/expected/json_encoding.out
index d8d34f4ff6a..f343f74fe18 100644
--- a/src/test/regress/expected/json_encoding.out
+++ b/src/test/regress/expected/json_encoding.out
@@ -1,4 +1,19 @@
+--
 -- encoding-sensitive tests for json and jsonb
+--
+-- We provide expected-results files for UTF8 (json_encoding.out)
+-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SELECT getdatabaseencoding();           -- just to label the results files
+ getdatabaseencoding 
+---------------------
+ UTF8
+(1 row)
+
 -- first json
 -- basic unicode input
 SELECT '"\u"'::json;			-- ERROR, incomplete escape
diff --git a/src/test/regress/expected/json_encoding_1.out b/src/test/regress/expected/json_encoding_1.out
index 79ed78e1c5f..e2fc131b0fa 100644
--- a/src/test/regress/expected/json_encoding_1.out
+++ b/src/test/regress/expected/json_encoding_1.out
@@ -1,4 +1,19 @@
+--
 -- encoding-sensitive tests for json and jsonb
+--
+-- We provide expected-results files for UTF8 (json_encoding.out)
+-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SELECT getdatabaseencoding();           -- just to label the results files
+ getdatabaseencoding 
+---------------------
+ SQL_ASCII
+(1 row)
+
 -- first json
 -- basic unicode input
 SELECT '"\u"'::json;			-- ERROR, incomplete escape
@@ -33,9 +48,7 @@ SELECT '"\uaBcD"'::json;		-- OK, uppercase and lower case both OK
 
 -- handling of unicode surrogate pairs
 select json '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a' as correct_in_utf8;
-ERROR:  unsupported Unicode escape sequence
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: { "a":...
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 select json '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
 ERROR:  invalid input syntax for type json
 DETAIL:  Unicode high surrogate must not follow a high surrogate.
@@ -84,9 +97,7 @@ select json '{ "a":  "null \\u0000 escape" }' as not_an_escape;
 (1 row)
 
 select json '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR:  unsupported Unicode escape sequence
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: { "a":...
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 select json '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  correct_everywhere 
 --------------------
@@ -144,18 +155,14 @@ CONTEXT:  JSON data, line 1: ...
 -- use octet_length here so we don't get an odd unicode char in the
 -- output
 SELECT octet_length('"\uaBcD"'::jsonb::text); -- OK, uppercase and lower case both OK
-ERROR:  unsupported Unicode escape sequence
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT octet_length('"\uaBcD"'::jsonb::text);
                             ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: ...
 -- handling of unicode surrogate pairs
 SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc36" }' -> 'a')::text) AS correct_in_utf8;
-ERROR:  unsupported Unicode escape sequence
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT octet_length((jsonb '{ "a":  "\ud83d\ude04\ud83d\udc3...
                                    ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: { "a":...
 SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a'; -- 2 high surrogates in a row
 ERROR:  invalid input syntax for type json
 LINE 1: SELECT jsonb '{ "a":  "\ud83d\ud83d" }' -> 'a';
@@ -182,11 +189,9 @@ DETAIL:  Unicode low surrogate must follow a high surrogate.
 CONTEXT:  JSON data, line 1: { "a":...
 -- handling of simple unicode escapes
 SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as correct_in_utf8;
-ERROR:  unsupported Unicode escape sequence
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' as corr...
                      ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: { "a":...
 SELECT jsonb '{ "a":  "dollar \u0024 character" }' as correct_everywhere;
      correct_everywhere      
 -----------------------------
@@ -212,11 +217,9 @@ SELECT jsonb '{ "a":  "null \\u0000 escape" }' as not_an_escape;
 (1 row)
 
 SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a' as correct_in_utf8;
-ERROR:  unsupported Unicode escape sequence
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT jsonb '{ "a":  "the Copyright \u00a9 sign" }' ->> 'a'...
                      ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
-CONTEXT:  JSON data, line 1: { "a":...
 SELECT jsonb '{ "a":  "dollar \u0024 character" }' ->> 'a' as correct_everywhere;
  correct_everywhere 
 --------------------
diff --git a/src/test/regress/expected/json_encoding_2.out b/src/test/regress/expected/json_encoding_2.out
new file mode 100644
index 00000000000..4fc8f0241ab
--- /dev/null
+++ b/src/test/regress/expected/json_encoding_2.out
@@ -0,0 +1,9 @@
+--
+-- encoding-sensitive tests for json and jsonb
+--
+-- We provide expected-results files for UTF8 (json_encoding.out)
+-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out
index ecffe095b59..7cbfb6abcf3 100644
--- a/src/test/regress/expected/jsonpath_encoding.out
+++ b/src/test/regress/expected/jsonpath_encoding.out
@@ -1,4 +1,19 @@
+--
 -- encoding-sensitive tests for jsonpath
+--
+-- We provide expected-results files for UTF8 (jsonpath_encoding.out)
+-- and for SQL_ASCII (jsonpath_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SELECT getdatabaseencoding();           -- just to label the results files
+ getdatabaseencoding 
+---------------------
+ UTF8
+(1 row)
+
 -- checks for double-quoted values
 -- basic unicode input
 SELECT '"\u"'::jsonpath;		-- ERROR, incomplete escape
diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out
index c8cc2173a8c..005136c9657 100644
--- a/src/test/regress/expected/jsonpath_encoding_1.out
+++ b/src/test/regress/expected/jsonpath_encoding_1.out
@@ -1,4 +1,19 @@
+--
 -- encoding-sensitive tests for jsonpath
+--
+-- We provide expected-results files for UTF8 (jsonpath_encoding.out)
+-- and for SQL_ASCII (jsonpath_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+SELECT getdatabaseencoding();           -- just to label the results files
+ getdatabaseencoding 
+---------------------
+ SQL_ASCII
+(1 row)
+
 -- checks for double-quoted values
 -- basic unicode input
 SELECT '"\u"'::jsonpath;		-- ERROR, incomplete escape
@@ -19,16 +34,14 @@ LINE 1: SELECT '"\u0000"'::jsonpath;
                ^
 DETAIL:  \u0000 cannot be converted to text.
 SELECT '"\uaBcD"'::jsonpath;	-- OK, uppercase and lower case both OK
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT '"\uaBcD"'::jsonpath;
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 -- handling of unicode surrogate pairs
 select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8;
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: select '"\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_...
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 select '"\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row
 ERROR:  invalid input syntax for type jsonpath
 LINE 1: select '"\ud83d\ud83d"'::jsonpath;
@@ -51,10 +64,9 @@ LINE 1: select '"\ude04X"'::jsonpath;
 DETAIL:  Unicode low surrogate must follow a high surrogate.
 --handling of simple unicode escapes
 select '"the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8;
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: select '"the Copyright \u00a9 sign"'::jsonpath as correct_in...
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 select '"dollar \u0024 character"'::jsonpath as correct_everywhere;
   correct_everywhere  
 ----------------------
@@ -98,16 +110,14 @@ LINE 1: SELECT '$."\u0000"'::jsonpath;
                ^
 DETAIL:  \u0000 cannot be converted to text.
 SELECT '$."\uaBcD"'::jsonpath;	-- OK, uppercase and lower case both OK
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: SELECT '$."\uaBcD"'::jsonpath;
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 -- handling of unicode surrogate pairs
 select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_in_utf8;
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: select '$."\ud83d\ude04\ud83d\udc36"'::jsonpath as correct_i...
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 select '$."\ud83d\ud83d"'::jsonpath; -- 2 high surrogates in a row
 ERROR:  invalid input syntax for type jsonpath
 LINE 1: select '$."\ud83d\ud83d"'::jsonpath;
@@ -130,10 +140,9 @@ LINE 1: select '$."\ude04X"'::jsonpath;
 DETAIL:  Unicode low surrogate must follow a high surrogate.
 --handling of simple unicode escapes
 select '$."the Copyright \u00a9 sign"'::jsonpath as correct_in_utf8;
-ERROR:  invalid input syntax for type jsonpath
+ERROR:  conversion between UTF8 and SQL_ASCII is not supported
 LINE 1: select '$."the Copyright \u00a9 sign"'::jsonpath as correct_...
                ^
-DETAIL:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8.
 select '$."dollar \u0024 character"'::jsonpath as correct_everywhere;
    correct_everywhere   
 ------------------------
diff --git a/src/test/regress/expected/jsonpath_encoding_2.out b/src/test/regress/expected/jsonpath_encoding_2.out
new file mode 100644
index 00000000000..bb71bfe72c4
--- /dev/null
+++ b/src/test/regress/expected/jsonpath_encoding_2.out
@@ -0,0 +1,9 @@
+--
+-- encoding-sensitive tests for jsonpath
+--
+-- We provide expected-results files for UTF8 (jsonpath_encoding.out)
+-- and for SQL_ASCII (jsonpath_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 60cb86193c7..6c4443afcf1 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -35,6 +35,12 @@ SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
  dat\+000061
 (1 row)
 
+SELECT U&'a\\b' AS "a\b";
+ a\b 
+-----
+ a\b
+(1 row)
+
 SELECT U&' \' UESCAPE '!' AS "tricky";
  tricky 
 --------
@@ -48,13 +54,15 @@ SELECT 'tricky' AS U&"\" UESCAPE '!';
 (1 row)
 
 SELECT U&'wrong: \061';
-ERROR:  invalid Unicode escape value
+ERROR:  invalid Unicode escape
 LINE 1: SELECT U&'wrong: \061';
                          ^
+HINT:  Unicode escapes must be \XXXX or \+XXXXXX.
 SELECT U&'wrong: \+0061';
-ERROR:  invalid Unicode escape value
+ERROR:  invalid Unicode escape
 LINE 1: SELECT U&'wrong: \+0061';
                          ^
+HINT:  Unicode escapes must be \XXXX or \+XXXXXX.
 SELECT U&'wrong: +0061' UESCAPE +;
 ERROR:  UESCAPE must be followed by a simple string literal at or near "+"
 LINE 1: SELECT U&'wrong: +0061' UESCAPE +;
@@ -63,6 +71,77 @@ SELECT U&'wrong: +0061' UESCAPE '+';
 ERROR:  invalid Unicode escape character at or near "'+'"
 LINE 1: SELECT U&'wrong: +0061' UESCAPE '+';
                                         ^
+SELECT U&'wrong: \db99';
+ERROR:  invalid Unicode surrogate pair
+LINE 1: SELECT U&'wrong: \db99';
+                              ^
+SELECT U&'wrong: \db99xy';
+ERROR:  invalid Unicode surrogate pair
+LINE 1: SELECT U&'wrong: \db99xy';
+                              ^
+SELECT U&'wrong: \db99\\';
+ERROR:  invalid Unicode surrogate pair
+LINE 1: SELECT U&'wrong: \db99\\';
+                              ^
+SELECT U&'wrong: \db99\0061';
+ERROR:  invalid Unicode surrogate pair
+LINE 1: SELECT U&'wrong: \db99\0061';
+                              ^
+SELECT U&'wrong: \+00db99\+000061';
+ERROR:  invalid Unicode surrogate pair
+LINE 1: SELECT U&'wrong: \+00db99\+000061';
+                                 ^
+SELECT U&'wrong: \+2FFFFF';
+ERROR:  invalid Unicode escape value
+LINE 1: SELECT U&'wrong: \+2FFFFF';
+                         ^
+-- while we're here, check the same cases in E-style literals
+SELECT E'd\u0061t\U00000061' AS "data";
+ data 
+------
+ data
+(1 row)
+
+SELECT E'a\\b' AS "a\b";
+ a\b 
+-----
+ a\b
+(1 row)
+
+SELECT E'wrong: \u061';
+ERROR:  invalid Unicode escape
+LINE 1: SELECT E'wrong: \u061';
+                        ^
+HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
+SELECT E'wrong: \U0061';
+ERROR:  invalid Unicode escape
+LINE 1: SELECT E'wrong: \U0061';
+                        ^
+HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
+SELECT E'wrong: \udb99';
+ERROR:  invalid Unicode surrogate pair at or near "'"
+LINE 1: SELECT E'wrong: \udb99';
+                              ^
+SELECT E'wrong: \udb99xy';
+ERROR:  invalid Unicode surrogate pair at or near "x"
+LINE 1: SELECT E'wrong: \udb99xy';
+                              ^
+SELECT E'wrong: \udb99\\';
+ERROR:  invalid Unicode surrogate pair at or near "\"
+LINE 1: SELECT E'wrong: \udb99\\';
+                              ^
+SELECT E'wrong: \udb99\u0061';
+ERROR:  invalid Unicode surrogate pair at or near "\u0061"
+LINE 1: SELECT E'wrong: \udb99\u0061';
+                              ^
+SELECT E'wrong: \U0000db99\U00000061';
+ERROR:  invalid Unicode surrogate pair at or near "\U00000061"
+LINE 1: SELECT E'wrong: \U0000db99\U00000061';
+                                  ^
+SELECT E'wrong: \U002FFFFF';
+ERROR:  invalid Unicode escape value at or near "\U002FFFFF"
+LINE 1: SELECT E'wrong: \U002FFFFF';
+                        ^
 SET standard_conforming_strings TO off;
 SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
 ERROR:  unsafe use of string constant with Unicode escapes
diff --git a/src/test/regress/sql/json_encoding.sql b/src/test/regress/sql/json_encoding.sql
index 87a2d564ff3..d7fac69733d 100644
--- a/src/test/regress/sql/json_encoding.sql
+++ b/src/test/regress/sql/json_encoding.sql
@@ -1,5 +1,16 @@
-
+--
 -- encoding-sensitive tests for json and jsonb
+--
+
+-- We provide expected-results files for UTF8 (json_encoding.out)
+-- and for SQL_ASCII (json_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+SELECT getdatabaseencoding();           -- just to label the results files
 
 -- first json
 
diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql
index 3a23b728182..55d9e30b95c 100644
--- a/src/test/regress/sql/jsonpath_encoding.sql
+++ b/src/test/regress/sql/jsonpath_encoding.sql
@@ -1,5 +1,16 @@
-
+--
 -- encoding-sensitive tests for jsonpath
+--
+
+-- We provide expected-results files for UTF8 (jsonpath_encoding.out)
+-- and for SQL_ASCII (jsonpath_encoding_1.out).  Skip otherwise.
+SELECT getdatabaseencoding() NOT IN ('UTF8', 'SQL_ASCII')
+       AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
+SELECT getdatabaseencoding();           -- just to label the results files
 
 -- checks for double-quoted values
 
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index c5cd15142a5..3e28cd198f4 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -21,6 +21,7 @@ SET standard_conforming_strings TO on;
 
 SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";
 SELECT U&'d!0061t\+000061' UESCAPE '!' AS U&"d*0061t\+000061" UESCAPE '*';
+SELECT U&'a\\b' AS "a\b";
 
 SELECT U&' \' UESCAPE '!' AS "tricky";
 SELECT 'tricky' AS U&"\" UESCAPE '!';
@@ -30,6 +31,25 @@ SELECT U&'wrong: \+0061';
 SELECT U&'wrong: +0061' UESCAPE +;
 SELECT U&'wrong: +0061' UESCAPE '+';
 
+SELECT U&'wrong: \db99';
+SELECT U&'wrong: \db99xy';
+SELECT U&'wrong: \db99\\';
+SELECT U&'wrong: \db99\0061';
+SELECT U&'wrong: \+00db99\+000061';
+SELECT U&'wrong: \+2FFFFF';
+
+-- while we're here, check the same cases in E-style literals
+SELECT E'd\u0061t\U00000061' AS "data";
+SELECT E'a\\b' AS "a\b";
+SELECT E'wrong: \u061';
+SELECT E'wrong: \U0061';
+SELECT E'wrong: \udb99';
+SELECT E'wrong: \udb99xy';
+SELECT E'wrong: \udb99\\';
+SELECT E'wrong: \udb99\u0061';
+SELECT E'wrong: \U0000db99\U00000061';
+SELECT E'wrong: \U002FFFFF';
+
 SET standard_conforming_strings TO off;
 
 SELECT U&'d\0061t\+000061' AS U&"d\0061t\+000061";