git.kaiwu.me - njs.git/commitdiff
Improved readability of surrogate pairs handling.
author: Dmitry Volyntsev <xeioex@nginx.com>
Mon, 27 Jul 2020 14:34:35 +0000 (14:34 +0000)
committer: Dmitry Volyntsev <xeioex@nginx.com>
Mon, 27 Jul 2020 14:34:35 +0000 (14:34 +0000)
src/njs_json.c
src/njs_parser.c
src/njs_string.c
src/njs_string.h
src/njs_unicode.h
src/njs_utf16.c

index 975693b2f6822882843cfee2d088edad003aac50..07767f9138dd68f6423edd9cbbfae84cd7a1f8f2 100644 (file)
@@ -738,7 +738,7 @@ njs_json_parse_string(njs_json_parse_ctx_t *ctx, njs_value_t *value,
                 p += 4;
 
                 if (njs_fast_path(njs_surrogate_trailing(utf_low))) {
-                    utf = njs_string_surrogate_pair(utf, utf_low);
+                    utf = njs_surrogate_pair(utf, utf_low);
 
                 } else if (njs_surrogate_leading(utf_low)) {
                     utf = NJS_UNICODE_REPLACEMENT;
index 81443db224bb560c65e6c61d3ceed73ecd34ac2d..f93f30432ca1b558856f439a9a26e08eee72b37d 100644 (file)
@@ -8088,7 +8088,7 @@ njs_parser_escape_string_create(njs_parser_t *parser, njs_lexer_token_t *token,
 
         if (cp_pair != 0) {
             if (njs_fast_path(njs_surrogate_trailing(cp))) {
-                cp = njs_string_surrogate_pair(cp_pair, cp);
+                cp = njs_surrogate_pair(cp_pair, cp);
 
             } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UNICODE_REPLACEMENT;
@@ -8238,7 +8238,7 @@ njs_parser_escape_string_calc_length(njs_parser_t *parser,
 
         if (cp_pair != 0) {
             if (njs_fast_path(njs_surrogate_trailing(cp))) {
-                cp = njs_string_surrogate_pair(cp_pair, cp);
+                cp = njs_surrogate_pair(cp_pair, cp);
 
             } else if (njs_slow_path(njs_surrogate_leading(cp))) {
                 cp = NJS_UNICODE_REPLACEMENT;
index 6bd5d7f0073dd2264b3834fc9921d59a468e0fd7..206d488410f56a799b0dbb82bb9363a403216b10 100644 (file)
@@ -4272,7 +4272,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
                         goto uri_error;
                     }
 
-                    cp = njs_string_surrogate_pair(cp, cp_low);
+                    cp = njs_surrogate_pair(cp, cp_low);
                     size += njs_utf8_size(cp) * 3;
                     continue;
                 }
@@ -4312,7 +4312,7 @@ njs_string_encode_uri(njs_vm_t *vm, njs_value_t *args, njs_uint_t nargs,
 
         if (njs_slow_path(njs_surrogate_leading(cp))) {
             cp_low = njs_utf8_decode(&ctx, &src, end);
-            cp = njs_string_surrogate_pair(cp, cp_low);
+            cp = njs_surrogate_pair(cp, cp_low);
         }
 
         njs_utf8_encode(encode, cp);
index fbd07d15690c0f128c208879818d45758dcf50d2..3ec22c62ed837f498a6e28e25be23c222ac6ba70 100644 (file)
 /* The maximum signed int32_t. */
 #define NJS_STRING_MAX_LENGTH  0x7fffffff
 
-#define njs_surrogate_leading(cp)    ((cp) >= 0xd800 && (cp) <= 0xdbff)
-
-#define njs_surrogate_trailing(cp)   ((cp) >= 0xdc00 && (cp) <= 0xdfff)
-
-#define njs_surrogate_any(cp)        ((cp) >= 0xd800 && (cp) <= 0xdfff)
-
-/* Converting surrogate pair to code point.  */
-#define njs_string_surrogate_pair(high, low)                                  \
-    (0x10000 + ((high - 0xd800) << 10) + (low - 0xdc00))
-
 /*
  * NJS_STRING_MAP_STRIDE should be power of two to use shift and binary
  * AND operations instead of division and remainder operations but no
index a45ce68253be38b301c1775e4203a0727ac87bcb..2e0bcba7a37d8fee482d1f54990fe64a181b49ee 100644 (file)
@@ -23,5 +23,17 @@ typedef struct {
     u_char    upper;
 } njs_unicode_decode_t;
 
+#define njs_surrogate_leading(cp)                                             \
+    (((unsigned) (cp) - 0xd800) <= 0xdbff - 0xd800)
+
+#define njs_surrogate_trailing(cp)                                            \
+    (((unsigned) (cp) - 0xdc00) <= 0xdfff - 0xdc00)
+
+#define njs_surrogate_any(cp)                                                 \
+    (((unsigned) (cp) - 0xd800) <= 0xdfff - 0xd800)
+
+#define njs_surrogate_pair(high, low)                                         \
+    (0x10000 + (((high) - 0xd800) << 10) + ((low) - 0xdc00))
+
 
 #endif /* _NJS_UNICODE_H_INCLUDED_ */
index 6626286a891f90d18b77e40b0470757b2bd537e0..b47fbf50bfcba936a3f38c74f07f945a89b4c472 100644 (file)
@@ -79,9 +79,8 @@ lead_state:
 #endif
 
     if (ctx->codepoint != 0x00) {
-        if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
-            unit = 0x10000 + ((ctx->codepoint - 0xD800) << 10)
-                   + (unit - 0xDC00);
+        if (njs_surrogate_trailing(unit)) {
+            unit = njs_surrogate_pair(ctx->codepoint, unit);
 
             ctx->codepoint = 0x00;
 
@@ -96,10 +95,8 @@ lead_state:
         return NJS_UNICODE_ERROR;
     }
 
-    /* Surrogate pair. */
-
-    if ((unsigned) (unit - 0xD800) <= (0xDFFF - 0xD800)) {
-        if ((unsigned) (unit - 0xDC00) <= (0xDFFF - 0xDC00)) {
+    if (njs_surrogate_any(unit)) {
+        if (njs_surrogate_trailing(unit)) {
             return NJS_UNICODE_ERROR;
         }