summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFabrice Bellard <fabrice@bellard.org>2023-12-22 11:05:49 +0100
committerFabrice Bellard <fabrice@bellard.org>2023-12-22 11:05:49 +0100
commit07ff4749926ee71d69f3e196264d9f192f1e308f (patch)
tree4e5e984d870b0e75970e53c4ef0b3787024e9dcd
parent7cefa7b121c8a800969672853b035278ebd6b24c (diff)
downloadquickjs-07ff4749926ee71d69f3e196264d9f192f1e308f.tar.gz
quickjs-07ff4749926ee71d69f3e196264d9f192f1e308f.zip
use Unicode normalization in String.prototype.localeCompare
-rw-r--r--quickjs.c131
-rw-r--r--test262_errors.txt2
2 files changed, 100 insertions, 33 deletions
diff --git a/quickjs.c b/quickjs.c
index c80a300..dfcacda 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -41109,26 +41109,6 @@ static BOOL test_final_sigma(JSString *p, int sigma_pos)
return !lre_is_cased(c1);
}
-static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
- int argc, JSValueConst *argv)
-{
- JSValue a, b;
- int cmp;
-
- a = JS_ToStringCheckObject(ctx, this_val);
- if (JS_IsException(a))
- return JS_EXCEPTION;
- b = JS_ToString(ctx, argv[0]);
- if (JS_IsException(b)) {
- JS_FreeValue(ctx, a);
- return JS_EXCEPTION;
- }
- cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b));
- JS_FreeValue(ctx, a);
- JS_FreeValue(ctx, b);
- return JS_NewInt32(ctx, cmp);
-}
-
static JSValue js_string_toLowerCase(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv, int to_lower)
{
@@ -41214,23 +41194,38 @@ static JSValue JS_NewUTF32String(JSContext *ctx, const uint32_t *buf, int len)
return JS_EXCEPTION;
}
+static int js_string_normalize1(JSContext *ctx, uint32_t **pout_buf,
+ JSValueConst val,
+ UnicodeNormalizationEnum n_type)
+{
+ int buf_len, out_len;
+ uint32_t *buf, *out_buf;
+
+ buf_len = JS_ToUTF32String(ctx, &buf, val);
+ if (buf_len < 0)
+ return -1;
+ out_len = unicode_normalize(&out_buf, buf, buf_len, n_type,
+ ctx->rt, (DynBufReallocFunc *)js_realloc_rt);
+ js_free(ctx, buf);
+ if (out_len < 0)
+ return -1;
+ *pout_buf = out_buf;
+ return out_len;
+}
+
static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val,
int argc, JSValueConst *argv)
{
const char *form, *p;
size_t form_len;
- int is_compat, buf_len, out_len;
+ int is_compat, out_len;
UnicodeNormalizationEnum n_type;
JSValue val;
- uint32_t *buf, *out_buf;
+ uint32_t *out_buf;
val = JS_ToStringCheckObject(ctx, this_val);
if (JS_IsException(val))
return val;
- buf_len = JS_ToUTF32String(ctx, &buf, val);
- JS_FreeValue(ctx, val);
- if (buf_len < 0)
- return JS_EXCEPTION;
if (argc == 0 || JS_IsUndefined(argv[0])) {
n_type = UNICODE_NFC;
@@ -41256,22 +41251,96 @@ static JSValue js_string_normalize(JSContext *ctx, JSValueConst this_val,
JS_FreeCString(ctx, form);
JS_ThrowRangeError(ctx, "bad normalization form");
fail1:
- js_free(ctx, buf);
+ JS_FreeValue(ctx, val);
return JS_EXCEPTION;
}
JS_FreeCString(ctx, form);
}
- out_len = unicode_normalize(&out_buf, buf, buf_len, n_type,
- ctx->rt, (DynBufReallocFunc *)js_realloc_rt);
- js_free(ctx, buf);
+ out_len = js_string_normalize1(ctx, &out_buf, val, n_type);
+ JS_FreeValue(ctx, val);
if (out_len < 0)
return JS_EXCEPTION;
val = JS_NewUTF32String(ctx, out_buf, out_len);
js_free(ctx, out_buf);
return val;
}
-#endif /* CONFIG_ALL_UNICODE */
+
+/* return < 0, 0 or > 0 */
+static int js_UTF32_compare(const uint32_t *buf1, int buf1_len,
+ const uint32_t *buf2, int buf2_len)
+{
+ int i, len, c, res;
+ len = min_int(buf1_len, buf2_len);
+ for(i = 0; i < len; i++) {
+ /* Note: range is limited so a subtraction is valid */
+ c = buf1[i] - buf2[i];
+ if (c != 0)
+ return c;
+ }
+ if (buf1_len == buf2_len)
+ res = 0;
+ else if (buf1_len < buf2_len)
+ res = -1;
+ else
+ res = 1;
+ return res;
+}
+
+static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
+ int argc, JSValueConst *argv)
+{
+ JSValue a, b;
+ int cmp, a_len, b_len;
+ uint32_t *a_buf, *b_buf;
+
+ a = JS_ToStringCheckObject(ctx, this_val);
+ if (JS_IsException(a))
+ return JS_EXCEPTION;
+ b = JS_ToString(ctx, argv[0]);
+ if (JS_IsException(b)) {
+ JS_FreeValue(ctx, a);
+ return JS_EXCEPTION;
+ }
+ a_len = js_string_normalize1(ctx, &a_buf, a, UNICODE_NFC);
+ JS_FreeValue(ctx, a);
+ if (a_len < 0) {
+ JS_FreeValue(ctx, b);
+ return JS_EXCEPTION;
+ }
+
+ b_len = js_string_normalize1(ctx, &b_buf, b, UNICODE_NFC);
+ JS_FreeValue(ctx, b);
+ if (b_len < 0) {
+ js_free(ctx, a_buf);
+ return JS_EXCEPTION;
+ }
+ cmp = js_UTF32_compare(a_buf, a_len, b_buf, b_len);
+ js_free(ctx, a_buf);
+ js_free(ctx, b_buf);
+ return JS_NewInt32(ctx, cmp);
+}
+#else /* CONFIG_ALL_UNICODE */
+static JSValue js_string_localeCompare(JSContext *ctx, JSValueConst this_val,
+ int argc, JSValueConst *argv)
+{
+ JSValue a, b;
+ int cmp;
+
+ a = JS_ToStringCheckObject(ctx, this_val);
+ if (JS_IsException(a))
+ return JS_EXCEPTION;
+ b = JS_ToString(ctx, argv[0]);
+ if (JS_IsException(b)) {
+ JS_FreeValue(ctx, a);
+ return JS_EXCEPTION;
+ }
+ cmp = js_string_compare(ctx, JS_VALUE_GET_STRING(a), JS_VALUE_GET_STRING(b));
+ JS_FreeValue(ctx, a);
+ JS_FreeValue(ctx, b);
+ return JS_NewInt32(ctx, cmp);
+}
+#endif /* !CONFIG_ALL_UNICODE */
/* also used for String.prototype.valueOf */
static JSValue js_string_toString(JSContext *ctx, JSValueConst this_val,
diff --git a/test262_errors.txt b/test262_errors.txt
index 77ee6cc..31eeb6e 100644
--- a/test262_errors.txt
+++ b/test262_errors.txt
@@ -5,8 +5,6 @@ test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: Test262Er
test262/test/built-ins/RegExp/lookahead-quantifier-match-groups.js:27: strict mode: Test262Error: Expected [a, abc] and [a, undefined] to have the same contents. ? quantifier
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: Test262Error: \u0390 does not match \u1fd3
test262/test/built-ins/RegExp/unicode_full_case_folding.js:20: strict mode: Test262Error: \u0390 does not match \u1fd3
-test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
-test262/test/built-ins/String/prototype/localeCompare/15.5.4.9_CE.js:62: strict mode: Test262Error: String.prototype.localeCompare considers ö (\u006f\u0308) ≠ ö (\u00f6).
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
test262/test/built-ins/TypedArray/prototype/sort/sort-tonumber.js:30: strict mode: TypeError: ArrayBuffer is detached (Testing with Float64Array.)
test262/test/language/expressions/assignment/target-member-computed-reference-null.js:32: Test262Error: Expected a DummyError but got a TypeError