summary | refs | log | tree | commit | diff
path: root/quickjs.c
diff options
context:
space:
mode:
author: Fabrice Bellard <fabrice@bellard.org>, 2025-04-10 10:34:40 +0200
committer: Fabrice Bellard <fabrice@bellard.org>, 2025-04-10 10:34:40 +0200
commit: 25ffdb418ea221eaab7605f24aa3087323cd501d (patch)
tree: 82ed37d9c88edb27d041aaec4c07a563d771ba4e /quickjs.c
parent: 9d3776d0d45ca437ddb7f9079ae0367102abc90f (diff)
download: quickjs-25ffdb418ea221eaab7605f24aa3087323cd501d.tar.gz
download: quickjs-25ffdb418ea221eaab7605f24aa3087323cd501d.zip
fixed the handling of unicode identifiers
Diffstat (limited to 'quickjs.c')
-rw-r--r-- quickjs.c 24
1 files changed, 17 insertions, 7 deletions
diff --git a/quickjs.c b/quickjs.c
index db5f04c..90d8fe3 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -2861,14 +2861,26 @@ static JSAtom JS_NewAtomStr(JSContext *ctx, JSString *p)
return __JS_NewAtom(rt, p, JS_ATOM_TYPE_STRING);
}
+/* return the length of the leading run of bytes < 128 in buf[0..len) (XXX: optimize) */
+static size_t count_ascii(const uint8_t *buf, size_t len)
+{
+ const uint8_t *p, *p_end;
+ p = buf;
+ p_end = buf + len;
+ while (p < p_end && *p < 128) /* stop at the end or at the first byte >= 128 */
+ p++;
+ return p - buf; /* equals len when every byte is < 128 */
+}
+
/* str is UTF-8 encoded */
JSAtom JS_NewAtomLen(JSContext *ctx, const char *str, size_t len)
{
JSValue val;
- if (len == 0 || !is_digit(*str)) {
- // XXX: this will not work if UTF-8 encoded str contains non ASCII bytes
- JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
+ if (len == 0 ||
+ (!is_digit(*str) &&
+ count_ascii((const uint8_t *)str, len) == len)) {
+ JSAtom atom = __JS_FindAtom(ctx->rt, str, len, JS_ATOM_TYPE_STRING);
if (atom)
return atom;
}
@@ -3810,10 +3822,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len)
p_start = (const uint8_t *)buf;
p_end = p_start + buf_len;
- p = p_start;
- while (p < p_end && *p < 128)
- p++;
- len1 = p - p_start;
+ len1 = count_ascii(p_start, buf_len);
+ p = p_start + len1;
if (len1 > JS_STRING_LEN_MAX)
return JS_ThrowInternalError(ctx, "string too long");
if (p == p_end) {