summaryrefslogtreecommitdiff
path: root/quickjs.c
diff options
context:
space:
mode:
authorFabrice Bellard <fabrice@bellard.org>2025-03-25 16:01:40 +0100
committerFabrice Bellard <fabrice@bellard.org>2025-03-25 16:01:40 +0100
commit156d981afeb07d7520ad75685e15b8dabd95f35a (patch)
treef7de0796d6df17ec8d8d12834dd0cce5d6c78892 /quickjs.c
parent372ad84e9a940b94678a3102d015d2f684dd9097 (diff)
downloadquickjs-156d981afeb07d7520ad75685e15b8dabd95f35a.tar.gz
quickjs-156d981afeb07d7520ad75685e15b8dabd95f35a.zip
added string ropes for faster concatenation of long strings (issue #67)
Diffstat (limited to 'quickjs.c')
-rw-r--r--quickjs.c624
1 files changed, 562 insertions, 62 deletions
diff --git a/quickjs.c b/quickjs.c
index 0bcb9fe..f2d62ce 100644
--- a/quickjs.c
+++ b/quickjs.c
@@ -105,6 +105,7 @@
//#define DUMP_MODULE_RESOLVE
//#define DUMP_PROMISE
//#define DUMP_READ_OBJECT
+//#define DUMP_ROPE_REBALANCE
/* test the GC by forcing it before each object allocation */
//#define FORCE_GC_AT_MALLOC
@@ -196,11 +197,24 @@ typedef enum JSErrorEnum {
#define JS_STACK_SIZE_MAX 65534
#define JS_STRING_LEN_MAX ((1 << 30) - 1)
+/* strings <= this length are not concatenated using ropes. if too
+ small, the rope memory overhead becomes high. */
+#define JS_STRING_ROPE_SHORT_LEN 512
+/* specific threshold for initial rope use */
+#define JS_STRING_ROPE_SHORT2_LEN 8192
+/* rope depth at which we rebalance */
+#define JS_STRING_ROPE_MAX_DEPTH 60
+
#define __exception __attribute__((warn_unused_result))
typedef struct JSShape JSShape;
typedef struct JSString JSString;
typedef struct JSString JSAtomStruct;
+typedef struct JSObject JSObject;
+
+#define JS_VALUE_GET_OBJ(v) ((JSObject *)JS_VALUE_GET_PTR(v))
+#define JS_VALUE_GET_STRING(v) ((JSString *)JS_VALUE_GET_PTR(v))
+#define JS_VALUE_GET_STRING_ROPE(v) ((JSStringRope *)JS_VALUE_GET_PTR(v))
typedef enum {
JS_GC_PHASE_NONE,
@@ -483,6 +497,17 @@ struct JSString {
} u;
};
+typedef struct JSStringRope {
+ JSRefCountHeader header; /* must come first, 32-bit */
+ uint32_t len;
+ uint8_t is_wide_char; /* 0 = 8 bits, 1 = 16 bits characters */
+ uint8_t depth; /* max depth of the rope tree */
+ /* XXX: could reduce memory usage by using a direct pointer with
+ bit 0 to select rope or string */
+ JSValue left;
+ JSValue right; /* might be the empty string */
+} JSStringRope;
+
typedef struct JSClosureVar {
uint8_t is_local : 1;
uint8_t is_arg : 1;
@@ -1123,7 +1148,8 @@ static int JS_CreateProperty(JSContext *ctx, JSObject *p,
JSAtom prop, JSValueConst val,
JSValueConst getter, JSValueConst setter,
int flags);
-static int js_string_memcmp(const JSString *p1, const JSString *p2, int len);
+static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2,
+ int pos2, int len);
static void reset_weak_ref(JSRuntime *rt, JSObject *p);
static JSValue js_array_buffer_constructor3(JSContext *ctx,
JSValueConst new_target,
@@ -2337,6 +2363,17 @@ static uint32_t hash_string(const JSString *str, uint32_t h)
return h;
}
+static uint32_t hash_string_rope(JSValueConst val, uint32_t h)
+{
+ if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
+ return hash_string(JS_VALUE_GET_STRING(val), h);
+ } else {
+ JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
+ h = hash_string_rope(r->left, h);
+ return hash_string_rope(r->right, h);
+ }
+}
+
static __maybe_unused void JS_DumpChar(JSRuntime *rt, int c, int sep)
{
if (c == sep || c == '\\') {
@@ -2569,7 +2606,7 @@ static JSAtom __JS_NewAtom(JSRuntime *rt, JSString *str, int atom_type)
if (p->hash == h &&
p->atom_type == atom_type &&
p->len == len &&
- js_string_memcmp(p, str, len) == 0) {
+ js_string_memcmp(p, 0, str, 0, len) == 0) {
if (!__JS_AtomIsConst(i))
p->header.ref_count++;
goto done;
@@ -3645,13 +3682,21 @@ static int string_buffer_concat_value(StringBuffer *s, JSValueConst v)
return -1;
}
if (unlikely(JS_VALUE_GET_TAG(v) != JS_TAG_STRING)) {
- v1 = JS_ToString(s->ctx, v);
- if (JS_IsException(v1))
- return string_buffer_set_error(s);
- p = JS_VALUE_GET_STRING(v1);
- res = string_buffer_concat(s, p, 0, p->len);
- JS_FreeValue(s->ctx, v1);
- return res;
+ if (JS_VALUE_GET_TAG(v) == JS_TAG_STRING_ROPE) {
+ JSStringRope *r = JS_VALUE_GET_STRING_ROPE(v);
+ /* recursion is acceptable because the rope depth is bounded */
+ if (string_buffer_concat_value(s, r->left))
+ return -1;
+ return string_buffer_concat_value(s, r->right);
+ } else {
+ v1 = JS_ToString(s->ctx, v);
+ if (JS_IsException(v1))
+ return string_buffer_set_error(s);
+ p = JS_VALUE_GET_STRING(v1);
+ res = string_buffer_concat(s, p, 0, p->len);
+ JS_FreeValue(s->ctx, v1);
+ return res;
+ }
}
p = JS_VALUE_GET_STRING(v);
return string_buffer_concat(s, p, 0, p->len);
@@ -3961,20 +4006,21 @@ static int memcmp16(const uint16_t *src1, const uint16_t *src2, int len)
return 0;
}
-static int js_string_memcmp(const JSString *p1, const JSString *p2, int len)
+static int js_string_memcmp(const JSString *p1, int pos1, const JSString *p2,
+ int pos2, int len)
{
int res;
if (likely(!p1->is_wide_char)) {
if (likely(!p2->is_wide_char))
- res = memcmp(p1->u.str8, p2->u.str8, len);
+ res = memcmp(p1->u.str8 + pos1, p2->u.str8 + pos2, len);
else
- res = -memcmp16_8(p2->u.str16, p1->u.str8, len);
+ res = -memcmp16_8(p2->u.str16 + pos2, p1->u.str8 + pos1, len);
} else {
if (!p2->is_wide_char)
- res = memcmp16_8(p1->u.str16, p2->u.str8, len);
+ res = memcmp16_8(p1->u.str16 + pos1, p2->u.str8 + pos2, len);
else
- res = memcmp16(p1->u.str16, p2->u.str16, len);
+ res = memcmp16(p1->u.str16 + pos1, p2->u.str16 + pos2, len);
}
return res;
}
@@ -3985,7 +4031,7 @@ static int js_string_compare(JSContext *ctx,
{
int res, len;
len = min_int(p1->len, p2->len);
- res = js_string_memcmp(p1, p2, len);
+ res = js_string_memcmp(p1, 0, p2, 0, len);
if (res == 0) {
if (p1->len == p2->len)
res = 0;
@@ -4071,37 +4117,416 @@ static BOOL JS_ConcatStringInPlace(JSContext *ctx, JSString *p1, JSValueConst op
return FALSE;
}
+static JSValue JS_ConcatString2(JSContext *ctx, JSValue op1, JSValue op2)
+{
+ JSValue ret;
+ JSString *p1, *p2;
+ p1 = JS_VALUE_GET_STRING(op1);
+ if (JS_ConcatStringInPlace(ctx, p1, op2)) {
+ JS_FreeValue(ctx, op2);
+ return op1;
+ }
+ p2 = JS_VALUE_GET_STRING(op2);
+ ret = JS_ConcatString1(ctx, p1, p2);
+ JS_FreeValue(ctx, op1);
+ JS_FreeValue(ctx, op2);
+ return ret;
+}
+
+/* Return the character at position 'idx'. 'val' must be a string or rope */
+static int string_rope_get(JSValueConst val, uint32_t idx)
+{
+ if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
+ return string_get(JS_VALUE_GET_STRING(val), idx);
+ } else {
+ JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
+ uint32_t len;
+ if (JS_VALUE_GET_TAG(r->left) == JS_TAG_STRING)
+ len = JS_VALUE_GET_STRING(r->left)->len;
+ else
+ len = JS_VALUE_GET_STRING_ROPE(r->left)->len;
+ if (idx < len)
+ return string_rope_get(r->left, idx);
+ else
+ return string_rope_get(r->right, idx - len);
+ }
+}
+
+typedef struct {
+ JSValueConst stack[JS_STRING_ROPE_MAX_DEPTH];
+ int stack_len;
+} JSStringRopeIter;
+
+static void string_rope_iter_init(JSStringRopeIter *s, JSValueConst val)
+{
+ s->stack_len = 0;
+ s->stack[s->stack_len++] = val;
+}
+
+/* iterate thru a rope and return the strings in order */
+static JSString *string_rope_iter_next(JSStringRopeIter *s)
+{
+ JSValueConst val;
+ JSStringRope *r;
+
+ if (s->stack_len == 0)
+ return NULL;
+ val = s->stack[--s->stack_len];
+ for(;;) {
+ if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING)
+ return JS_VALUE_GET_STRING(val);
+ r = JS_VALUE_GET_STRING_ROPE(val);
+ assert(s->stack_len < JS_STRING_ROPE_MAX_DEPTH);
+ s->stack[s->stack_len++] = r->right;
+ val = r->left;
+ }
+}
+
+static uint32_t string_rope_get_len(JSValueConst val)
+{
+ if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING)
+ return JS_VALUE_GET_STRING(val)->len;
+ else
+ return JS_VALUE_GET_STRING_ROPE(val)->len;
+}
+
+static int js_string_rope_compare(JSContext *ctx, JSValueConst op1,
+ JSValueConst op2, BOOL eq_only)
+{
+ uint32_t len1, len2, len, pos1, pos2, l;
+ int res;
+ JSStringRopeIter it1, it2;
+ JSString *p1, *p2;
+
+ len1 = string_rope_get_len(op1);
+ len2 = string_rope_get_len(op2);
+ /* no need to go further for equality test if
+ different length */
+ if (eq_only && len1 != len2)
+ return 1;
+ len = min_uint32(len1, len2);
+ string_rope_iter_init(&it1, op1);
+ string_rope_iter_init(&it2, op2);
+ p1 = string_rope_iter_next(&it1);
+ p2 = string_rope_iter_next(&it2);
+ pos1 = 0;
+ pos2 = 0;
+ while (len != 0) {
+ l = min_uint32(p1->len - pos1, p2->len - pos2);
+ l = min_uint32(l, len);
+ res = js_string_memcmp(p1, pos1, p2, pos2, l);
+ if (res != 0)
+ return res;
+ len -= l;
+ pos1 += l;
+ if (pos1 >= p1->len) {
+ p1 = string_rope_iter_next(&it1);
+ pos1 = 0;
+ }
+ pos2 += l;
+ if (pos2 >= p2->len) {
+ p2 = string_rope_iter_next(&it2);
+ pos2 = 0;
+ }
+ }
+
+ if (len1 == len2)
+ res = 0;
+ else if (len1 < len2)
+ res = -1;
+ else
+ res = 1;
+ return res;
+}
+
+/* 'rope' must be a rope. return a string and modify the rope so that
+ it won't need to be linearized again. */
+static JSValue js_linearize_string_rope(JSContext *ctx, JSValue rope)
+{
+ StringBuffer b_s, *b = &b_s;
+ JSStringRope *r;
+ JSValue ret;
+
+ r = JS_VALUE_GET_STRING_ROPE(rope);
+
+ /* check whether it is already linearized */
+ if (JS_VALUE_GET_TAG(r->right) == JS_TAG_STRING &&
+ JS_VALUE_GET_STRING(r->right)->len == 0) {
+ ret = JS_DupValue(ctx, r->left);
+ JS_FreeValue(ctx, rope);
+ return ret;
+ }
+ if (string_buffer_init2(ctx, b, r->len, r->is_wide_char))
+ goto fail;
+ if (string_buffer_concat_value(b, rope))
+ goto fail;
+ ret = string_buffer_end(b);
+ if (r->header.ref_count > 1) {
+ /* update the rope so that it won't need to be linearized again */
+ JS_FreeValue(ctx, r->left);
+ JS_FreeValue(ctx, r->right);
+ r->left = JS_DupValue(ctx, ret);
+ r->right = JS_AtomToString(ctx, JS_ATOM_empty_string);
+ }
+ JS_FreeValue(ctx, rope);
+ return ret;
+ fail:
+ JS_FreeValue(ctx, rope);
+ return JS_EXCEPTION;
+}
+
+static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope);
+
+/* op1 and op2 must be strings or string ropes */
+static JSValue js_new_string_rope(JSContext *ctx, JSValue op1, JSValue op2)
+{
+ uint32_t len;
+ int is_wide_char, depth;
+ JSStringRope *r;
+ JSValue res;
+
+ if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
+ JSString *p1 = JS_VALUE_GET_STRING(op1);
+ len = p1->len;
+ is_wide_char = p1->is_wide_char;
+ depth = 0;
+ } else {
+ JSStringRope *r1 = JS_VALUE_GET_STRING_ROPE(op1);
+ len = r1->len;
+ is_wide_char = r1->is_wide_char;
+ depth = r1->depth;
+ }
+
+ if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) {
+ JSString *p2 = JS_VALUE_GET_STRING(op2);
+ len += p2->len;
+ is_wide_char |= p2->is_wide_char;
+ } else {
+ JSStringRope *r2 = JS_VALUE_GET_STRING_ROPE(op2);
+ len += r2->len;
+ is_wide_char |= r2->is_wide_char;
+ depth = max_int(depth, r2->depth);
+ }
+ if (len > JS_STRING_LEN_MAX) {
+ JS_ThrowInternalError(ctx, "string too long");
+ goto fail;
+ }
+ r = js_malloc(ctx, sizeof(*r));
+ if (!r)
+ goto fail;
+ r->header.ref_count = 1;
+ r->len = len;
+ r->is_wide_char = is_wide_char;
+ r->depth = depth + 1;
+ r->left = op1;
+ r->right = op2;
+ res = JS_MKPTR(JS_TAG_STRING_ROPE, r);
+ if (r->depth > JS_STRING_ROPE_MAX_DEPTH) {
+ JSValue res2;
+#ifdef DUMP_ROPE_REBALANCE
+ printf("rebalance: initial depth=%d\n", r->depth);
+#endif
+ res2 = js_rebalancee_string_rope(ctx, res);
+#ifdef DUMP_ROPE_REBALANCE
+ if (JS_VALUE_GET_TAG(res2) == JS_TAG_STRING_ROPE)
+ printf("rebalance: final depth=%d\n", JS_VALUE_GET_STRING_ROPE(res2)->depth);
+#endif
+ JS_FreeValue(ctx, res);
+ return res2;
+ } else {
+ return res;
+ }
+ fail:
+ JS_FreeValue(ctx, op1);
+ JS_FreeValue(ctx, op2);
+ return JS_EXCEPTION;
+}
+
+#define ROPE_N_BUCKETS 44
+
+/* Fibonacii numbers starting from F_2 */
+static const uint32_t rope_bucket_len[ROPE_N_BUCKETS] = {
+ 1, 2, 3, 5,
+ 8, 13, 21, 34,
+ 55, 89, 144, 233,
+ 377, 610, 987, 1597,
+ 2584, 4181, 6765, 10946,
+ 17711, 28657, 46368, 75025,
+ 121393, 196418, 317811, 514229,
+ 832040, 1346269, 2178309, 3524578,
+ 5702887, 9227465, 14930352, 24157817,
+ 39088169, 63245986, 102334155, 165580141,
+ 267914296, 433494437, 701408733, 1134903170, /* > JS_STRING_LEN_MAX */
+};
+
+static int js_rebalancee_string_rope_rec(JSContext *ctx, JSValue *buckets,
+ JSValueConst val)
+{
+ if (JS_VALUE_GET_TAG(val) == JS_TAG_STRING) {
+ JSString *p = JS_VALUE_GET_STRING(val);
+ uint32_t len, i;
+ JSValue a, b;
+
+ len = p->len;
+ if (len == 0)
+ return 0; /* nothing to do */
+ /* find the bucket i so that rope_bucket_len[i] <= len <
+ rope_bucket_len[i + 1] and concatenate the ropes in the
+ buckets before */
+ a = JS_NULL;
+ i = 0;
+ while (len >= rope_bucket_len[i + 1]) {
+ b = buckets[i];
+ if (!JS_IsNull(b)) {
+ buckets[i] = JS_NULL;
+ if (JS_IsNull(a)) {
+ a = b;
+ } else {
+ a = js_new_string_rope(ctx, b, a);
+ if (JS_IsException(a))
+ return -1;
+ }
+ }
+ i++;
+ }
+ if (!JS_IsNull(a)) {
+ a = js_new_string_rope(ctx, a, JS_DupValue(ctx, val));
+ if (JS_IsException(a))
+ return -1;
+ } else {
+ a = JS_DupValue(ctx, val);
+ }
+ while (!JS_IsNull(buckets[i])) {
+ a = js_new_string_rope(ctx, buckets[i], a);
+ buckets[i] = JS_NULL;
+ if (JS_IsException(a))
+ return -1;
+ i++;
+ }
+ buckets[i] = a;
+ } else {
+ JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
+ js_rebalancee_string_rope_rec(ctx, buckets, r->left);
+ js_rebalancee_string_rope_rec(ctx, buckets, r->right);
+ }
+ return 0;
+}
+
+/* Return a new rope which is balanced. Algorithm from "Ropes: an
+ Alternative to Strings", Hans-J. Boehm, Russ Atkinson and Michael
+ Plass. */
+static JSValue js_rebalancee_string_rope(JSContext *ctx, JSValueConst rope)
+{
+ JSValue buckets[ROPE_N_BUCKETS], a, b;
+ int i;
+
+ for(i = 0; i < ROPE_N_BUCKETS; i++)
+ buckets[i] = JS_NULL;
+ if (js_rebalancee_string_rope_rec(ctx, buckets, rope))
+ goto fail;
+ a = JS_NULL;
+ for(i = 0; i < ROPE_N_BUCKETS; i++) {
+ b = buckets[i];
+ if (!JS_IsNull(b)) {
+ buckets[i] = JS_NULL;
+ if (JS_IsNull(a)) {
+ a = b;
+ } else {
+ a = js_new_string_rope(ctx, b, a);
+ if (JS_IsException(a))
+ goto fail;
+ }
+ }
+ }
+ /* fail safe */
+ if (JS_IsNull(a))
+ return JS_AtomToString(ctx, JS_ATOM_empty_string);
+ else
+ return a;
+ fail:
+ for(i = 0; i < ROPE_N_BUCKETS; i++) {
+ JS_FreeValue(ctx, buckets[i]);
+ }
+ return JS_EXCEPTION;
+}
+
/* op1 and op2 are converted to strings. For convenience, op1 or op2 =
JS_EXCEPTION are accepted and return JS_EXCEPTION. */
static JSValue JS_ConcatString(JSContext *ctx, JSValue op1, JSValue op2)
{
- JSValue ret;
JSString *p1, *p2;
- if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING)) {
+ if (unlikely(JS_VALUE_GET_TAG(op1) != JS_TAG_STRING &&
+ JS_VALUE_GET_TAG(op1) != JS_TAG_STRING_ROPE)) {
op1 = JS_ToStringFree(ctx, op1);
if (JS_IsException(op1)) {
JS_FreeValue(ctx, op2);
return JS_EXCEPTION;
}
}
- if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING)) {
+ if (unlikely(JS_VALUE_GET_TAG(op2) != JS_TAG_STRING &&
+ JS_VALUE_GET_TAG(op2) != JS_TAG_STRING_ROPE)) {
op2 = JS_ToStringFree(ctx, op2);
if (JS_IsException(op2)) {
JS_FreeValue(ctx, op1);
return JS_EXCEPTION;
}
}
- p1 = JS_VALUE_GET_STRING(op1);
- if (JS_ConcatStringInPlace(ctx, p1, op2)) {
- JS_FreeValue(ctx, op2);
- return op1;
+
+ /* normal concatenation for short strings */
+ if (JS_VALUE_GET_TAG(op2) == JS_TAG_STRING) {
+ p2 = JS_VALUE_GET_STRING(op2);
+ if (p2->len == 0) {
+ JS_FreeValue(ctx, op2);
+ return op1;
+ }
+ if (p2->len <= JS_STRING_ROPE_SHORT_LEN) {
+ if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
+ p1 = JS_VALUE_GET_STRING(op1);
+ if (p1->len <= JS_STRING_ROPE_SHORT2_LEN) {
+ return JS_ConcatString2(ctx, op1, op2);
+ } else {
+ return js_new_string_rope(ctx, op1, op2);
+ }
+ } else {
+ JSStringRope *r1;
+ r1 = JS_VALUE_GET_STRING_ROPE(op1);
+ if (JS_VALUE_GET_TAG(r1->right) == JS_TAG_STRING &&
+ JS_VALUE_GET_STRING(r1->right)->len <= JS_STRING_ROPE_SHORT_LEN) {
+ JSValue val, ret;
+ val = JS_ConcatString2(ctx, JS_DupValue(ctx, r1->right), op2);
+ if (JS_IsException(val)) {
+ JS_FreeValue(ctx, op1);
+ return JS_EXCEPTION;
+ }
+ ret = js_new_string_rope(ctx, JS_DupValue(ctx, r1->left), val);
+ JS_FreeValue(ctx, op1);
+ return ret;
+ }
+ }
+ }
+ } else if (JS_VALUE_GET_TAG(op1) == JS_TAG_STRING) {
+ JSStringRope *r2;
+ p1 = JS_VALUE_GET_STRING(op1);
+ if (p1->len == 0) {
+ JS_FreeValue(ctx, op1);
+ return op2;
+ }
+ r2 = JS_VALUE_GET_STRING_ROPE(op2);
+ if (JS_VALUE_GET_TAG(r2->left) == JS_TAG_STRING &&
+ JS_VALUE_GET_STRING(r2->left)->len <= JS_STRING_ROPE_SHORT_LEN) {
+ JSValue val, ret;
+ val = JS_ConcatString2(ctx, op1, JS_DupValue(ctx, r2->left));
+ if (JS_IsException(val)) {
+ JS_FreeValue(ctx, op2);
+ return JS_EXCEPTION;
+ }
+ ret = js_new_string_rope(ctx, val, JS_DupValue(ctx, r2->right));
+ JS_FreeValue(ctx, op2);
+ return ret;
+ }
}
- p2 = JS_VALUE_GET_STRING(op2);
- ret = JS_ConcatString1(ctx, p1, p2);
- JS_FreeValue(ctx, op1);
- JS_FreeValue(ctx, op2);
- return ret;
+ return js_new_string_rope(ctx, op1, op2);
}
/* Shape support */
@@ -4757,7 +5182,9 @@ static int JS_SetObjectData(JSContext *ctx, JSValueConst obj, JSValue val)
case JS_CLASS_DATE:
case JS_CLASS_BIG_INT:
JS_FreeValue(ctx, p->u.object_data);
- p->u.object_data = val;
+ p->u.object_data = val; /* for JS_CLASS_STRING, 'val' must
+ be JS_TAG_STRING (and not a
+ rope) */
return 0;
}
}
@@ -5376,6 +5803,15 @@ void __JS_FreeValueRT(JSRuntime *rt, JSValue v)
}
}
break;
+ case JS_TAG_STRING_ROPE:
+ /* Note: recursion is acceptable because the rope depth is bounded */
+ {
+ JSStringRope *p = JS_VALUE_GET_STRING_ROPE(v);
+ JS_FreeValueRT(rt, p->left);
+ JS_FreeValueRT(rt, p->right);
+ js_free_rt(rt, p);
+ }
+ break;
case JS_TAG_OBJECT:
case JS_TAG_FUNCTION_BYTECODE:
{
@@ -6750,6 +7186,7 @@ static JSValueConst JS_GetPrototypePrimitive(JSContext *ctx, JSValueConst val)
val = ctx->class_proto[JS_CLASS_BOOLEAN];
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
val = ctx->class_proto[JS_CLASS_STRING];
break;
case JS_TAG_SYMBOL:
@@ -6949,14 +7386,24 @@ JSValue JS_GetPropertyInternal(JSContext *ctx, JSValueConst obj,
{
JSString *p1 = JS_VALUE_GET_STRING(obj);
if (__JS_AtomIsTaggedInt(prop)) {
- uint32_t idx, ch;
+ uint32_t idx;
idx = __JS_AtomToUInt32(prop);
if (idx < p1->len) {
- if (p1->is_wide_char)
- ch = p1->u.str16[idx];
- else
- ch = p1->u.str8[idx];
- return js_new_string_char(ctx, ch);
+ return js_new_string_char(ctx, string_get(p1, idx));
+ }
+ } else if (prop == JS_ATOM_length) {
+ return JS_NewInt32(ctx, p1->len);
+ }
+ }
+ break;
+ case JS_TAG_STRING_ROPE:
+ {
+ JSStringRope *p1 = JS_VALUE_GET_STRING_ROPE(obj);
+ if (__JS_AtomIsTaggedInt(prop)) {
+ uint32_t idx;
+ idx = __JS_AtomToUInt32(prop);
+ if (idx < p1->len) {
+ return js_new_string_char(ctx, string_rope_get(obj, idx));
}
} else if (prop == JS_ATOM_length) {
return JS_NewInt32(ctx, p1->len);
@@ -7263,13 +7710,12 @@ static uint32_t js_string_obj_get_length(JSContext *ctx,
JSValueConst obj)
{
JSObject *p;
- JSString *p1;
uint32_t len = 0;
/* This is a class exotic method: obj class_id is JS_CLASS_STRING */
p = JS_VALUE_GET_OBJ(obj);
if (JS_VALUE_GET_TAG(p->u.object_data) == JS_TAG_STRING) {
- p1 = JS_VALUE_GET_STRING(p->u.object_data);
+ JSString *p1 = JS_VALUE_GET_STRING(p->u.object_data);
len = p1->len;
}
return len;
@@ -9768,6 +10214,12 @@ static int JS_ToBoolFree(JSContext *ctx, JSValue val)
JS_FreeValue(ctx, val);
return ret;
}
+ case JS_TAG_STRING_ROPE:
+ {
+ BOOL ret = JS_VALUE_GET_STRING_ROPE(val)->len != 0;
+ JS_FreeValue(ctx, val);
+ return ret;
+ }
case JS_TAG_SHORT_BIG_INT:
return JS_VALUE_GET_SHORT_BIG_INT(val) != 0;
case JS_TAG_BIG_INT:
@@ -11558,6 +12010,7 @@ static JSValue JS_ToNumberHintFree(JSContext *ctx, JSValue val,
return JS_EXCEPTION;
goto redo;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
{
const char *str;
const char *p;
@@ -12164,7 +12617,7 @@ static JSValue js_dtoa2(JSContext *ctx,
return res;
}
-JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey)
+static JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToPropertyKey)
{
uint32_t tag;
const char *str;
@@ -12174,6 +12627,8 @@ JSValue JS_ToStringInternal(JSContext *ctx, JSValueConst val, BOOL is_ToProperty
switch(tag) {
case JS_TAG_STRING:
return JS_DupValue(ctx, val);
+ case JS_TAG_STRING_ROPE:
+ return js_linearize_string_rope(ctx, JS_DupValue(ctx, val));
case JS_TAG_INT:
{
int len;
@@ -12532,6 +12987,12 @@ static __maybe_unused void JS_DumpValueShort(JSRuntime *rt,
JS_DumpString(rt, p);
}
break;
+ case JS_TAG_STRING_ROPE:
+ {
+ JSStringRope *r = JS_VALUE_GET_STRING_ROPE(val);
+ printf("[rope len=%d depth=%d]", r->len, r->depth);
+ }
+ break;
case JS_TAG_FUNCTION_BYTECODE:
{
JSFunctionBytecode *b = JS_VALUE_GET_PTR(val);
@@ -12692,6 +13153,7 @@ static JSValue JS_ToBigIntFree(JSContext *ctx, JSValue val)
val = __JS_NewShortBigInt(ctx, JS_VALUE_GET_INT(val));
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
val = JS_StringToBigIntErr(ctx, val);
if (JS_IsException(val))
return val;
@@ -13117,6 +13579,11 @@ static no_inline __exception int js_binary_arith_slow(JSContext *ctx, JSValue *s
return -1;
}
+static inline BOOL tag_is_string(uint32_t tag)
+{
+ return tag == JS_TAG_STRING || tag == JS_TAG_STRING_ROPE;
+}
+
static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp)
{
JSValue op1, op2;
@@ -13169,7 +13636,7 @@ static no_inline __exception int js_add_slow(JSContext *ctx, JSValue *sp)
tag2 = JS_VALUE_GET_NORM_TAG(op2);
}
- if (tag1 == JS_TAG_STRING || tag2 == JS_TAG_STRING) {
+ if (tag_is_string(tag1) || tag_is_string(tag2)) {
sp[-2] = JS_ConcatString(ctx, op1, op2);
if (JS_IsException(sp[-2]))
goto exception;
@@ -13511,11 +13978,13 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp,
tag1 = JS_VALUE_GET_NORM_TAG(op1);
tag2 = JS_VALUE_GET_NORM_TAG(op2);
- if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
- JSString *p1, *p2;
- p1 = JS_VALUE_GET_STRING(op1);
- p2 = JS_VALUE_GET_STRING(op2);
- res = js_string_compare(ctx, p1, p2);
+ if (tag_is_string(tag1) && tag_is_string(tag2)) {
+ if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
+ res = js_string_compare(ctx, JS_VALUE_GET_STRING(op1),
+ JS_VALUE_GET_STRING(op2));
+ } else {
+ res = js_string_rope_compare(ctx, op1, op2, FALSE);
+ }
switch(op) {
case OP_lt:
res = (res < 0);
@@ -13539,16 +14008,16 @@ static no_inline int js_relational_slow(JSContext *ctx, JSValue *sp,
goto float64_compare;
} else {
if ((((tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT) &&
- tag2 == JS_TAG_STRING) ||
+ tag_is_string(tag2)) ||
((tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) &&
- tag1 == JS_TAG_STRING))) {
- if (tag1 == JS_TAG_STRING) {
+ tag_is_string(tag1)))) {
+ if (tag_is_string(tag1)) {
op1 = JS_StringToBigInt(ctx, op1);
if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT)
goto invalid_bigint_string;
}
- if (tag2 == JS_TAG_STRING) {
+ if (tag_is_string(tag2)) {
op2 = JS_StringToBigInt(ctx, op2);
if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT) {
@@ -13665,18 +14134,21 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp,
} else if ((tag1 == JS_TAG_NULL && tag2 == JS_TAG_UNDEFINED) ||
(tag2 == JS_TAG_NULL && tag1 == JS_TAG_UNDEFINED)) {
res = TRUE;
- } else if ((tag1 == JS_TAG_STRING && tag_is_number(tag2)) ||
- (tag2 == JS_TAG_STRING && tag_is_number(tag1))) {
+ } else if (tag_is_string(tag1) && tag_is_string(tag2)) {
+ /* needed when comparing strings and ropes */
+ res = js_strict_eq2(ctx, op1, op2, JS_EQ_STRICT);
+ } else if ((tag_is_string(tag1) && tag_is_number(tag2)) ||
+ (tag_is_string(tag2) && tag_is_number(tag1))) {
if (tag1 == JS_TAG_BIG_INT || tag1 == JS_TAG_SHORT_BIG_INT ||
tag2 == JS_TAG_BIG_INT || tag2 == JS_TAG_SHORT_BIG_INT) {
- if (tag1 == JS_TAG_STRING) {
+ if (tag_is_string(tag1)) {
op1 = JS_StringToBigInt(ctx, op1);
if (JS_VALUE_GET_TAG(op1) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op1) != JS_TAG_SHORT_BIG_INT)
goto invalid_bigint_string;
}
- if (tag2 == JS_TAG_STRING) {
+ if (tag_is_string(tag2)) {
op2 = JS_StringToBigInt(ctx, op2);
if (JS_VALUE_GET_TAG(op2) != JS_TAG_BIG_INT &&
JS_VALUE_GET_TAG(op2) != JS_TAG_SHORT_BIG_INT ) {
@@ -13707,9 +14179,9 @@ static no_inline __exception int js_eq_slow(JSContext *ctx, JSValue *sp,
op2 = JS_NewInt32(ctx, JS_VALUE_GET_INT(op2));
goto redo;
} else if ((tag1 == JS_TAG_OBJECT &&
- (tag_is_number(tag2) || tag2 == JS_TAG_STRING || tag2 == JS_TAG_SYMBOL)) ||
+ (tag_is_number(tag2) || tag_is_string(tag2) || tag2 == JS_TAG_SYMBOL)) ||
(tag2 == JS_TAG_OBJECT &&
- (tag_is_number(tag1) || tag1 == JS_TAG_STRING || tag1 == JS_TAG_SYMBOL))) {
+ (tag_is_number(tag1) || tag_is_string(tag1) || tag1 == JS_TAG_SYMBOL))) {
op1 = JS_ToPrimitiveFree(ctx, op1, HINT_NONE);
if (JS_IsException(op1)) {
JS_FreeValue(ctx, op2);
@@ -13805,14 +14277,15 @@ static BOOL js_strict_eq2(JSContext *ctx, JSValue op1, JSValue op2,
res = (tag1 == tag2);
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
{
- JSString *p1, *p2;
- if (tag1 != tag2) {
+ if (!tag_is_string(tag2)) {
res = FALSE;
+ } else if (tag1 == JS_TAG_STRING && tag2 == JS_TAG_STRING) {
+ res = (js_string_compare(ctx, JS_VALUE_GET_STRING(op1),
+ JS_VALUE_GET_STRING(op2)) == 0);
} else {
- p1 = JS_VALUE_GET_STRING(op1);
- p2 = JS_VALUE_GET_STRING(op2);
- res = (js_string_compare(ctx, p1, p2) == 0);
+ res = (js_string_rope_compare(ctx, op1, op2, TRUE) == 0);
}
}
break;
@@ -14070,6 +14543,7 @@ static __exception int js_operator_typeof(JSContext *ctx, JSValueConst op1)
atom = JS_ATOM_boolean;
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
atom = JS_ATOM_string;
break;
case JS_TAG_OBJECT:
@@ -34537,6 +35011,16 @@ static int JS_WriteObjectRec(BCWriterState *s, JSValueConst obj)
JS_WriteString(s, p);
}
break;
+ case JS_TAG_STRING_ROPE:
+ {
+ JSValue str;
+ str = JS_ToString(s->ctx, obj);
+ if (JS_IsException(str))
+ goto fail;
+ JS_WriteObjectRec(s, str);
+ JS_FreeValue(s->ctx, str);
+ }
+ break;
case JS_TAG_FUNCTION_BYTECODE:
if (!s->allow_bytecode)
goto invalid_tag;
@@ -36141,13 +36625,22 @@ static JSValue JS_ToObject(JSContext *ctx, JSValueConst val)
obj = JS_NewObjectClass(ctx, JS_CLASS_NUMBER);
goto set_value;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
/* XXX: should call the string constructor */
{
- JSString *p1 = JS_VALUE_GET_STRING(val);
+ JSValue str;
+ str = JS_ToString(ctx, val); /* ensure that we never store a rope */
+ if (JS_IsException(str))
+ return JS_EXCEPTION;
obj = JS_NewObjectClass(ctx, JS_CLASS_STRING);
- JS_DefinePropertyValue(ctx, obj, JS_ATOM_length, JS_NewInt32(ctx, p1->len), 0);
+ if (!JS_IsException(obj)) {
+ JS_DefinePropertyValue(ctx, obj, JS_ATOM_length,
+ JS_NewInt32(ctx, JS_VALUE_GET_STRING(str)->len), 0);
+ JS_SetObjectData(ctx, obj, JS_DupValue(ctx, str));
+ }
+ JS_FreeValue(ctx, str);
+ return obj;
}
- goto set_value;
case JS_TAG_BOOL:
obj = JS_NewObjectClass(ctx, JS_CLASS_BOOLEAN);
goto set_value;
@@ -40415,7 +40908,8 @@ static JSValue js_string_constructor(JSContext *ctx, JSValueConst new_target,
static JSValue js_thisStringValue(JSContext *ctx, JSValueConst this_val)
{
- if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING)
+ if (JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING ||
+ JS_VALUE_GET_TAG(this_val) == JS_TAG_STRING_ROPE)
return JS_DupValue(ctx, this_val);
if (JS_VALUE_GET_TAG(this_val) == JS_TAG_OBJECT) {
@@ -44334,6 +44828,7 @@ static JSValue js_json_check(JSContext *ctx, JSONStringifyContext *jsc,
if (JS_IsFunction(ctx, val))
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
case JS_TAG_INT:
case JS_TAG_FLOAT64:
case JS_TAG_BOOL:
@@ -44508,6 +45003,7 @@ static int js_json_to_str(JSContext *ctx, JSONStringifyContext *jsc,
concat_primitive:
switch (JS_VALUE_GET_NORM_TAG(val)) {
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
val = JS_ToQuotedStringFree(ctx, val);
if (JS_IsException(val))
goto exception;
@@ -46115,6 +46611,9 @@ static uint32_t map_hash_key(JSValueConst key)
case JS_TAG_STRING:
h = hash_string(JS_VALUE_GET_STRING(key), 0);
break;
+ case JS_TAG_STRING_ROPE:
+ h = hash_string_rope(key, 0);
+ break;
case JS_TAG_OBJECT:
case JS_TAG_SYMBOL:
h = (uintptr_t)JS_VALUE_GET_PTR(key) * 3163;
@@ -49698,6 +50197,7 @@ static JSValue JS_ToBigIntCtorFree(JSContext *ctx, JSValue val)
}
break;
case JS_TAG_STRING:
+ case JS_TAG_STRING_ROPE:
val = JS_StringToBigIntErr(ctx, val);
break;
case JS_TAG_OBJECT: