/*
* nJSVM supports two string variants:
*
- * 1) short strings which size is lesser than 14 bytes, these strings are
- * stored inside njs_value_t (see njs_vm.h for details);
+ * 1) short strings whose size is less than or equal to 14 (NJS_STRING_SHORT)
+ * bytes; these strings are stored inside njs_value_t (see njs_vm.h for
+ * details);
*
* 2) and long strings using additional njs_string_t structure.
* This structure has the start field to support external strings.
#define NJS_STRING_MAX_LENGTH 0x7fffffff
/*
- * Should be power of two to use shift and binary and operations instead of
- * division and remainder operations but no less than 16 because the maximum
- * length of short string inlined in njs_value_t is less than 16 bytes.
+ * NJS_STRING_MAP_STRIDE should be a power of two so that shift and bitwise
+ * AND operations can be used instead of division and remainder operations,
+ * but no less than 16 because the maximum length of a short string inlined
+ * in njs_value_t is less than 16 bytes.
*/
#define NJS_STRING_MAP_STRIDE 32
(((length - 1) / NJS_STRING_MAP_STRIDE) * sizeof(uint32_t))
/*
- * The JavaScript standard states that strings are stored in UTF-16.
- * nJSVM allows to store any byte sequences in strings. A size of the
- * string in bytes is stored in the size field. If a byte sequence is
- * valid UTF-8 string then its length is stored in the UTF-8 length field.
- * Otherwise, the length field is zero. If a string is UTF-8 string then
- * string functions work with UTF-8 characters positions and lengths.
- * Othersise they work with byte positions and lengths. Using UTF-8
- * encoding does not allow to get quickly a character at specified position.
- * To speed up this search a map of offsets is stored after the UTF-8 string.
- * The map is aligned to uint32_t and contains byte positions of each
- * NJS_STRING_MAP_STRIDE UTF-8 character except zero position. The map
- * can be initialized on demand. If a string come outside JavaScript as
- * byte sequnece just to be concatenated or to be used in regular expressions
- * the offset map is not required.
+ * ECMAScript strings are stored in UTF-16. nJSVM, however, allows storing
+ * any byte sequence in strings. The size of a string in bytes is stored in the
+ * size field. If the byte sequence is a valid UTF-8 string then its length is
+ * stored in the UTF-8 length field. Otherwise, the length field is zero.
+ * If a string is a UTF-8 string then string functions use UTF-8 character
+ * positions and lengths. Otherwise they use byte positions and lengths.
+ * Using UTF-8 encoding does not allow quick access to a character at a given
+ * position. To speed up this search a map of offsets is stored after the
+ * UTF-8 string. The map is aligned to uint32_t and contains byte positions
+ * of every NJS_STRING_MAP_STRIDE-th UTF-8 character except position zero. The
+ * map can be initialized on demand. An uninitialized map is marked with a zero
+ * value in the first map element. If a string comes from outside JavaScript as
+ * a byte string just to be concatenated or to match regular expressions, the
+ * offset map is not required.
*
* The map is not allocated:
- * 1) if the length is zero hence it is a byte string;
- * 2) if the size and length are equal so the string contains only ASCII
- * characters map is not required;
- * 3) if the length is less than NJS_STRING_MAP_STRIDE.
+ * 1) if the string length is zero, hence the string is a byte string;
+ * 2) if the string size and length are equal, so the string contains only
+ * ASCII characters and the map is not required;
+ * 3) if the string length is less than NJS_STRING_MAP_STRIDE.
*
* The current implementation does not support Unicode surrogate pairs.
- * If offset in map points to surrogate pair then the previous offset
- * should be used and so on until start of the string.
+ * Support can be added later, if required, using the following algorithm:
+ * if an offset in the map points to a surrogate pair then the previous
+ * offset should be used, and so on until the start of the string.
*/
struct njs_string_s {