diff options
author | bellard <6490144+bellard@users.noreply.github.com> | 2020-09-06 18:53:08 +0200 |
---|---|---|
committer | bellard <6490144+bellard@users.noreply.github.com> | 2020-09-06 18:53:08 +0200 |
commit | 91459fb6723e29e923380cec0023af93819ae69d (patch) | |
tree | 6a1aff8d9b290ed184d1481da50d0e6b4a9a324c /libunicode.h | |
parent | 9096e544ba2357eeadc6f09fc6e5cf58db7751bc (diff) | |
download | quickjs-91459fb6723e29e923380cec0023af93819ae69d.tar.gz quickjs-91459fb6723e29e923380cec0023af93819ae69d.zip |
2020-01-05 release
Diffstat (limited to 'libunicode.h')
-rw-r--r-- | libunicode.h | 124 |
1 files changed, 124 insertions, 0 deletions
diff --git a/libunicode.h b/libunicode.h new file mode 100644 index 0000000..cfa600a --- /dev/null +++ b/libunicode.h @@ -0,0 +1,124 @@ +/* + * Unicode utilities + * + * Copyright (c) 2017-2018 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef LIBUNICODE_H +#define LIBUNICODE_H + +#include <inttypes.h> + +#define LRE_BOOL int /* for documentation purposes */ + +/* define it to include all the unicode tables (40KB larger) */ +#define CONFIG_ALL_UNICODE + +#define LRE_CC_RES_LEN_MAX 3 + +typedef enum { + UNICODE_NFC, + UNICODE_NFD, + UNICODE_NFKC, + UNICODE_NFKD, +} UnicodeNormalizationEnum; + +int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); +LRE_BOOL lre_is_cased(uint32_t c); +LRE_BOOL lre_is_case_ignorable(uint32_t c); + +/* char ranges */ + +typedef struct { + int len; /* in points, always even */ + int size; + uint32_t *points; /* points sorted by increasing value */ + void *mem_opaque; + void *(*realloc_func)(void *opaque, void *ptr, size_t size); +} CharRange; + +typedef enum { + CR_OP_UNION, + CR_OP_INTER, + CR_OP_XOR, +} CharRangeOpEnum; + +void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size)); +void cr_free(CharRange *cr); +int cr_realloc(CharRange *cr, int size); +int cr_copy(CharRange *cr, const CharRange *cr1); + +static inline int cr_add_point(CharRange *cr, uint32_t v) +{ + if (cr->len >= cr->size) { + if (cr_realloc(cr, cr->len + 1)) + return -1; + } + cr->points[cr->len++] = v; + return 0; +} + +static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2) +{ + if ((cr->len + 2) > cr->size) { + if (cr_realloc(cr, cr->len + 2)) + return -1; + } + cr->points[cr->len++] = c1; + cr->points[cr->len++] = c2; + return 0; +} + +int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len); + +static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2) +{ + uint32_t b_pt[2]; + b_pt[0] = c1; + b_pt[1] = c2 + 1; + return cr_union1(cr, b_pt, 2); +} + +int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, + const uint32_t *b_pt, int b_len, int op); + +int cr_invert(CharRange *cr); + +#ifdef CONFIG_ALL_UNICODE + +LRE_BOOL lre_is_id_start(uint32_t c); +LRE_BOOL lre_is_id_continue(uint32_t c); + +int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, + UnicodeNormalizationEnum n_type, + void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size)); + +/* Unicode character range functions */ + +int unicode_script(CharRange *cr, + const char *script_name, LRE_BOOL is_ext); +int unicode_general_category(CharRange *cr, const char *gc_name); +int unicode_prop(CharRange *cr, const char *prop_name); + +#endif /* CONFIG_ALL_UNICODE */ + +#undef LRE_BOOL + +#endif /* LIBUNICODE_H */ |