summaryrefslogtreecommitdiff
path: root/libunicode-table.h
diff options
context:
space:
mode:
authorCharlie Gordon <github@chqrlie.org>2024-05-05 12:10:24 +0200
committerGitHub <noreply@github.com>2024-05-05 12:10:24 +0200
commit1402478d8d280a1a62dfb76327dd569d6307a025 (patch)
treee9f4f938b677b069db88d1004f3dcb4ab3d88755 /libunicode-table.h
parent3b45d155c77bbdfe9177b1e03db830d2aff0b2a8 (diff)
downloadquickjs-1402478d8d280a1a62dfb76327dd569d6307a025.tar.gz
quickjs-1402478d8d280a1a62dfb76327dd569d6307a025.zip
Improve unicode table handling (#286)
- Document table and index formats - Add size statistics - Fix UBSAN issue in `get_le24()` Fixes #285
Diffstat (limited to 'libunicode-table.h')
-rw-r--r--libunicode-table.h161
1 files changed, 116 insertions, 45 deletions
diff --git a/libunicode-table.h b/libunicode-table.h
index 513ed94..72d495e 100644
--- a/libunicode-table.h
+++ b/libunicode-table.h
@@ -189,9 +189,13 @@ static const uint8_t unicode_prop_Cased1_table[196] = {
};
static const uint8_t unicode_prop_Cased1_index[21] = {
- 0xb9, 0x02, 0xe0, 0xc0, 0x1d, 0x20, 0xe5, 0x2c,
- 0x20, 0xb1, 0x07, 0x21, 0xc1, 0xd6, 0x21, 0x4a,
- 0xf1, 0x01, 0x8a, 0xf1, 0x01,
+ 0xb9, 0x02, 0xe0, // 002B9 at 39
+ 0xc0, 0x1d, 0x20, // 01DC0 at 65
+ 0xe5, 0x2c, 0x20, // 02CE5 at 97
+ 0xb1, 0x07, 0x21, // 107B1 at 129
+ 0xc1, 0xd6, 0x21, // 1D6C1 at 161
+ 0x4a, 0xf1, 0x01, // 1F14A at 192
+ 0x8a, 0xf1, 0x01, // 1F18A at 224 (upper bound)
};
static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
@@ -291,15 +295,29 @@ static const uint8_t unicode_prop_Case_Ignorable_table[737] = {
};
static const uint8_t unicode_prop_Case_Ignorable_index[69] = {
- 0xbe, 0x05, 0x00, 0xfe, 0x07, 0x00, 0x52, 0x0a,
- 0xa0, 0xc1, 0x0b, 0x00, 0x82, 0x0d, 0x00, 0x3f,
- 0x10, 0x80, 0xd4, 0x17, 0x40, 0xcf, 0x1a, 0x20,
- 0xf5, 0x1c, 0x00, 0x80, 0x20, 0x00, 0x16, 0xa0,
- 0x00, 0xc6, 0xa8, 0x00, 0xc2, 0xaa, 0x60, 0x56,
- 0xfe, 0x20, 0xb1, 0x07, 0x01, 0x75, 0x10, 0x01,
- 0xeb, 0x12, 0x21, 0x41, 0x16, 0x01, 0x5c, 0x1a,
- 0x01, 0x43, 0x1f, 0x01, 0x2e, 0xcf, 0x41, 0x25,
- 0xe0, 0x01, 0xf0, 0x01, 0x0e,
+ 0xbe, 0x05, 0x00, // 005BE at 32
+ 0xfe, 0x07, 0x00, // 007FE at 64
+ 0x52, 0x0a, 0xa0, // 00A52 at 101
+ 0xc1, 0x0b, 0x00, // 00BC1 at 128
+ 0x82, 0x0d, 0x00, // 00D82 at 160
+ 0x3f, 0x10, 0x80, // 0103F at 196
+ 0xd4, 0x17, 0x40, // 017D4 at 226
+ 0xcf, 0x1a, 0x20, // 01ACF at 257
+ 0xf5, 0x1c, 0x00, // 01CF5 at 288
+ 0x80, 0x20, 0x00, // 02080 at 320
+ 0x16, 0xa0, 0x00, // 0A016 at 352
+ 0xc6, 0xa8, 0x00, // 0A8C6 at 384
+ 0xc2, 0xaa, 0x60, // 0AAC2 at 419
+ 0x56, 0xfe, 0x20, // 0FE56 at 449
+ 0xb1, 0x07, 0x01, // 107B1 at 480
+ 0x75, 0x10, 0x01, // 11075 at 512
+ 0xeb, 0x12, 0x21, // 112EB at 545
+ 0x41, 0x16, 0x01, // 11641 at 576
+ 0x5c, 0x1a, 0x01, // 11A5C at 608
+ 0x43, 0x1f, 0x01, // 11F43 at 640
+ 0x2e, 0xcf, 0x41, // 1CF2E at 674
+ 0x25, 0xe0, 0x01, // 1E025 at 704
+ 0xf0, 0x01, 0x0e, // E01F0 at 736 (upper bound)
};
static const uint8_t unicode_prop_ID_Start_table[1100] = {
@@ -444,20 +462,41 @@ static const uint8_t unicode_prop_ID_Start_table[1100] = {
};
static const uint8_t unicode_prop_ID_Start_index[105] = {
- 0xf6, 0x03, 0x20, 0xa6, 0x07, 0x00, 0xa9, 0x09,
- 0x20, 0xb1, 0x0a, 0x00, 0xba, 0x0b, 0x20, 0x3b,
- 0x0d, 0x20, 0xc7, 0x0e, 0x20, 0x49, 0x12, 0x00,
- 0x9b, 0x16, 0x00, 0xac, 0x19, 0x00, 0xc0, 0x1d,
- 0x80, 0x80, 0x20, 0x20, 0x70, 0x2d, 0x00, 0x00,
- 0x32, 0x00, 0xda, 0xa7, 0x00, 0x4c, 0xaa, 0x20,
- 0xc7, 0xd7, 0x20, 0xfc, 0xfd, 0x20, 0x9d, 0x02,
- 0x21, 0x96, 0x05, 0x01, 0xf3, 0x08, 0x01, 0xb3,
- 0x0c, 0x21, 0x73, 0x11, 0x61, 0x34, 0x13, 0x01,
- 0x1b, 0x17, 0x21, 0x8a, 0x1a, 0x01, 0x34, 0x1f,
- 0x21, 0xbf, 0x6a, 0x01, 0x23, 0xb1, 0xa1, 0xad,
- 0xd4, 0x01, 0x6f, 0xd7, 0x01, 0xff, 0xe7, 0x61,
- 0x5e, 0xee, 0x01, 0xe1, 0xeb, 0x22, 0xb0, 0x23,
- 0x03,
+ 0xf6, 0x03, 0x20, // 003F6 at 33
+ 0xa6, 0x07, 0x00, // 007A6 at 64
+ 0xa9, 0x09, 0x20, // 009A9 at 97
+ 0xb1, 0x0a, 0x00, // 00AB1 at 128
+ 0xba, 0x0b, 0x20, // 00BBA at 161
+ 0x3b, 0x0d, 0x20, // 00D3B at 193
+ 0xc7, 0x0e, 0x20, // 00EC7 at 225
+ 0x49, 0x12, 0x00, // 01249 at 256
+ 0x9b, 0x16, 0x00, // 0169B at 288
+ 0xac, 0x19, 0x00, // 019AC at 320
+ 0xc0, 0x1d, 0x80, // 01DC0 at 356
+ 0x80, 0x20, 0x20, // 02080 at 385
+ 0x70, 0x2d, 0x00, // 02D70 at 416
+ 0x00, 0x32, 0x00, // 03200 at 448
+ 0xda, 0xa7, 0x00, // 0A7DA at 480
+ 0x4c, 0xaa, 0x20, // 0AA4C at 513
+ 0xc7, 0xd7, 0x20, // 0D7C7 at 545
+ 0xfc, 0xfd, 0x20, // 0FDFC at 577
+ 0x9d, 0x02, 0x21, // 1029D at 609
+ 0x96, 0x05, 0x01, // 10596 at 640
+ 0xf3, 0x08, 0x01, // 108F3 at 672
+ 0xb3, 0x0c, 0x21, // 10CB3 at 705
+ 0x73, 0x11, 0x61, // 11173 at 739
+ 0x34, 0x13, 0x01, // 11334 at 768
+ 0x1b, 0x17, 0x21, // 1171B at 801
+ 0x8a, 0x1a, 0x01, // 11A8A at 832
+ 0x34, 0x1f, 0x21, // 11F34 at 865
+ 0xbf, 0x6a, 0x01, // 16ABF at 896
+ 0x23, 0xb1, 0xa1, // 1B123 at 933
+ 0xad, 0xd4, 0x01, // 1D4AD at 960
+ 0x6f, 0xd7, 0x01, // 1D76F at 992
+ 0xff, 0xe7, 0x61, // 1E7FF at 1027
+ 0x5e, 0xee, 0x01, // 1EE5E at 1056
+ 0xe1, 0xeb, 0x22, // 2EBE1 at 1089
+ 0xb0, 0x23, 0x03, // 323B0 at 1120 (upper bound)
};
static const uint8_t unicode_prop_ID_Continue1_table[660] = {
@@ -547,14 +586,27 @@ static const uint8_t unicode_prop_ID_Continue1_table[660] = {
};
static const uint8_t unicode_prop_ID_Continue1_index[63] = {
- 0xfa, 0x06, 0x00, 0x70, 0x09, 0x00, 0xf0, 0x0a,
- 0x40, 0x57, 0x0c, 0x00, 0xf0, 0x0d, 0x60, 0xc7,
- 0x0f, 0x20, 0xea, 0x17, 0x40, 0x05, 0x1b, 0x00,
- 0x41, 0x20, 0x00, 0x0c, 0xa8, 0x80, 0x37, 0xaa,
- 0x20, 0x50, 0xfe, 0x20, 0x3a, 0x0d, 0x21, 0x74,
- 0x11, 0x01, 0x5a, 0x14, 0x21, 0x44, 0x19, 0x81,
- 0x5a, 0x1d, 0xa1, 0xf5, 0x6a, 0x21, 0x45, 0xd2,
- 0x41, 0xaf, 0xe2, 0x21, 0xf0, 0x01, 0x0e,
+ 0xfa, 0x06, 0x00, // 006FA at 32
+ 0x70, 0x09, 0x00, // 00970 at 64
+ 0xf0, 0x0a, 0x40, // 00AF0 at 98
+ 0x57, 0x0c, 0x00, // 00C57 at 128
+ 0xf0, 0x0d, 0x60, // 00DF0 at 163
+ 0xc7, 0x0f, 0x20, // 00FC7 at 193
+ 0xea, 0x17, 0x40, // 017EA at 226
+ 0x05, 0x1b, 0x00, // 01B05 at 256
+ 0x41, 0x20, 0x00, // 02041 at 288
+ 0x0c, 0xa8, 0x80, // 0A80C at 324
+ 0x37, 0xaa, 0x20, // 0AA37 at 353
+ 0x50, 0xfe, 0x20, // 0FE50 at 385
+ 0x3a, 0x0d, 0x21, // 10D3A at 417
+ 0x74, 0x11, 0x01, // 11174 at 448
+ 0x5a, 0x14, 0x21, // 1145A at 481
+ 0x44, 0x19, 0x81, // 11944 at 516
+ 0x5a, 0x1d, 0xa1, // 11D5A at 549
+ 0xf5, 0x6a, 0x21, // 16AF5 at 577
+ 0x45, 0xd2, 0x41, // 1D245 at 610
+ 0xaf, 0xe2, 0x21, // 1E2AF at 641
+ 0xf0, 0x01, 0x0e, // E01F0 at 672 (upper bound)
};
#ifdef CONFIG_ALL_UNICODE
@@ -676,17 +728,35 @@ static const uint8_t unicode_cc_table[899] = {
};
static const uint8_t unicode_cc_index[87] = {
- 0x4d, 0x03, 0x00, 0x97, 0x05, 0x20, 0xc6, 0x05,
- 0x00, 0xe7, 0x06, 0x00, 0x45, 0x07, 0x00, 0x9c,
- 0x08, 0x00, 0x4d, 0x09, 0x00, 0x3c, 0x0b, 0x00,
- 0x3d, 0x0d, 0x00, 0x36, 0x0f, 0x00, 0x38, 0x10,
- 0x20, 0x3a, 0x19, 0x00, 0xcb, 0x1a, 0x20, 0xd3,
- 0x1c, 0x00, 0xcf, 0x1d, 0x00, 0xe2, 0x20, 0x00,
- 0x2e, 0x30, 0x20, 0x2b, 0xa9, 0x20, 0xed, 0xab,
- 0x00, 0x39, 0x0a, 0x01, 0x51, 0x0f, 0x01, 0x73,
- 0x11, 0x01, 0x75, 0x13, 0x01, 0x2b, 0x17, 0x21,
- 0x3f, 0x1c, 0x21, 0x9e, 0xbc, 0x21, 0x08, 0xe0,
- 0x01, 0x44, 0xe9, 0x01, 0x4b, 0xe9, 0x01,
+ 0x4d, 0x03, 0x00, // 0034D at 32
+ 0x97, 0x05, 0x20, // 00597 at 65
+ 0xc6, 0x05, 0x00, // 005C6 at 96
+ 0xe7, 0x06, 0x00, // 006E7 at 128
+ 0x45, 0x07, 0x00, // 00745 at 160
+ 0x9c, 0x08, 0x00, // 0089C at 192
+ 0x4d, 0x09, 0x00, // 0094D at 224
+ 0x3c, 0x0b, 0x00, // 00B3C at 256
+ 0x3d, 0x0d, 0x00, // 00D3D at 288
+ 0x36, 0x0f, 0x00, // 00F36 at 320
+ 0x38, 0x10, 0x20, // 01038 at 353
+ 0x3a, 0x19, 0x00, // 0193A at 384
+ 0xcb, 0x1a, 0x20, // 01ACB at 417
+ 0xd3, 0x1c, 0x00, // 01CD3 at 448
+ 0xcf, 0x1d, 0x00, // 01DCF at 480
+ 0xe2, 0x20, 0x00, // 020E2 at 512
+ 0x2e, 0x30, 0x20, // 0302E at 545
+ 0x2b, 0xa9, 0x20, // 0A92B at 577
+ 0xed, 0xab, 0x00, // 0ABED at 608
+ 0x39, 0x0a, 0x01, // 10A39 at 640
+ 0x51, 0x0f, 0x01, // 10F51 at 672
+ 0x73, 0x11, 0x01, // 11173 at 704
+ 0x75, 0x13, 0x01, // 11375 at 736
+ 0x2b, 0x17, 0x21, // 1172B at 769
+ 0x3f, 0x1c, 0x21, // 11C3F at 801
+ 0x9e, 0xbc, 0x21, // 1BC9E at 833
+ 0x08, 0xe0, 0x01, // 1E008 at 864
+ 0x44, 0xe9, 0x01, // 1E944 at 896
+ 0x4b, 0xe9, 0x01, // 1E94B at 928 (upper bound)
};
static const uint32_t unicode_decomp_table1[699] = {
@@ -4484,3 +4554,4 @@ static const uint16_t unicode_prop_len_table[] = {
};
#endif /* CONFIG_ALL_UNICODE */
+/* 62 tables / 32261 bytes, 5 index / 345 bytes */