summary | refs | log | tree | commit | diff
path: root/unicode_gen.c
diff options
context:
space:
mode:
author: Charlie Gordon <github@chqrlie.org> 2024-02-10 16:18:11 +0100
committer: Charlie Gordon <github@chqrlie.org> 2024-02-10 16:19:17 +0100
commit: 37bd4ae62db064984a5956ab534920dfa21e3c5d (patch)
tree: 3bfd1df7e0e68e53771fab0fca9e17a1ef7b8156 /unicode_gen.c
parent: 6f480abbc8b2abe91fcc0fa58aa07c367e1dcb36 (diff)
download: quickjs-37bd4ae62db064984a5956ab534920dfa21e3c5d.tar.gz
download: quickjs-37bd4ae62db064984a5956ab534920dfa21e3c5d.zip
Strip trailing spaces
Diffstat (limited to 'unicode_gen.c')
-rw-r--r-- unicode_gen.c 140
1 files changed, 70 insertions, 70 deletions
diff --git a/unicode_gen.c b/unicode_gen.c
index 54da2c1..1d2be2a 100644
--- a/unicode_gen.c
+++ b/unicode_gen.c
@@ -1,6 +1,6 @@
/*
* Generation of Unicode tables
- *
+ *
* Copyright (c) 2017-2018 Fabrice Bellard
* Copyright (c) 2017-2018 Charlie Gordon
*
@@ -291,7 +291,7 @@ void parse_unicode_data(const char *filename)
const char *p;
int code, lc, uc, last_code;
CCInfo *ci, *tab = unicode_db;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -314,7 +314,7 @@ void parse_unicode_data(const char *filename)
code = strtoul(p, NULL, 16);
lc = 0;
uc = 0;
-
+
p = get_field(line, 12);
if (p && *p != ';') {
uc = strtoul(p, NULL, 16);
@@ -350,7 +350,7 @@ void parse_unicode_data(const char *filename)
}
ci->general_category = i;
}
-
+
p = get_field(line, 3);
if (p && *p != ';' && *p != '\0') {
int cc;
@@ -402,7 +402,7 @@ void parse_unicode_data(const char *filename)
if (p && *p == 'Y') {
set_prop(code, PROP_Bidi_Mirrored, 1);
}
-
+
/* handle ranges */
get_field_buf(buf1, sizeof(buf1), line, 1);
if (strstr(buf1, " Last>")) {
@@ -416,7 +416,7 @@ void parse_unicode_data(const char *filename)
}
last_code = code;
}
-
+
fclose(f);
}
@@ -427,7 +427,7 @@ void parse_special_casing(CCInfo *tab, const char *filename)
const char *p;
int code;
CCInfo *ci;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -458,8 +458,8 @@ void parse_special_casing(CCInfo *tab, const char *filename)
if (*p != '#' && *p != '\0')
continue;
}
-
-
+
+
p = get_field(line, 1);
if (p && *p != ';') {
ci->l_len = 0;
@@ -492,7 +492,7 @@ void parse_special_casing(CCInfo *tab, const char *filename)
ci->u_len = 0;
}
}
-
+
fclose(f);
}
@@ -503,7 +503,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
const char *p;
int code, status;
CCInfo *ci;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -535,7 +535,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
status = *p;
if (status != 'C' && status != 'S' && status != 'F')
continue;
-
+
p = get_field(line, 2);
assert(p != NULL);
if (status == 'S') {
@@ -555,7 +555,7 @@ void parse_case_folding(CCInfo *tab, const char *filename)
ci->f_data[ci->f_len++] = strtoul(p, (char **)&p, 16);
}
}
-
+
fclose(f);
}
@@ -564,7 +564,7 @@ void parse_composition_exclusions(const char *filename)
FILE *f;
char line[4096], *p;
uint32_t c0;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -592,7 +592,7 @@ void parse_derived_core_properties(const char *filename)
char line[4096], *p, buf[256], *q;
uint32_t c0, c1, c;
int i;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -648,7 +648,7 @@ void parse_derived_norm_properties(const char *filename)
FILE *f;
char line[4096], *p, buf[256], *q;
uint32_t c0, c1, c;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -698,7 +698,7 @@ void parse_prop_list(const char *filename)
char line[4096], *p, buf[256], *q;
uint32_t c0, c1, c;
int i;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -752,7 +752,7 @@ void parse_scripts(const char *filename)
char line[4096], *p, buf[256], *q;
uint32_t c0, c1, c;
int i;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -807,7 +807,7 @@ void parse_script_extensions(const char *filename)
int i;
uint8_t script_ext[255];
int script_ext_len;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -972,7 +972,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
ci1 = &tab[code + 1];
ci2 = &tab[code + 2];
te->code = code;
-
+
if (ci->l_len == 1 && ci->l_data[0] == code + 2 &&
ci->f_len == 1 && ci->f_data[0] == ci->l_data[0] &&
ci->u_len == 0 &&
@@ -1140,7 +1140,7 @@ void find_run_type(TableEntry *te, CCInfo *tab, int code)
te->data = 0;
return;
}
-
+
ci = &tab[code];
is_lower = ci->l_len > 0;
len = 1;
@@ -1221,7 +1221,7 @@ void build_conv_table(CCInfo *tab)
int code, i, j;
CCInfo *ci;
TableEntry *te;
-
+
te = conv_table;
for(code = 0; code <= CHARCODE_MAX; code++) {
ci = &tab[code];
@@ -1245,7 +1245,7 @@ void build_conv_table(CCInfo *tab)
for(i = 0; i < conv_table_len; i++) {
int data_index;
te = &conv_table[i];
-
+
switch(te->type) {
case RUN_TYPE_U:
case RUN_TYPE_L:
@@ -1374,7 +1374,7 @@ static int sp_cc_cmp(const void *p1, const void *p2)
return memcmp(c1->f_data, c2->f_data, sizeof(c1->f_data[0]) * c1->f_len);
}
}
-
+
/* dump the case special cases (multi character results which are
identical and need specific handling in lre_canonicalize() */
void dump_case_folding_special_cases(CCInfo *tab)
@@ -1394,7 +1394,7 @@ void dump_case_folding_special_cases(CCInfo *tab)
len = 1;
while ((i + len) <= CHARCODE_MAX && !sp_cc_cmp(&perm[i], &perm[i + len]))
len++;
-
+
if (len > 1) {
for(j = i; j < i + len; j++)
dump_cc_info(&tab[perm[j]], perm[j]);
@@ -1405,7 +1405,7 @@ void dump_case_folding_special_cases(CCInfo *tab)
free(perm);
global_tab = NULL;
}
-
+
int tabcmp(const int *tab1, const int *tab2, int n)
{
@@ -1490,7 +1490,7 @@ void build_prop_table(FILE *f, int prop_index, BOOL add_index)
const uint32_t *buf;
int buf_len, block_end_pos, bit;
char cname[128];
-
+
dbuf_init(dbuf1);
for(i = 0; i <= CHARCODE_MAX;) {
@@ -1506,15 +1506,15 @@ void build_prop_table(FILE *f, int prop_index, BOOL add_index)
dbuf_put_u32(dbuf1, n - 1);
i += n;
}
-
+
dbuf_init(dbuf);
dbuf_init(dbuf2);
buf = (uint32_t *)dbuf1->buf;
buf_len = dbuf1->size / sizeof(buf[0]);
-
+
/* the first value is assumed to be 0 */
assert(get_prop(0, prop_index) == 0);
-
+
block_end_pos = PROP_BLOCK_LEN;
i = 0;
code = 0;
@@ -1575,7 +1575,7 @@ void build_prop_table(FILE *f, int prop_index, BOOL add_index)
snprintf(cname, sizeof(cname), "unicode_prop_%s_index", unicode_prop_name[prop_index]);
dump_byte_table(f, cname, dbuf2->buf, dbuf2->size);
}
-
+
dbuf_free(dbuf);
dbuf_free(dbuf1);
dbuf_free(dbuf2);
@@ -1690,7 +1690,7 @@ void build_general_category_table(FILE *f)
printf(" %d", cw_len_count[i]);
printf(" ], length=%d bytes\n", (int)dbuf->size);
#endif
-
+
dump_byte_table(f, "unicode_gc_table", dbuf->buf, dbuf->size);
dbuf_free(dbuf);
@@ -1760,7 +1760,7 @@ void build_script_table(FILE *f)
printf(" %d", cw_len_count[i]);
printf(" ], length=%d bytes\n", (int)dbuf->size);
#endif
-
+
dump_byte_table(f, "unicode_script_table", dbuf->buf, dbuf->size);
dbuf_free(dbuf);
@@ -1810,7 +1810,7 @@ void build_script_ext_table(FILE *f)
cw_count);
printf(", length=%d bytes\n", (int)dbuf->size);
#endif
-
+
dump_byte_table(f, "unicode_script_ext_table", dbuf->buf, dbuf->size);
dbuf_free(dbuf);
@@ -1822,7 +1822,7 @@ void build_script_ext_table(FILE *f)
void build_prop_list_table(FILE *f)
{
int i;
-
+
for(i = 0; i < PROP_TABLE_COUNT; i++) {
if (i == PROP_ID_Start ||
i == PROP_Case_Ignorable ||
@@ -1832,7 +1832,7 @@ void build_prop_list_table(FILE *f)
build_prop_table(f, i, FALSE);
}
}
-
+
fprintf(f, "typedef enum {\n");
for(i = 0; i < PROP_COUNT; i++)
fprintf(f, " UNICODE_PROP_%s,\n", unicode_prop_name[i]);
@@ -1870,7 +1870,7 @@ void check_case_conv(void)
int l, error;
CCInfo ci_s, *ci1, *ci = &ci_s;
int code;
-
+
for(code = 0; code <= CHARCODE_MAX; code++) {
ci1 = &tab[code];
*ci = *ci1;
@@ -1986,7 +1986,7 @@ void build_cc_table(FILE *f)
DynBuf dbuf1_s, *dbuf1 = &dbuf1_s;
int cw_len_tab[3], cw_start, block_end_pos;
uint32_t v;
-
+
dbuf_init(dbuf);
dbuf_init(dbuf1);
cc_table_len = 0;
@@ -2058,7 +2058,7 @@ void build_cc_table(FILE *f)
dbuf_putc(dbuf1, v);
dbuf_putc(dbuf1, v >> 8);
dbuf_putc(dbuf1, v >> 16);
-
+
dump_byte_table(f, "unicode_cc_table", dbuf->buf, dbuf->size);
dump_byte_table(f, "unicode_cc_index", dbuf1->buf, dbuf1->size);
@@ -2163,7 +2163,7 @@ const int decomp_incr_tab[4][4] = {
/*
entry size:
type bits
- code 18
+ code 18
len 7
compat 1
type 5
@@ -2272,7 +2272,7 @@ void find_decomp_run(DecompEntry *tab_de, int i)
DecompEntry de_s, *de = &de_s;
CCInfo *ci, *ci1, *ci2;
int l, j, n, len_max;
-
+
ci = &unicode_db[i];
l = ci->decomp_len;
if (l == 0) {
@@ -2283,12 +2283,12 @@ void find_decomp_run(DecompEntry *tab_de, int i)
/* the offset for the compose table has only 6 bits, so we must
limit if it can be used by the compose table */
if (!ci->is_compat && !ci->is_excluded && l == 2)
- len_max = 64;
+ len_max = 64;
else
len_max = 127;
-
+
tab_de[i].cost = 0x7fffffff;
-
+
if (!is_16bit(ci->decomp_data, l)) {
assert(l <= 2);
@@ -2331,7 +2331,7 @@ void find_decomp_run(DecompEntry *tab_de, int i)
if (de->cost < tab_de[i].cost) {
tab_de[i] = *de;
}
-
+
if (!((i + n) <= CHARCODE_MAX && n < len_max))
break;
ci1 = &unicode_db[i + n];
@@ -2344,7 +2344,7 @@ void find_decomp_run(DecompEntry *tab_de, int i)
n++;
}
}
-
+
if (l <= 8 || l == 18) {
int c_min, c_max, c;
c_min = c_max = -1;
@@ -2415,7 +2415,7 @@ void find_decomp_run(DecompEntry *tab_de, int i)
/* check if a single char is increasing */
if (l <= 4) {
int idx1, idx;
-
+
for(idx1 = 1; (idx = decomp_incr_tab[l - 1][idx1]) >= 0; idx1++) {
n = 1;
for(;;) {
@@ -2499,7 +2499,7 @@ void find_decomp_run(DecompEntry *tab_de, int i)
if (l == 2) {
BOOL is_16bit;
-
+
n = 0;
is_16bit = FALSE;
for(;;) {
@@ -2544,7 +2544,7 @@ void add_decomp_data(uint8_t *data_buf, int *pidx, DecompEntry *de)
{
int i, j, idx, c;
CCInfo *ci;
-
+
idx = *pidx;
de->data_index = idx;
if (de->type <= DECOMP_TYPE_C1) {
@@ -2695,9 +2695,9 @@ void build_decompose_table(FILE *f)
int i, array_len, code_max, data_len, count;
DecompEntry *tab_de, de_s, *de = &de_s;
uint8_t *data_buf;
-
+
code_max = CHARCODE_MAX;
-
+
tab_de = mallocz((code_max + 2) * sizeof(*tab_de));
for(i = code_max; i >= 0; i--) {
@@ -2721,7 +2721,7 @@ void build_decompose_table(FILE *f)
/* dump */
{
int size, size1;
-
+
printf("START LEN TYPE L C SIZE\n");
size = 0;
for(i = 0; i <= code_max; i++) {
@@ -2735,7 +2735,7 @@ void build_decompose_table(FILE *f)
size += size1;
}
}
-
+
printf("array_len=%d estimated size=%d bytes actual=%d bytes\n",
array_len, size, array_len * 6 + data_len);
}
@@ -2773,7 +2773,7 @@ void build_decompose_table(FILE *f)
}
}
fprintf(f, "\n};\n\n");
-
+
fprintf(f, "static const uint8_t unicode_decomp_data[%u] = {",
data_len);
for(i = 0; i < data_len; i++) {
@@ -2786,7 +2786,7 @@ void build_decompose_table(FILE *f)
build_compose_table(f, tab_de);
free(data_buf);
-
+
free(tab_de);
}
@@ -2817,7 +2817,7 @@ static int get_decomp_pos(const DecompEntry *tab_de, int c)
{
int i, v, k;
const DecompEntry *de;
-
+
k = 0;
for(i = 0; i <= CHARCODE_MAX; i++) {
de = &tab_de[i];
@@ -2840,14 +2840,14 @@ void build_compose_table(FILE *f, const DecompEntry *tab_de)
{
int i, v, tab_ce_len;
ComposeEntry *ce, *tab_ce;
-
+
tab_ce = malloc(sizeof(*tab_ce) * COMPOSE_LEN_MAX);
tab_ce_len = 0;
for(i = 0; i <= CHARCODE_MAX; i++) {
CCInfo *ci = &unicode_db[i];
if (ci->decomp_len == 2 && !ci->is_compat &&
!ci->is_excluded) {
- assert(tab_ce_len < COMPOSE_LEN_MAX);
+ assert(tab_ce_len < COMPOSE_LEN_MAX);
ce = &tab_ce[tab_ce_len++];
ce->c[0] = ci->decomp_data[0];
ce->c[1] = ci->decomp_data[1];
@@ -2865,7 +2865,7 @@ void build_compose_table(FILE *f, const DecompEntry *tab_de)
}
}
#endif
-
+
fprintf(f, "static const uint16_t unicode_comp_table[%u] = {",
tab_ce_len);
for(i = 0; i < tab_ce_len; i++) {
@@ -2880,7 +2880,7 @@ void build_compose_table(FILE *f, const DecompEntry *tab_de)
fprintf(f, " 0x%04x,", v);
}
fprintf(f, "\n};\n\n");
-
+
free(tab_ce);
}
@@ -2929,7 +2929,7 @@ void check_compose_table(void)
}
}
}
-
+
}
@@ -2969,7 +2969,7 @@ void check_cc_table(void)
#ifdef PROFILE
{
int64_t ti, count;
-
+
ti = get_time_ns();
count = 0;
/* only do it on meaningful chars */
@@ -2992,7 +2992,7 @@ void normalization_test(const char *filename)
int *in_str, *nfc_str, *nfd_str, *nfkc_str, *nfkd_str;
int in_len, nfc_len, nfd_len, nfkc_len, nfkd_len;
int *buf, buf_len, pos;
-
+
f = fopen(filename, "rb");
if (!f) {
perror(filename);
@@ -3023,7 +3023,7 @@ void normalization_test(const char *filename)
buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFKD, NULL, NULL);
check_str("nfkd", pos, in_str, in_len, buf, buf_len, nfkd_str, nfkd_len);
free(buf);
-
+
buf_len = unicode_normalize((uint32_t **)&buf, (uint32_t *)in_str, in_len, UNICODE_NFC, NULL, NULL);
check_str("nfc", pos, in_str, in_len, buf, buf_len, nfc_str, nfc_len);
free(buf);
@@ -3046,7 +3046,7 @@ int main(int argc, char **argv)
{
const char *unicode_db_path, *outfilename;
char filename[1024];
-
+
if (argc < 2) {
printf("usage: %s unicode_db_path [output_file]\n"
"\n"
@@ -3067,13 +3067,13 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s/SpecialCasing.txt", unicode_db_path);
parse_special_casing(unicode_db, filename);
-
+
snprintf(filename, sizeof(filename), "%s/CaseFolding.txt", unicode_db_path);
parse_case_folding(unicode_db, filename);
snprintf(filename, sizeof(filename), "%s/CompositionExclusions.txt", unicode_db_path);
parse_composition_exclusions(filename);
-
+
snprintf(filename, sizeof(filename), "%s/DerivedCoreProperties.txt", unicode_db_path);
parse_derived_core_properties(filename);
@@ -3089,7 +3089,7 @@ int main(int argc, char **argv)
snprintf(filename, sizeof(filename), "%s/ScriptExtensions.txt",
unicode_db_path);
parse_script_extensions(filename);
-
+
snprintf(filename, sizeof(filename), "%s/emoji-data.txt",
unicode_db_path);
parse_prop_list(filename);
@@ -3098,7 +3098,7 @@ int main(int argc, char **argv)
build_conv_table(unicode_db);
#ifdef DUMP_CASE_FOLDING_SPECIAL_CASES
- dump_case_folding_special_cases(unicode_db);
+ dump_case_folding_special_cases(unicode_db);
#endif
if (!outfilename) {
@@ -3117,7 +3117,7 @@ int main(int argc, char **argv)
} else
{
FILE *fo = fopen(outfilename, "wb");
-
+
if (!fo) {
perror(outfilename);
exit(1);