diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/common/unicode/Makefile | 4 | ||||
-rw-r--r-- | src/common/unicode/generate-unicode_nonspacing_table.pl (renamed from src/common/unicode/generate-unicode_combining_table.pl) | 12 | ||||
-rw-r--r-- | src/common/wchar.c | 8 | ||||
-rw-r--r-- | src/include/common/unicode_nonspacing_table.h (renamed from src/include/common/unicode_combining_table.h) | 33 |
4 files changed, 34 insertions, 23 deletions
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile index 60e01e748f8..382da476cf9 100644 --- a/src/common/unicode/Makefile +++ b/src/common/unicode/Makefile @@ -18,7 +18,7 @@ LIBS += $(PTHREAD_LIBS) # By default, do nothing. all: -update-unicode: unicode_norm_table.h unicode_combining_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h +update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h mv $^ $(top_srcdir)/src/include/common/ $(MAKE) normalization-check @@ -35,7 +35,7 @@ unicode_norm_hashfunc.h: unicode_norm_table.h unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt $(PERL) $< -unicode_combining_table.h: generate-unicode_combining_table.pl UnicodeData.txt +unicode_nonspacing_table.h: generate-unicode_nonspacing_table.pl UnicodeData.txt $(PERL) $^ >$@ unicode_east_asian_fw_table.h: generate-unicode_east_asian_fw_table.pl EastAsianWidth.txt diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_nonspacing_table.pl index 8177c20260b..3161eed4a26 100644 --- a/src/common/unicode/generate-unicode_combining_table.pl +++ b/src/common/unicode/generate-unicode_nonspacing_table.pl @@ -15,9 +15,9 @@ my $prev_codepoint; my $count = 0; print - "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n"; + "/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */\n\n"; -print "static const struct mbinterval combining[] = {\n"; +print "static const struct mbinterval nonspacing[] = {\n"; foreach my $line (<ARGV>) { @@ -25,9 +25,11 @@ foreach my $line (<ARGV>) my @fields = split ';', $line; $codepoint = hex $fields[0]; - if ($fields[2] eq 'Me' || $fields[2] eq 'Mn') + # Me and Mn refer to combining characters + # Cf refers to format characters + if ($fields[2] eq 'Me' || $fields[2] eq 'Mn' || $fields[2] eq 'Cf') { - # combining character, save for start of range + # non-spacing character, save for start of range if (!defined($range_start)) { $range_start = $codepoint; @@ -35,7 +37,7 @@ foreach my $line (<ARGV>) } else { - # not a combining character, print out previous range if any + # not a non-spacing character, print out previous range if any if (defined($range_start)) { printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint; diff --git a/src/common/wchar.c b/src/common/wchar.c index fa8854d9e9f..3b3fc53009f 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -620,7 +620,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max) * value of -1. * * - Non-spacing and enclosing combining characters (general - * category code Mn or Me in the Unicode database) have a + * category code Mn, Me or Cf in the Unicode database) have a * column width of 0. * * - Spacing characters in the East Asian Wide (W) or East Asian @@ -638,7 +638,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max) static int ucs_wcwidth(pg_wchar ucs) { -#include "common/unicode_combining_table.h" +#include "common/unicode_nonspacing_table.h" #include "common/unicode_east_asian_fw_table.h" /* test for 8-bit control characters */ @@ -657,8 +657,8 @@ ucs_wcwidth(pg_wchar ucs) * factor for display width leads to the correct behavior, so do that * search first. */ - if (mbbisearch(ucs, combining, - sizeof(combining) / sizeof(struct mbinterval) - 1)) + if (mbbisearch(ucs, nonspacing, + sizeof(nonspacing) / sizeof(struct mbinterval) - 1)) return 0; /* binary search in table of wide characters */ diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_nonspacing_table.h index de1eab3a954..7605712abc3 100644 --- a/src/include/common/unicode_combining_table.h +++ b/src/include/common/unicode_nonspacing_table.h @@ -1,6 +1,7 @@ -/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */ +/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */ -static const struct mbinterval combining[] = { +static const struct mbinterval nonspacing[] = { + {0x00AD, 0x00AD}, {0x0300, 0x036F}, {0x0483, 0x0489}, {0x0591, 0x05BD}, @@ -8,13 +9,16 @@ static const struct mbinterval combining[] = { {0x05C1, 0x05C2}, {0x05C4, 0x05C5}, {0x05C7, 0x05C7}, + {0x0600, 0x0605}, {0x0610, 0x061A}, + {0x061C, 0x061C}, {0x064B, 0x065F}, {0x0670, 0x0670}, - {0x06D6, 0x06DC}, + {0x06D6, 0x06DD}, {0x06DF, 0x06E4}, {0x06E7, 0x06E8}, {0x06EA, 0x06ED}, + {0x070F, 0x070F}, {0x0711, 0x0711}, {0x0730, 0x074A}, {0x07A6, 0x07B0}, @@ -25,9 +29,8 @@ static const struct mbinterval combining[] = { {0x0825, 0x0827}, {0x0829, 0x082D}, {0x0859, 0x085B}, - {0x0898, 0x089F}, - {0x08CA, 0x08E1}, - {0x08E3, 0x0902}, + {0x0890, 0x089F}, + {0x08CA, 0x0902}, {0x093A, 0x093A}, {0x093C, 0x093C}, {0x0941, 0x0948}, @@ -114,8 +117,7 @@ static const struct mbinterval combining[] = { {0x17C6, 0x17C6}, {0x17C9, 0x17D3}, {0x17DD, 0x17DD}, - {0x180B, 0x180D}, - {0x180F, 0x180F}, + {0x180B, 0x180F}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x1922}, @@ -152,6 +154,9 @@ static const struct mbinterval combining[] = { {0x1CF4, 0x1CF4}, {0x1CF8, 0x1CF9}, {0x1DC0, 0x1DFF}, + {0x200B, 0x200F}, + {0x202A, 0x202E}, + {0x2060, 0x206F}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, @@ -196,6 +201,8 @@ static const struct mbinterval combining[] = { {0xFB1E, 0xFB1E}, {0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, + {0xFEFF, 0xFEFF}, + {0xFFF9, 0xFFFB}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, @@ -213,7 +220,8 @@ static const struct mbinterval combining[] = { {0x1107F, 0x11081}, {0x110B3, 0x110B6}, {0x110B9, 0x110BA}, - {0x110C2, 0x110C2}, + {0x110BD, 0x110BD}, + {0x110C2, 0x110CD}, {0x11100, 0x11102}, {0x11127, 0x1112B}, {0x1112D, 0x11134}, @@ -281,15 +289,16 @@ static const struct mbinterval combining[] = { {0x11D95, 0x11D95}, {0x11D97, 0x11D97}, {0x11EF3, 0x11EF4}, + {0x13430, 0x13438}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F8F, 0x16F92}, {0x16FE4, 0x16FE4}, {0x1BC9D, 0x1BC9E}, - {0x1CF00, 0x1CF46}, + {0x1BCA0, 0x1CF46}, {0x1D167, 0x1D169}, - {0x1D17B, 0x1D182}, + {0x1D173, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, @@ -304,5 +313,5 @@ static const struct mbinterval combining[] = { {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, - {0xE0100, 0xE01EF}, + {0xE0001, 0xE01EF}, }; |