about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/common/unicode/Makefile 4
-rw-r--r-- src/common/unicode/generate-unicode_nonspacing_table.pl (renamed from src/common/unicode/generate-unicode_combining_table.pl) 12
-rw-r--r-- src/common/wchar.c 8
-rw-r--r-- src/include/common/unicode_nonspacing_table.h (renamed from src/include/common/unicode_combining_table.h) 33
4 files changed, 34 insertions, 23 deletions
diff --git a/src/common/unicode/Makefile b/src/common/unicode/Makefile
index 60e01e748f8..382da476cf9 100644
--- a/src/common/unicode/Makefile
+++ b/src/common/unicode/Makefile
@@ -18,7 +18,7 @@ LIBS += $(PTHREAD_LIBS)
# By default, do nothing.
all:
-update-unicode: unicode_norm_table.h unicode_combining_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h
+update-unicode: unicode_norm_table.h unicode_nonspacing_table.h unicode_east_asian_fw_table.h unicode_normprops_table.h unicode_norm_hashfunc.h
mv $^ $(top_srcdir)/src/include/common/
$(MAKE) normalization-check
@@ -35,7 +35,7 @@ unicode_norm_hashfunc.h: unicode_norm_table.h
unicode_norm_table.h: generate-unicode_norm_table.pl UnicodeData.txt CompositionExclusions.txt
$(PERL) $<
-unicode_combining_table.h: generate-unicode_combining_table.pl UnicodeData.txt
+unicode_nonspacing_table.h: generate-unicode_nonspacing_table.pl UnicodeData.txt
$(PERL) $^ >$@
unicode_east_asian_fw_table.h: generate-unicode_east_asian_fw_table.pl EastAsianWidth.txt
diff --git a/src/common/unicode/generate-unicode_combining_table.pl b/src/common/unicode/generate-unicode_nonspacing_table.pl
index 8177c20260b..3161eed4a26 100644
--- a/src/common/unicode/generate-unicode_combining_table.pl
+++ b/src/common/unicode/generate-unicode_nonspacing_table.pl
@@ -15,9 +15,9 @@ my $prev_codepoint;
my $count = 0;
print
- "/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */\n\n";
+ "/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */\n\n";
-print "static const struct mbinterval combining[] = {\n";
+print "static const struct mbinterval nonspacing[] = {\n";
foreach my $line (<ARGV>)
{
@@ -25,9 +25,11 @@ foreach my $line (<ARGV>)
my @fields = split ';', $line;
$codepoint = hex $fields[0];
- if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
+ # Me and Mn refer to combining characters
+ # Cf refers to format characters
+ if ($fields[2] eq 'Me' || $fields[2] eq 'Mn' || $fields[2] eq 'Cf')
{
- # combining character, save for start of range
+ # non-spacing character, save for start of range
if (!defined($range_start))
{
$range_start = $codepoint;
@@ -35,7 +37,7 @@ foreach my $line (<ARGV>)
}
else
{
- # not a combining character, print out previous range if any
+ # not a non-spacing character, print out previous range if any
if (defined($range_start))
{
printf "\t{0x%04X, 0x%04X},\n", $range_start, $prev_codepoint;
diff --git a/src/common/wchar.c b/src/common/wchar.c
index fa8854d9e9f..3b3fc53009f 100644
--- a/src/common/wchar.c
+++ b/src/common/wchar.c
@@ -620,7 +620,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
* value of -1.
*
* - Non-spacing and enclosing combining characters (general
- * category code Mn or Me in the Unicode database) have a
+ * category code Mn or Me) and format characters (category code Cf in the Unicode database) have a
* column width of 0.
*
* - Spacing characters in the East Asian Wide (W) or East Asian
@@ -638,7 +638,7 @@ mbbisearch(pg_wchar ucs, const struct mbinterval *table, int max)
static int
ucs_wcwidth(pg_wchar ucs)
{
-#include "common/unicode_combining_table.h"
+#include "common/unicode_nonspacing_table.h"
#include "common/unicode_east_asian_fw_table.h"
/* test for 8-bit control characters */
@@ -657,8 +657,8 @@ ucs_wcwidth(pg_wchar ucs)
* factor for display width leads to the correct behavior, so do that
* search first.
*/
- if (mbbisearch(ucs, combining,
- sizeof(combining) / sizeof(struct mbinterval) - 1))
+ if (mbbisearch(ucs, nonspacing,
+ sizeof(nonspacing) / sizeof(struct mbinterval) - 1))
return 0;
/* binary search in table of wide characters */
diff --git a/src/include/common/unicode_combining_table.h b/src/include/common/unicode_nonspacing_table.h
index de1eab3a954..7605712abc3 100644
--- a/src/include/common/unicode_combining_table.h
+++ b/src/include/common/unicode_nonspacing_table.h
@@ -1,6 +1,7 @@
-/* generated by src/common/unicode/generate-unicode_combining_table.pl, do not edit */
+/* generated by src/common/unicode/generate-unicode_nonspacing_table.pl, do not edit */
-static const struct mbinterval combining[] = {
+static const struct mbinterval nonspacing[] = {
+ {0x00AD, 0x00AD},
{0x0300, 0x036F},
{0x0483, 0x0489},
{0x0591, 0x05BD},
@@ -8,13 +9,16 @@ static const struct mbinterval combining[] = {
{0x05C1, 0x05C2},
{0x05C4, 0x05C5},
{0x05C7, 0x05C7},
+ {0x0600, 0x0605},
{0x0610, 0x061A},
+ {0x061C, 0x061C},
{0x064B, 0x065F},
{0x0670, 0x0670},
- {0x06D6, 0x06DC},
+ {0x06D6, 0x06DD},
{0x06DF, 0x06E4},
{0x06E7, 0x06E8},
{0x06EA, 0x06ED},
+ {0x070F, 0x070F},
{0x0711, 0x0711},
{0x0730, 0x074A},
{0x07A6, 0x07B0},
@@ -25,9 +29,8 @@ static const struct mbinterval combining[] = {
{0x0825, 0x0827},
{0x0829, 0x082D},
{0x0859, 0x085B},
- {0x0898, 0x089F},
- {0x08CA, 0x08E1},
- {0x08E3, 0x0902},
+ {0x0890, 0x089F},
+ {0x08CA, 0x0902},
{0x093A, 0x093A},
{0x093C, 0x093C},
{0x0941, 0x0948},
@@ -114,8 +117,7 @@ static const struct mbinterval combining[] = {
{0x17C6, 0x17C6},
{0x17C9, 0x17D3},
{0x17DD, 0x17DD},
- {0x180B, 0x180D},
- {0x180F, 0x180F},
+ {0x180B, 0x180F},
{0x1885, 0x1886},
{0x18A9, 0x18A9},
{0x1920, 0x1922},
@@ -152,6 +154,9 @@ static const struct mbinterval combining[] = {
{0x1CF4, 0x1CF4},
{0x1CF8, 0x1CF9},
{0x1DC0, 0x1DFF},
+ {0x200B, 0x200F},
+ {0x202A, 0x202E},
+ {0x2060, 0x206F},
{0x20D0, 0x20F0},
{0x2CEF, 0x2CF1},
{0x2D7F, 0x2D7F},
@@ -196,6 +201,8 @@ static const struct mbinterval combining[] = {
{0xFB1E, 0xFB1E},
{0xFE00, 0xFE0F},
{0xFE20, 0xFE2F},
+ {0xFEFF, 0xFEFF},
+ {0xFFF9, 0xFFFB},
{0x101FD, 0x101FD},
{0x102E0, 0x102E0},
{0x10376, 0x1037A},
@@ -213,7 +220,8 @@ static const struct mbinterval combining[] = {
{0x1107F, 0x11081},
{0x110B3, 0x110B6},
{0x110B9, 0x110BA},
- {0x110C2, 0x110C2},
+ {0x110BD, 0x110BD},
+ {0x110C2, 0x110CD},
{0x11100, 0x11102},
{0x11127, 0x1112B},
{0x1112D, 0x11134},
@@ -281,15 +289,16 @@ static const struct mbinterval combining[] = {
{0x11D95, 0x11D95},
{0x11D97, 0x11D97},
{0x11EF3, 0x11EF4},
+ {0x13430, 0x13438},
{0x16AF0, 0x16AF4},
{0x16B30, 0x16B36},
{0x16F4F, 0x16F4F},
{0x16F8F, 0x16F92},
{0x16FE4, 0x16FE4},
{0x1BC9D, 0x1BC9E},
- {0x1CF00, 0x1CF46},
+ {0x1BCA0, 0x1CF46},
{0x1D167, 0x1D169},
- {0x1D17B, 0x1D182},
+ {0x1D173, 0x1D182},
{0x1D185, 0x1D18B},
{0x1D1AA, 0x1D1AD},
{0x1D242, 0x1D244},
@@ -304,5 +313,5 @@ static const struct mbinterval combining[] = {
{0x1E2EC, 0x1E2EF},
{0x1E8D0, 0x1E8D6},
{0x1E944, 0x1E94A},
- {0xE0100, 0xE01EF},
+ {0xE0001, 0xE01EF},
};