aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/mb/Unicode/Makefile | 2
-rwxr-xr-x src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl | 7
-rwxr-xr-x src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl | 3
-rwxr-xr-x src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl | 7
-rw-r--r-- src/backend/utils/mb/Unicode/convutils.pm | 62
5 files changed, 44 insertions, 37 deletions
diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile
index 9084f030091..da307d8eb95 100644
--- a/src/backend/utils/mb/Unicode/Makefile
+++ b/src/backend/utils/mb/Unicode/Makefile
@@ -122,7 +122,7 @@ euc-jis-2004-std.txt sjis-0213-2004-std.txt:
$(DOWNLOAD) http://x0213.org/codetable/$(@F)
gb-18030-2000.xml windows-949-2000.xml:
- $(DOWNLOAD) https://ssl.icu-project.org/repos/icu/data/trunk/charset/data/xml/$(@F)
+ $(DOWNLOAD) https://raw.githubusercontent.com/unicode-org/icu-data/master/charset/data/xml/$(@F)
GB2312.TXT:
$(DOWNLOAD) 'http://trac.greenstone.org/browser/trunk/gsdl/unicode/MAPPINGS/EASTASIA/GB/GB2312.TXT?rev=1842&format=txt'
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
index 092a5b44f55..6d1681a18a3 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -24,12 +24,13 @@ my @all;
while (my $line = <$in>)
{
- if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
+ if ($line =~ /^0x(\w+)\s*U\+(\w+)\+(\w+)\s*#\s*(\S.*)?\s*$/)
{
# combined characters
my ($c, $u1, $u2) = ($1, $2, $3);
- my $rest = "U+" . $u1 . "+" . $u2 . $4;
+ # The "\t \t" below is just to avoid insubstantial diffs.
+ my $rest = "U+" . $u1 . "+" . $u2 . "\t \t" . $4;
my $code = hex($c);
my $ucs1 = hex($u1);
my $ucs2 = hex($u2);
@@ -45,7 +46,7 @@ while (my $line = <$in>)
l => $.
};
}
- elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
+ elsif ($line =~ /^0x(\w+)\s*U\+(\w+)\s*#\s*(\S.*)?\s*$/)
{
# non-combined characters
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
index 1d88c0296ee..d8bed27e1b1 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -80,7 +80,8 @@ foreach my $i (@$ct932)
}
}
-foreach my $i (@mapping)
+# extract only SJIS characters
+foreach my $i (grep defined $_->{sjis}, @mapping)
{
my $sjis = $i->{sjis};
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
index b516e91306f..b86714dd46d 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -24,12 +24,13 @@ my @mapping;
while (my $line = <$in>)
{
- if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
+ if ($line =~ /^0x(\w+)\s*U\+(\w+)\+(\w+)\s*#\s*(\S.*)?\s*$/)
{
# combined characters
my ($c, $u1, $u2) = ($1, $2, $3);
- my $rest = "U+" . $u1 . "+" . $u2 . $4;
+ # The "\t \t" below is just to avoid insubstantial diffs.
+ my $rest = "U+" . $u1 . "+" . $u2 . "\t \t" . $4;
my $code = hex($c);
my $ucs1 = hex($u1);
my $ucs2 = hex($u2);
@@ -45,7 +46,7 @@ while (my $line = <$in>)
l => $.
};
}
- elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
+ elsif ($line =~ /^0x(\w+)\s*U\+(\w+)\s*#\s*(\S.*)?\s*$/)
{
# non-combined characters
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
index 2f64a12ea14..9d97061c6fe 100644
--- a/src/backend/utils/mb/Unicode/convutils.pm
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -380,7 +380,8 @@ sub print_radix_table
{
header => "Dummy map, for invalid values",
min_idx => 0,
- max_idx => $widest_range
+ max_idx => $widest_range,
+ label => "dummy map"
};
###
@@ -471,35 +472,37 @@ sub print_radix_table
}
# Also look up the positions of the roots in the table.
- my $b1root = $segmap{"1-byte"};
- my $b2root = $segmap{"2-byte"};
- my $b3root = $segmap{"3-byte"};
- my $b4root = $segmap{"4-byte"};
+ # Missing map represents dummy mapping.
+ my $b1root = $segmap{"1-byte"} || 0;
+ my $b2root = $segmap{"2-byte"} || 0;
+ my $b3root = $segmap{"3-byte"} || 0;
+ my $b4root = $segmap{"4-byte"} || 0;
# And the lower-upper values of each level in each radix tree.
- my $b1_lower = $min_idx{1}{1};
- my $b1_upper = $max_idx{1}{1};
-
- my $b2_1_lower = $min_idx{2}{1};
- my $b2_1_upper = $max_idx{2}{1};
- my $b2_2_lower = $min_idx{2}{2};
- my $b2_2_upper = $max_idx{2}{2};
-
- my $b3_1_lower = $min_idx{3}{1};
- my $b3_1_upper = $max_idx{3}{1};
- my $b3_2_lower = $min_idx{3}{2};
- my $b3_2_upper = $max_idx{3}{2};
- my $b3_3_lower = $min_idx{3}{3};
- my $b3_3_upper = $max_idx{3}{3};
-
- my $b4_1_lower = $min_idx{4}{1};
- my $b4_1_upper = $max_idx{4}{1};
- my $b4_2_lower = $min_idx{4}{2};
- my $b4_2_upper = $max_idx{4}{2};
- my $b4_3_lower = $min_idx{4}{3};
- my $b4_3_upper = $max_idx{4}{3};
- my $b4_4_lower = $min_idx{4}{4};
- my $b4_4_upper = $max_idx{4}{4};
+ # Missing values represent zero.
+ my $b1_lower = $min_idx{1}{1} || 0;
+ my $b1_upper = $max_idx{1}{1} || 0;
+
+ my $b2_1_lower = $min_idx{2}{1} || 0;
+ my $b2_1_upper = $max_idx{2}{1} || 0;
+ my $b2_2_lower = $min_idx{2}{2} || 0;
+ my $b2_2_upper = $max_idx{2}{2} || 0;
+
+ my $b3_1_lower = $min_idx{3}{1} || 0;
+ my $b3_1_upper = $max_idx{3}{1} || 0;
+ my $b3_2_lower = $min_idx{3}{2} || 0;
+ my $b3_2_upper = $max_idx{3}{2} || 0;
+ my $b3_3_lower = $min_idx{3}{3} || 0;
+ my $b3_3_upper = $max_idx{3}{3} || 0;
+
+ my $b4_1_lower = $min_idx{4}{1} || 0;
+ my $b4_1_upper = $max_idx{4}{1} || 0;
+ my $b4_2_lower = $min_idx{4}{2} || 0;
+ my $b4_2_upper = $max_idx{4}{2} || 0;
+ my $b4_3_lower = $min_idx{4}{3} || 0;
+ my $b4_3_upper = $max_idx{4}{3} || 0;
+ my $b4_4_lower = $min_idx{4}{4} || 0;
+ my $b4_4_upper = $max_idx{4}{4} || 0;
###
### Find the maximum value in the whole table, to determine if we can
@@ -607,7 +610,8 @@ sub print_radix_table
for (my $j = 0;
$j < $vals_per_line && $i <= $seg->{max_idx}; $j++)
{
- my $val = $seg->{values}->{$i};
+ # Missing values represent zero.
+ my $val = $seg->{values}->{$i} || 0;
printf $out " 0x%0*x", $colwidth, $val;
$off++;