33 files changed, 791 insertions, 1541 deletions
diff --git a/src/backend/utils/mb/Unicode/Makefile b/src/backend/utils/mb/Unicode/Makefile
index 9d2ef5e3d22..ea21f4a8527 100644
--- a/src/backend/utils/mb/Unicode/Makefile
+++ b/src/backend/utils/mb/Unicode/Makefile
@@ -39,8 +39,6 @@ WINMAPS = win866_to_utf8.map utf8_to_win866.map \
 	win1258_to_utf8.map utf8_to_win1258.map
 
 GENERICMAPS = $(ISO8859MAPS) $(WINMAPS) \
-	johab_to_utf8.map utf8_to_johab.map \
-	uhc_to_utf8.map utf8_to_uhc.map \
 	gbk_to_utf8.map utf8_to_gbk.map \
 	koi8r_to_utf8.map utf8_to_koi8r.map
 
@@ -51,6 +49,8 @@ SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \
 	sjis_to_utf8.map utf8_to_sjis.map \
 	gb18030_to_utf8.map utf8_to_gb18030.map \
 	big5_to_utf8.map utf8_to_big5.map \
+	johab_to_utf8.map utf8_to_johab.map \
+	uhc_to_utf8.map utf8_to_uhc.map \
 	euc_jis_2004_to_utf8.map euc_jis_2004_to_utf8_combined.map \
 	utf8_to_euc_jis_2004.map utf8_to_euc_jis_2004_combined.map \
 	shift_jis_2004_to_utf8.map shift_jis_2004_to_utf8_combined.map \
@@ -63,23 +63,29 @@ ISO8859TEXTS = 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT \
 	8859-10.TXT 8859-13.TXT 8859-14.TXT 8859-15.TXT \
 	8859-16.TXT
 
-WINTEXTS = CP866.TXT CP874.TXT CP936.TXT CP949.TXT \
+WINTEXTS = CP866.TXT CP874.TXT CP936.TXT \
 	CP1250.TXT CP1251.TXT \
 	CP1252.TXT CP1253.TXT CP1254.TXT CP1255.TXT \
 	CP1256.TXT CP1257.TXT CP1258.TXT
 
 GENERICTEXTS = $(ISO8859TEXTS) $(WINTEXTS) \
-	KOI8-R.TXT KOI8-U.TXT JOHAB.TXT
+	KOI8-R.TXT KOI8-U.TXT
 
 all: $(MAPS)
 
 $(GENERICMAPS): UCS_to_most.pl $(GENERICTEXTS)
 	$(PERL) $<
 
-euc_jp_to_utf8.map utf8_to_euc_jp.map: UCS_to_EUC_JP.pl JIS0201.TXT JIS0208.TXT JIS0212.TXT
+johab_to_utf8.map utf8_to_johab.map: UCS_to_JOHAB.pl JOHAB.TXT
+	$(PERL) $<
+
+uhc_to_utf8.map utf8_to_uhc.map: UCS_to_UHC.pl windows-949-2000.xml
+	$(PERL) $<
+
+euc_jp_to_utf8.map utf8_to_euc_jp.map: UCS_to_EUC_JP.pl CP932.TXT JIS0212.TXT
 	$(PERL) $<
 
-euc_cn_to_utf8.map utf8_to_euc_cn.map: UCS_to_EUC_CN.pl GB2312.TXT
+euc_cn_to_utf8.map utf8_to_euc_cn.map: UCS_to_EUC_CN.pl gb-18030-2000.xml
 	$(PERL) $<
 
 euc_kr_to_utf8.map utf8_to_euc_kr.map: UCS_to_EUC_KR.pl KSX1001.TXT
@@ -119,7 +125,7 @@ BIG5.TXT CNS11643.TXT:
 euc-jis-2004-std.txt sjis-0213-2004-std.txt:
 	$(DOWNLOAD) http://x0213.org/codetable/$(@F)
 
-gb-18030-2000.xml:
+gb-18030-2000.xml windows-949-2000.xml:
 	$(DOWNLOAD) https://ssl.icu-project.org/repos/icu/data/trunk/charset/data/xml/$(@F)
 
 GB2312.TXT:
@@ -137,7 +143,7 @@ KOI8-R.TXT KOI8-U.TXT:
 $(ISO8859TEXTS):
 	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/ISO8859/$(@F)
 
-$(filter-out CP8%,$(WINTEXTS)):
+$(filter-out CP8%,$(WINTEXTS)) CP932.TXT CP950.TXT:
 	$(DOWNLOAD) http://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/$(@F)
 
 $(filter CP8%,$(WINTEXTS)):
diff --git a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
index 127fd157b07..6a1321bab84 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_BIG5.pl
@@ -25,56 +25,17 @@
 #		 # and Unicode name (not used in this script)
 
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
+# Load BIG5.TXT
+my $all = &read_source("BIG5.TXT");
 
-#
-# first, generate UTF8 --> BIG5 table
-#
-$in_file = "BIG5.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
+# Load CP950.TXT
+my $cp950txt = &read_source("CP950.TXT");
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-		$array{$utf} = $code;
-	}
-}
-close(FILE);
-
-$in_file = "CP950.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
+foreach my $i (@$cp950txt) {
+	my $code = $i->{code};
+	my $ucs = $i->{ucs};
 
 	# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
 	# from CP950.TXT
@@ -83,126 +44,25 @@ while (<FILE>)
 		&& $code >= 0xf9d6
 		&& $code <= 0xf9dc)
 	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-		$array{$utf} = $code;
+		push @$all, {code => $code,
+					 ucs => $ucs,
+					 comment => $i->{comment},
+					 direction => "both"};
 	}
 }
-close(FILE);
-
-$file = lc("utf8_to_big5.map");
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapBIG5[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate BIG5 --> UTF8 table
-#
-$in_file = "BIG5.TXT";
 
-open(FILE, $in_file) || die("cannot open $in_file");
+foreach my $i (@$all) {
+	my $code = $i->{code};
+	my $ucs = $i->{ucs};
 
-reset 'array';
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-		$array{$code} = $utf;
-	}
-}
-close(FILE);
-
-$in_file = "CP950.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-
-	# Pick only the ETEN extended characters in the range 0xf9d6 - 0xf9dc
-	# from CP950.TXT
-	if (   $code >= 0x80
-		&& $ucs >= 0x0080
-		&& $code >= 0xf9d6
-		&& $code <= 0xf9dc)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-		$array{$code} = $utf;
-	}
-}
-close(FILE);
-
-$file = lc("big5_to_utf8.map");
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapBIG5[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
+	# BIG5.TXT maps several BIG5 characters to U+FFFD. The UTF-8 to BIG5 mapping can
+	# contain only one of them. XXX: Doesn't really make sense to include any of them,
+	# but for historical reasons, we map the first one of them.
+	if ($i->{ucs} == 0xFFFD && $i->{code} != 0xA15A)
 	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
+		$i->{direction} = "to_unicode";
 	}
 }
 
-print FILE "};\n";
-close(FILE);
+# Output
+print_tables("BIG5", $all);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
index 53f44773c93..8df23f8be65 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
@@ -1,128 +1,76 @@
 #! /usr/bin/perl
 #
-# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+# Copyright (c) 2007-2016, PostgreSQL Global Development Group
 #
-# src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
+# src/backend/utils/mb/Unicode/UCS_to_EUC_CN.pl
 #
-# Generate UTF-8 <--> EUC_CN code conversion tables from
-# map files provided by Unicode organization.
-# Unfortunately it is prohibited by the organization
-# to distribute the map files. So if you try to use this script,
-# you have to obtain GB2312.TXT from
-# the organization's ftp site.
+# Generate UTF-8 <--> EUC_CN code conversion tables from
+# "gb-18030-2000.xml", obtained from
+# http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/
 #
-# GB2312.TXT format:
-#		 GB2312 code in hex
-#		 UCS-2 code in hex
-#		 # and Unicode name (not used in this script)
+# The lines we care about in the source file look like
+#    <a u="009A" b="81 30 83 36"/>
+# where the "u" field is the Unicode code point in hex,
+# and the "b" field is the hex byte sequence for GB18030
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
-# first generate UTF-8 --> EUC_CN table
+# Read the input
 
-$in_file = "GB2312.TXT";
+$in_file = "gb-18030-2000.xml";
 
 open(FILE, $in_file) || die("cannot open $in_file");
 
+my @mapping;
+
 while (<FILE>)
 {
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
+	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
+	$u = $1;
+	$c = $2;
+	$c =~ s/ //g;
 	$ucs  = hex($u);
 	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$array{$utf} = ($code | 0x8080);
-	}
-}
-close(FILE);
-
-$file = "utf8_to_euc_cn.map";
-open(FILE, "> $file") || die("cannot open $file");
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_CN[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
+	# The GB-18030 character set, which we use as the source, contains
+	# a lot of extra characters on top of the GB2312 character set that
+	# EUC_CN encodes. Filter out those extra characters.
+	next if (($code & 0xFF) < 0xA1);
+	next if (!($code >= 0xA100 && $code <= 0xA9FF ||
+			   $code >= 0xB000 && $code <= 0xF7FF));
+
+	next if ($code >= 0xA2A1 && $code <= 0xA2B0);
+	next if ($code >= 0xA2E3 && $code <= 0xA2E4);
+	next if ($code >= 0xA2EF && $code <= 0xA2F0);
+	next if ($code >= 0xA2FD && $code <= 0xA2FE);
+	next if ($code >= 0xA4F4 && $code <= 0xA4FE);
+	next if ($code >= 0xA5F7 && $code <= 0xA5FE);
+	next if ($code >= 0xA6B9 && $code <= 0xA6C0);
+	next if ($code >= 0xA6D9 && $code <= 0xA6FE);
+	next if ($code >= 0xA7C2 && $code <= 0xA7D0);
+	next if ($code >= 0xA7F2 && $code <= 0xA7FE);
+	next if ($code >= 0xA8BB && $code <= 0xA8C4);
+	next if ($code >= 0xA8EA && $code <= 0xA8FE);
+	next if ($code >= 0xA9A1 && $code <= 0xA9A3);
+	next if ($code >= 0xA9F0 && $code <= 0xA9FE);
+	next if ($code >= 0xD7FA && $code <= 0xD7FE);
+
+	# A couple of characters are mapped differently from GB-2312 or GB-18030
+	if ($code == 0xA1A4)
 	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
+		$ucs = 0x30FB;
 	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_CN --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	if ($code == 0xA1AA)
 	{
-		next;
+		$ucs = 0x2015;
 	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
 
-		$code |= 0x8080;
-		$array{$code} = $utf;
+	push @mapping, {
+		ucs => $ucs,
+		code => $code,
+		direction => 'both'
 	}
 }
 close(FILE);
 
-$file = "euc_cn_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_CN[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-	}
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_CN", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
index d2f1b757cb3..b4e140b657c 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JIS_2004.pl
@@ -7,9 +7,7 @@
 # Generate UTF-8 <--> EUC_JIS_2004 code conversion tables from
 # "euc-jis-2004-std.txt" (http://x0213.org)
 
-require "ucs2utf.pl";
-
-$TEST = 0;
+require "convutils.pm";
 
 # first generate UTF-8 --> EUC_JIS_2004 table
 
@@ -17,10 +15,7 @@ $in_file = "euc-jis-2004-std.txt";
 
 open(FILE, $in_file) || die("cannot open $in_file");
 
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
+my @all;
 
 while ($line = <FILE>)
 {
@@ -31,14 +26,14 @@ while ($line = <FILE>)
 		$u2             = $3;
 		$rest           = "U+" . $u1 . "+" . $u2 . $4;
 		$code           = hex($c);
-		$ucs            = hex($u1);
-		$utf1           = &ucs2utf($ucs);
-		$ucs            = hex($u2);
-		$utf2           = &ucs2utf($ucs);
-		$str            = sprintf "%08x%08x", $utf1, $utf2;
-		$array1{$str}   = $code;
-		$comment1{$str} = $rest;
-		$count1++;
+		$ucs1           = hex($u1);
+		$ucs2           = hex($u2);
+
+		push @all, { direction => 'both',
+					 ucs => $ucs1,
+					 ucs_second => $ucs2,
+					 code => $code,
+					 comment => $rest };
 		next;
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
@@ -54,252 +49,11 @@ while ($line = <FILE>)
 
 	$ucs  = hex($u);
 	$code = hex($c);
-	$utf  = &ucs2utf($ucs);
-	if ($array{$utf} ne "")
-	{
-		printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-		next;
-	}
-	$count++;
-
-	$array{$utf}    = $code;
-	$comment{$code} = $rest;
-}
-close(FILE);
-
-$file = "utf8_to_euc_jis_2004.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_utf_to_local ULmapEUC_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%08x, 0x%06x}	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-	else
-	{
-		printf FILE "  {0x%08x, 0x%06x},	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-if ($TEST == 1)
-{
-	$file1 = "utf8.data";
-	$file2 = "euc_jis_2004.data";
-	open(FILE1, "> $file1") || die("cannot open $file1");
-	open(FILE2, "> $file2") || die("cannot open $file2");
-
-	for $index (sort { $a <=> $b } keys(%array))
-	{
-		$code = $array{$index};
-		if (   $code > 0x00
-			&& $code != 0x09
-			&& $code != 0x0a
-			&& $code != 0x0d
-			&& $code != 0x5c
-			&& (   $code < 0x80
-				|| ($code >= 0x8ea1   && $code <= 0x8efe)
-				|| ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
-				|| ($code >= 0xa1a1   && $code <= 0x8fefe)))
-		{
-			for ($i = 3; $i >= 0; $i--)
-			{
-				$s    = $i * 8;
-				$mask = 0xff << $s;
-				print FILE1 pack("C", ($index & $mask) >> $s)
-				  if $index & $mask;
-				print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
-			}
-			print FILE1 "\n";
-			print FILE2 "\n";
-		}
-	}
-}
 
-$file = "utf8_to_euc_jis_2004_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-  "static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {\n";
+	next if ($code < 0x80 && $ucs < 0x80);
 
-for $index (sort { $a cmp $b } keys(%array1))
-{
-	$code = $array1{$index};
-	$count1--;
-	if ($count1 == 0)
-	{
-		printf FILE "  {0x%s, 0x%s, 0x%06x}	/* %s */\n", substr($index, 0, 8),
-		  substr($index, 8, 8), $code, $comment1{$index};
-	}
-	else
-	{
-		printf FILE "  {0x%s, 0x%s, 0x%06x},	/* %s */\n",
-		  substr($index, 0, 8), substr($index, 8, 8), $code,
-		  $comment1{$index};
-	}
+	push @all, { direction => 'both', ucs => $ucs, code => $code, comment => $rest };
 }
-
-print FILE "};\n";
 close(FILE);
 
-if ($TEST == 1)
-{
-	for $index (sort { $a cmp $b } keys(%array1))
-	{
-		$code = $array1{$index};
-		if (   $code > 0x00
-			&& $code != 0x09
-			&& $code != 0x0a
-			&& $code != 0x0d
-			&& $code != 0x5c
-			&& (   $code < 0x80
-				|| ($code >= 0x8ea1   && $code <= 0x8efe)
-				|| ($code >= 0x8fa1a1 && $code <= 0x8ffefe)
-				|| ($code >= 0xa1a1   && $code <= 0x8fefe)))
-		{
-
-			$v1 = hex(substr($index, 0, 8));
-			$v2 = hex(substr($index, 8, 8));
-
-			for ($i = 3; $i >= 0; $i--)
-			{
-				$s    = $i * 8;
-				$mask = 0xff << $s;
-				print FILE1 pack("C", ($v1 & $mask) >> $s)   if $v1 & $mask;
-				print FILE2 pack("C", ($code & $mask) >> $s) if $code & $mask;
-			}
-			for ($i = 3; $i >= 0; $i--)
-			{
-				$s    = $i * 8;
-				$mask = 0xff << $s;
-				print FILE1 pack("C", ($v2 & $mask) >> $s) if $v2 & $mask;
-			}
-			print FILE1 "\n";
-			print FILE2 "\n";
-		}
-	}
-	close(FILE1);
-	close(FILE2);
-}
-
-# then generate EUC_JIS_2004 --> UTF-8 table
-
-$in_file = "euc-jis-2004-std.txt";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
-
-while ($line = <FILE>)
-{
-	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
-	{
-		$c               = $1;
-		$u1              = $2;
-		$u2              = $3;
-		$rest            = "U+" . $u1 . "+" . $u2 . $4;
-		$code            = hex($c);
-		$ucs             = hex($u1);
-		$utf1            = &ucs2utf($ucs);
-		$ucs             = hex($u2);
-		$utf2            = &ucs2utf($ucs);
-		$str             = sprintf "%08x%08x", $utf1, $utf2;
-		$array1{$code}   = $str;
-		$comment1{$code} = $rest;
-		$count1++;
-		next;
-	}
-	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
-	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
-	}
-	else
-	{
-		next;
-	}
-
-	$ucs  = hex($u);
-	$code = hex($c);
-	$utf  = &ucs2utf($ucs);
-	if ($array{$code} ne "")
-	{
-		printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-		next;
-	}
-	$count++;
-
-	$array{$code}  = $utf;
-	$comment{$utf} = $rest;
-}
-close(FILE);
-
-$file = "euc_jis_2004_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_local_to_utf LUmapEUC_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%06x, 0x%08x}	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-	else
-	{
-		printf FILE "  {0x%06x, 0x%08x},	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "euc_jis_2004_to_utf8_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_EUC_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-  "static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array1))
-{
-	$code = $array1{$index};
-	$count1--;
-	if ($count1 == 0)
-	{
-		printf FILE "  {0x%06x, 0x%s, 0x%s}	/* %s */\n", $index,
-		  substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-	}
-	else
-	{
-		printf FILE "  {0x%06x, 0x%s, 0x%s},	/* %s */\n", $index,
-		  substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-	}
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_JIS_2004", \@all, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
index 055fc849bae..0e9dd292bff 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_JP.pl
@@ -8,275 +8,223 @@
 # map files provided by Unicode organization.
 # Unfortunately it is prohibited by the organization
 # to distribute the map files. So if you try to use this script,
-# you have to obtain JIS0201.TXT, JIS0208.TXT, JIS0212.TXT from
-# the organization's ftp site.
-#
-# JIS0201.TXT format:
-#		 JIS0201 code in hex
-#		 UCS-2 code in hex
-#		 # and Unicode name (not used in this script)
-#
-# JIS0208.TXT format:
-#		 JIS0208 shift-JIS code in hex
-#		 JIS0208 code in hex
-#		 UCS-2 code in hex
-#		 # and Unicode name (not used in this script)
-#
-# JIS0212.TXT format:
-#		 JIS0212 code in hex
-#		 UCS-2 code in hex
-#		 # and Unicode name (not used in this script)
-
-require "ucs2utf.pl";
-
-# first generate UTF-8 --> EUC_JP table
+# you have to obtain CP932.TXT and JIS0212.TXT from the
+# organization's ftp site.
 
-#
-# JIS0201
-#
-$in_file = "JIS0201.TXT";
+use strict;
+require "convutils.pm";
 
-open(FILE, $in_file) || die("cannot open $in_file");
+# Load JIS0212.TXT
+my $jis0212 = &read_source("JIS0212.TXT");
 
-reset 'array';
+my @mapping;
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+foreach my $i (@$jis0212) {
+	# We have a different mapping for this in the EUC_JP to UTF-8 direction.
+	if ($i->{code} == 0x2243)
 	{
-		next;
+		$i->{direction} = "from_unicode";
 	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
 
-		# add single shift 2
-		$array{$utf} = ($code | 0x8e00);
+	if ($i->{code} == 0x2271)
+	{
+		$i->{direction} = "to_unicode";
 	}
-}
-close(FILE);
-
-#
-# JIS0208
-#
-$in_file = "JIS0208.TXT";
 
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	if ($i->{ucs} >= 0x080)
 	{
-		next;
+		$i->{code} = $i->{code} | 0x8f8080;
 	}
-	($s, $c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
+	else
 	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$array{$utf} = ($code | 0x8080);
+		next;
 	}
+
+	push @mapping, $i;
 }
-close(FILE);
 
-#
-# JIS0212
-#
-$in_file = "JIS0212.TXT";
+# Load CP932.TXT.
+my $ct932 = &read_source("CP932.TXT");
 
-open(FILE, $in_file) || die("cannot open $in_file");
+foreach my $i (@$ct932) {
+	my $sjis = $i->{code};
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	# We have a different mapping for this in the EUC_JP to UTF-8 direction.
+	if ($sjis == 0xeefa ||
+		$sjis == 0xeefb ||
+		$sjis == 0xeefc)
 	{
 		next;
 	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
 
-		$array{$utf} = ($code | 0x8f8080);
-	}
-}
-close(FILE);
+	if ($sjis >= 0xa1)
+	{
+		my $jis = &sjis2jis($sjis);
 
-$file = "utf8_to_euc_jp.map";
-open(FILE, "> $file") || die("cannot open $file");
+		$i->{code} = $jis | ($jis < 0x100 ? 0x8e00 :
+							 ($sjis >= 0xeffd  ? 0x8f8080 : 0x8080));
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_JP[ $count ] = {\n";
+		# Remember the SJIS code for later.
+		$i->{sjis} = $sjis;
 
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
+		push @mapping, $i;
 	}
 }
 
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_JP --> UTF8 table
-#
+foreach my $i (@mapping) {
+	my $sjis = $i->{sjis};
 
-#
-# JIS0201
-#
-$in_file = "JIS0201.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	# These SJIS characters are excluded completely.
+	if ($sjis >= 0xed00 && $sjis <= 0xeef9 ||
+		$sjis >= 0xfa54 && $sjis <= 0xfa56 ||
+		$sjis >= 0xfa58 && $sjis <= 0xfc4b)
 	{
+		$i->{direction} = "none";
 		next;
 	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		# add single shift 2
-		$code |= 0x8e00;
-		$array{$code} = $utf;
-	}
-}
-close(FILE);
-
-#
-# JIS0208
-#
-$in_file = "JIS0208.TXT";
-
-open(FILE, $in_file) || die("cannot open $in_file");
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	# These SJIS characters are only in the UTF-8 to EUC_JP table
+	if ($sjis == 0xeefa || $sjis == 0xeefb || $sjis == 0xeefc)
 	{
+		$i->{direction} = "from_unicode";
 		next;
 	}
-	($s, $c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
 
-		$code |= 0x8080;
-		$array{$code} = $utf;
+	if ($sjis == 0x8790 || $sjis == 0x8791 || $sjis == 0x8792 ||
+		$sjis == 0x8795 || $sjis == 0x8796 || $sjis == 0x8797 ||
+		$sjis == 0x879a || $sjis == 0x879b || $sjis == 0x879c ||
+		($sjis >= 0xfa4a && $sjis <= 0xfa53))
+	{
+		$i->{direction} = "to_unicode";
+		next;
 	}
 }
-close(FILE);
 
-#
-# JIS0212
-#
-$in_file = "JIS0212.TXT";
+push @mapping, (
+	 {direction => 'both', ucs => 0x4efc, code => 0x8ff4af, comment => '# CJK(4EFC)'},
+	 {direction => 'both', ucs => 0x50f4, code => 0x8ff4b0, comment => '# CJK(50F4)'},
+	 {direction => 'both', ucs => 0x51EC, code => 0x8ff4b1, comment => '# CJK(51EC)'},
+	 {direction => 'both', ucs => 0x5307, code => 0x8ff4b2, comment => '# CJK(5307)'},
+	 {direction => 'both', ucs => 0x5324, code => 0x8ff4b3, comment => '# CJK(5324)'},
+	 {direction => 'both', ucs => 0x548A, code => 0x8ff4b5, comment => '# CJK(548A)'},
+	 {direction => 'both', ucs => 0x5759, code => 0x8ff4b6, comment => '# CJK(5759)'},
+	 {direction => 'both', ucs => 0x589E, code => 0x8ff4b9, comment => '# CJK(589E)'},
+	 {direction => 'both', ucs => 0x5BEC, code => 0x8ff4ba, comment => '# CJK(5BEC)'},
+	 {direction => 'both', ucs => 0x5CF5, code => 0x8ff4bb, comment => '# CJK(5CF5)'},
+	 {direction => 'both', ucs => 0x5D53, code => 0x8ff4bc, comment => '# CJK(5D53)'},
+	 {direction => 'both', ucs => 0x5FB7, code => 0x8ff4be, comment => '# CJK(5FB7)'},
+	 {direction => 'both', ucs => 0x6085, code => 0x8ff4bf, comment => '# CJK(6085)'},
+	 {direction => 'both', ucs => 0x6120, code => 0x8ff4c0, comment => '# CJK(6120)'},
+	 {direction => 'both', ucs => 0x654E, code => 0x8ff4c1, comment => '# CJK(654E)'},
+	 {direction => 'both', ucs => 0x663B, code => 0x8ff4c2, comment => '# CJK(663B)'},
+	 {direction => 'both', ucs => 0x6665, code => 0x8ff4c3, comment => '# CJK(6665)'},
+	 {direction => 'both', ucs => 0x6801, code => 0x8ff4c6, comment => '# CJK(6801)'},
+	 {direction => 'both', ucs => 0x6A6B, code => 0x8ff4c9, comment => '# CJK(6A6B)'},
+	 {direction => 'both', ucs => 0x6AE2, code => 0x8ff4ca, comment => '# CJK(6AE2)'},
+	 {direction => 'both', ucs => 0x6DF2, code => 0x8ff4cc, comment => '# CJK(6DF2)'},
+	 {direction => 'both', ucs => 0x6DF8, code => 0x8ff4cb, comment => '# CJK(6DF8)'},
+	 {direction => 'both', ucs => 0x7028, code => 0x8ff4cd, comment => '# CJK(7028)'},
+	 {direction => 'both', ucs => 0x70BB, code => 0x8ff4ae, comment => '# CJK(70BB)'},
+	 {direction => 'both', ucs => 0x7501, code => 0x8ff4d0, comment => '# CJK(7501)'},
+	 {direction => 'both', ucs => 0x7682, code => 0x8ff4d1, comment => '# CJK(7682)'},
+	 {direction => 'both', ucs => 0x769E, code => 0x8ff4d2, comment => '# CJK(769E)'},
+	 {direction => 'both', ucs => 0x7930, code => 0x8ff4d4, comment => '# CJK(7930)'},
+	 {direction => 'both', ucs => 0x7AE7, code => 0x8ff4d9, comment => '# CJK(7AE7)'},
+	 {direction => 'both', ucs => 0x7DA0, code => 0x8ff4dc, comment => '# CJK(7DA0)'},
+	 {direction => 'both', ucs => 0x7DD6, code => 0x8ff4dd, comment => '# CJK(7DD6)'},
+	 {direction => 'both', ucs => 0x8362, code => 0x8ff4df, comment => '# CJK(8362)'},
+	 {direction => 'both', ucs => 0x85B0, code => 0x8ff4e1, comment => '# CJK(85B0)'},
+	 {direction => 'both', ucs => 0x8807, code => 0x8ff4e4, comment => '# CJK(8807)'},
+	 {direction => 'both', ucs => 0x8B7F, code => 0x8ff4e6, comment => '# CJK(8B7F)'},
+	 {direction => 'both', ucs => 0x8CF4, code => 0x8ff4e7, comment => '# CJK(8CF4)'},
+	 {direction => 'both', ucs => 0x8D76, code => 0x8ff4e8, comment => '# CJK(8D76)'},
+	 {direction => 'both', ucs => 0x90DE, code => 0x8ff4ec, comment => '# CJK(90DE)'},
+	 {direction => 'both', ucs => 0x9115, code => 0x8ff4ee, comment => '# CJK(9115)'},
+	 {direction => 'both', ucs => 0x9592, code => 0x8ff4f1, comment => '# CJK(9592)'},
+	 {direction => 'both', ucs => 0x973B, code => 0x8ff4f4, comment => '# CJK(973B)'},
+	 {direction => 'both', ucs => 0x974D, code => 0x8ff4f5, comment => '# CJK(974D)'},
+	 {direction => 'both', ucs => 0x9751, code => 0x8ff4f6, comment => '# CJK(9751)'},
+	 {direction => 'both', ucs => 0x999E, code => 0x8ff4fa, comment => '# CJK(999E)'},
+	 {direction => 'both', ucs => 0x9AD9, code => 0x8ff4fb, comment => '# CJK(9AD9)'},
+	 {direction => 'both', ucs => 0x9B72, code => 0x8ff4fc, comment => '# CJK(9B72)'},
+	 {direction => 'both', ucs => 0x9ED1, code => 0x8ff4fe, comment => '# CJK(9ED1)'},
+	 {direction => 'both', ucs => 0xF929, code => 0x8ff4c5, comment => '# CJK COMPATIBILITY IDEOGRAPH-F929'},
+	 {direction => 'both', ucs => 0xF9DC, code => 0x8ff4f2, comment => '# CJK COMPATIBILITY IDEOGRAPH-F9DC'},
+	 {direction => 'both', ucs => 0xFA0E, code => 0x8ff4b4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0E'},
+	 {direction => 'both', ucs => 0xFA0F, code => 0x8ff4b7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA0F'},
+	 {direction => 'both', ucs => 0xFA10, code => 0x8ff4b8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA10'},
+	 {direction => 'both', ucs => 0xFA11, code => 0x8ff4bd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA11'},
+	 {direction => 'both', ucs => 0xFA12, code => 0x8ff4c4, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA12'},
+	 {direction => 'both', ucs => 0xFA13, code => 0x8ff4c7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA13'},
+	 {direction => 'both', ucs => 0xFA14, code => 0x8ff4c8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA14'},
+	 {direction => 'both', ucs => 0xFA15, code => 0x8ff4ce, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA15'},
+	 {direction => 'both', ucs => 0xFA16, code => 0x8ff4cf, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA16'},
+	 {direction => 'both', ucs => 0xFA17, code => 0x8ff4d3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA17'},
+	 {direction => 'both', ucs => 0xFA18, code => 0x8ff4d5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA18'},
+	 {direction => 'both', ucs => 0xFA19, code => 0x8ff4d6, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA19'},
+	 {direction => 'both', ucs => 0xFA1A, code => 0x8ff4d7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1A'},
+	 {direction => 'both', ucs => 0xFA1B, code => 0x8ff4d8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1B'},
+	 {direction => 'both', ucs => 0xFA1C, code => 0x8ff4da, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1C'},
+	 {direction => 'both', ucs => 0xFA1D, code => 0x8ff4db, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1D'},
+	 {direction => 'both', ucs => 0xFA1E, code => 0x8ff4de, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1E'},
+	 {direction => 'both', ucs => 0xFA1F, code => 0x8ff4e0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA1F'},
+	 {direction => 'both', ucs => 0xFA20, code => 0x8ff4e2, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA20'},
+	 {direction => 'both', ucs => 0xFA21, code => 0x8ff4e3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA21'},
+	 {direction => 'both', ucs => 0xFA22, code => 0x8ff4e5, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA22'},
+	 {direction => 'both', ucs => 0xFA23, code => 0x8ff4e9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA23'},
+	 {direction => 'both', ucs => 0xFA24, code => 0x8ff4ea, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA24'},
+	 {direction => 'both', ucs => 0xFA25, code => 0x8ff4eb, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA25'},
+	 {direction => 'both', ucs => 0xFA26, code => 0x8ff4ed, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA26'},
+	 {direction => 'both', ucs => 0xFA27, code => 0x8ff4ef, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA27'},
+	 {direction => 'both', ucs => 0xFA28, code => 0x8ff4f0, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA28'},
+	 {direction => 'both', ucs => 0xFA29, code => 0x8ff4f3, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA29'},
+	 {direction => 'both', ucs => 0xFA2A, code => 0x8ff4f7, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2A'},
+	 {direction => 'both', ucs => 0xFA2B, code => 0x8ff4f8, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2B'},
+	 {direction => 'both', ucs => 0xFA2C, code => 0x8ff4f9, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2C'},
+	 {direction => 'both', ucs => 0xFA2D, code => 0x8ff4fd, comment => '# CJK COMPATIBILITY IDEOGRAPH-FA2D'},
+	 {direction => 'both', ucs => 0xFF07, code => 0x8ff4a9, comment => '# FULLWIDTH APOSTROPHE'},
+	 {direction => 'both', ucs => 0xFFE4, code => 0x8fa2c3, comment => '# FULLWIDTH BROKEN BAR'},
+
+	 # additional conversions for EUC_JP -> UTF-8 conversion
+	 {direction => 'to_unicode', ucs => 0x2116, code => 0x8ff4ac, comment => '# NUMERO SIGN'},
+	 {direction => 'to_unicode', ucs => 0x2121, code => 0x8ff4ad, comment => '# TELEPHONE SIGN'},
+	 {direction => 'to_unicode', ucs => 0x3231, code => 0x8ff4ab, comment => '# PARENTHESIZED IDEOGRAPH STOCK'}
+	);
+
+print_tables("EUC_JP", \@mapping);
+
+#######################################################################
+# sjis2jis ; SJIS => JIS conversion
+sub sjis2jis
+{
+	my ($sjis) = @_;
 
-open(FILE, $in_file) || die("cannot open $in_file");
+	return $sjis if ($sjis <= 0x100);
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
+	my $hi = $sjis >> 8;
+	my $lo = $sjis & 0xff;
+
+	if ($lo >= 0x80) { $lo--; }
+	$lo -= 0x40;
+	if ($hi >= 0xe0) { $hi -= 0x40; }
+	$hi -= 0x81;
+	my $pos = $lo + $hi * 0xbc;
+
+	if ($pos >= 114 * 0x5e && $pos <= 115 * 0x5e + 0x1b)
 	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
+		# This region (115-ku) is outside the JIS code range; to make
+		# generating codes for EUC CODESET 3 convenient, move it to the
+		# seemingly duplicate region (83-84-ku).
+		$pos = $pos - ((31 * 0x5e) + 12);
 
-		$code |= 0x8f8080;
-		$array{$code} = $utf;
+		# after 85-ku 82-ten needs to be moved 2 codepoints
+		$pos = $pos - 2 if ($pos >= 84 * 0x5c + 82)
 	}
-}
-close(FILE);
 
-$file = "euc_jp_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
+	my $hi2 = $pos / 0x5e;
+	my $lo2 = ($pos % 0x5e);
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_JP[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-	}
-}
+	my $ret = $lo2 + 0x21 + (($hi2 + 0x21) << 8);
 
-print FILE "};\n";
-close(FILE);
+	return $ret;
+}
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
index a7c94bca915..a917d067172 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_KR.pl
@@ -16,113 +16,22 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
-# first generate UTF-8 --> EUC_KR table
+# Load the source file.
 
-$in_file = "KSX1001.TXT";
+my $mapping = &read_source("KSX1001.TXT");
 
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$array{$utf} = ($code | 0x8080);
-	}
-}
-close(FILE);
-
-$file = "utf8_to_euc_kr.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_KR[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
+foreach my $i (@$mapping)
 {
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-	}
+	$i->{code} = $i->{code} | 0x8080;
 }
 
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_KR --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$code |= 0x8080;
-		$array{$code} = $utf;
-	}
-}
-close(FILE);
-
-$file = "euc_kr_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_KR[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-	}
-}
+# Some extra characters that are not in KSX1001.TXT
+push @$mapping, (
+	{direction => 'both', ucs => 0x20AC, code => 0xa2e6, comment => '# EURO SIGN'},
+	{direction => 'both', ucs => 0x00AE, code => 0xa2e7, comment => '# REGISTERED SIGN'},
+	{direction => 'both', ucs => 0x327E, code => 0xa2e8, comment => '# CIRCLED HANGUL IEUNG U'}
+	);
 
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_KR", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
index e4fc535b180..aceef5433c2 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_EUC_TW.pl
@@ -17,141 +17,47 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
-# first generate UTF-8 --> EUC_TW table
+my $mapping = &read_source("CNS11643.TXT");
 
-$in_file = "CNS11643.TXT";
+my @extras;
 
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
+foreach my $i (@$mapping)
 {
-	chop;
-	if (/^#/)
+	my $ucs = $i->{ucs};
+	my $code = $i->{code};
+	my $origcode = $i->{code};
+
+	my $plane = ($code & 0x1f0000) >> 16;
+	if ($plane > 16)
 	{
+		printf STDERR "Warning: invalid plane No.$plane. ignored\n";
 		next;
 	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$plane = ($code & 0x1f0000) >> 16;
-		if ($plane > 16)
-		{
-			printf STDERR "Warning: invalid plane No.$plane. ignored\n";
-			next;
-		}
-
-		if ($plane == 1)
-		{
-			$array{$utf} = (($code & 0xffff) | 0x8080);
-		}
-		else
-		{
-			$array{$utf} =
-			  (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
-		}
-	}
-}
-close(FILE);
-
-$file = "utf8_to_euc_tw.map";
-open(FILE, "> $file") || die("cannot open $file");
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapEUC_TW[ $count ] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
+	if ($plane == 1)
 	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
+		$code = ($code & 0xffff) | 0x8080;
 	}
 	else
 	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
+		$code = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
 	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-#
-# then generate EUC_TW --> UTF8 table
-#
-reset 'array';
-
-open(FILE, $in_file) || die("cannot open $in_file");
+	$i->{code} = $code;
 
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
+	# Some codes are mapped twice in the EUC_TW to UTF-8 table.
+	if ($origcode >= 0x12121 && $origcode <= 0x20000)
 	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ($array{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate code: %04x\n", $ucs;
-			next;
-		}
-		$count++;
-
-		$plane = ($code & 0x1f0000) >> 16;
-		if ($plane > 16)
-		{
-			printf STDERR "Warning: invalid plane No.$plane. ignored\n";
-			next;
-		}
-
-		if ($plane == 1)
-		{
-			$c = (($code & 0xffff) | 0x8080);
-			$array{$c} = $utf;
-			$count++;
+		push @extras, {
+			ucs => $i->{ucs},
+			code => ($i->{code} + 0x8ea10000),
+			comment => $i->{comment},    # mapping entries carry their text under 'comment', not 'rest'
+			direction => 'to_unicode'
 		}
-		$c = (0x8ea00000 + ($plane << 16)) | (($code & 0xffff) | 0x8080);
-		$array{$c} = $utf;
 	}
 }
-close(FILE);
-
-$file = "euc_tw_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapEUC_TW[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-	}
-}
+push @$mapping, @extras;
 
-print FILE "};\n";
-close(FILE);
+print_tables("EUC_TW", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
index 043c1c27ec8..f58361024e4 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_GB18030.pl
@@ -13,8 +13,7 @@
 # where the "u" field is the Unicode code point in hex,
 # and the "b" field is the hex byte sequence for GB18030
 
-require "ucs2utf.pl";
-
+require "convutils.pm";
 
 # Read the input
 
@@ -22,6 +21,8 @@ $in_file = "gb-18030-2000.xml";
 
 open(FILE, $in_file) || die("cannot open $in_file");
 
+my @mapping;
+
 while (<FILE>)
 {
 	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
@@ -32,78 +33,13 @@ while (<FILE>)
 	$code = hex($c);
 	if ($code >= 0x80 && $ucs >= 0x0080)
 	{
-		$utf = &ucs2utf($ucs);
-		if ($arrayu{$utf} ne "")
-		{
-			printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-			next;
+		push @mapping, {
+			ucs => $ucs,
+			code => $code,
+			direction => 'both'
 		}
-		if ($arrayc{$code} ne "")
-		{
-			printf STDERR "Warning: duplicate GB18030: %08x\n", $code;
-			next;
-		}
-		$arrayu{$utf}  = $code;
-		$arrayc{$code} = $utf;
-		$count++;
-	}
-}
-close(FILE);
-
-
-#
-# first, generate UTF8 --> GB18030 table
-#
-
-$file = "utf8_to_gb18030.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapGB18030[ $count ] = {\n";
-
-$cc = $count;
-for $index (sort { $a <=> $b } keys(%arrayu))
-{
-	$code = $arrayu{$index};
-	$cc--;
-	if ($cc == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
 	}
 }
-
-print FILE "};\n";
 close(FILE);
 
-
-#
-# then generate GB18030 --> UTF8 table
-#
-
-$file = "gb18030_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapGB18030[ $count ] = {\n";
-
-$cc = $count;
-for $index (sort { $a <=> $b } keys(%arrayc))
-{
-	$utf = $arrayc{$index};
-	$cc--;
-	if ($cc == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-	}
-}
-
-print FILE "};\n";
-close(FILE);
+print_tables("GB18030", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
new file mode 100755
index 00000000000..b98f9a7bf55
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
@@ -0,0 +1,31 @@
+#! /usr/bin/perl
+#
+# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/UCS_to_JOHAB.pl
+#
+# Generate UTF-8 <--> JOHAB conversion tables from
+# map files provided by Unicode organization.
+# Unfortunately it is prohibited by the organization
+# to distribute the map files. So if you try to use this script,
+# you have to obtain the map files from the organization's ftp site.
+# ftp://www.unicode.org/Public/MAPPINGS/
+# We assume the file include three tab-separated columns:
+#		 JOHAB code in hex
+#		 UCS-2 code in hex
+#		 # and Unicode name (not used in this script)
+
+require "convutils.pm";
+
+# Load the source file.
+
+my $mapping = &read_source("JOHAB.TXT");
+
+# Some extra characters that are not in JOHAB.TXT
+push @$mapping, (
+	{direction => 'both', ucs => 0x20AC, code => 0xd9e6, comment => '# EURO SIGN'},
+	{direction => 'both', ucs => 0x00AE, code => 0xd9e7, comment => '# REGISTERED SIGN'},
+	{direction => 'both', ucs => 0x327E, code => 0xd9e8, comment => '# CIRCLED HANGUL IEUNG U'}
+	);
+
+print_tables("JOHAB", $mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
index 51ffd86b2c9..16a53ad1d9f 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SHIFT_JIS_2004.pl
@@ -7,7 +7,7 @@
 # Generate UTF-8 <--> SHIFT_JIS_2004 code conversion tables from
 # "sjis-0213-2004-std.txt" (http://x0213.org)
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
 # first generate UTF-8 --> SHIFT_JIS_2004 table
 
@@ -15,10 +15,7 @@ $in_file = "sjis-0213-2004-std.txt";
 
 open(FILE, $in_file) || die("cannot open $in_file");
 
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
+my @mapping;
 
 while ($line = <FILE>)
 {
@@ -29,14 +26,16 @@ while ($line = <FILE>)
 		$u2             = $3;
 		$rest           = "U+" . $u1 . "+" . $u2 . $4;
 		$code           = hex($c);
-		$ucs            = hex($u1);
-		$utf1           = &ucs2utf($ucs);
-		$ucs            = hex($u2);
-		$utf2           = &ucs2utf($ucs);
-		$str            = sprintf "%08x%08x", $utf1, $utf2;
-		$array1{$str}   = $code;
-		$comment1{$str} = $rest;
-		$count1++;
+		$ucs1           = hex($u1);
+		$ucs2           = hex($u2);
+
+		push @mapping, {
+			code => $code,
+			ucs => $ucs1,
+			ucs_second => $ucs2,
+			comment => $rest,
+			direction => 'both'
+		};
 		next;
 	}
 	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
@@ -52,183 +51,31 @@ while ($line = <FILE>)
 
 	$ucs  = hex($u);
 	$code = hex($c);
-	$utf  = &ucs2utf($ucs);
-	if ($array{$utf} ne "")
-	{
-		printf STDERR
-		  "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
-		  $ucs, $code;
-		next;
-	}
-	$count++;
 
-	$array{$utf}    = $code;
-	$comment{$code} = $rest;
-}
-close(FILE);
-
-$file = "utf8_to_shift_jis_2004.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%08x, 0x%06x}	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-	else
+	if ($code < 0x80 && $ucs < 0x80)
 	{
-		printf FILE "  {0x%08x, 0x%06x},	/* %s */\n", $index, $code,
-		  $comment{$code};
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "utf8_to_shift_jis_2004_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-"static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {\n";
-
-for $index (sort { $a cmp $b } keys(%array1))
-{
-	$code = $array1{$index};
-	$count1--;
-	if ($count1 == 0)
-	{
-		printf FILE "  {0x%s, 0x%s, 0x%04x}	/* %s */\n", substr($index, 0, 8),
-		  substr($index, 8, 8), $code, $comment1{$index};
-	}
-	else
-	{
-		printf FILE "  {0x%s, 0x%s, 0x%04x},	/* %s */\n",
-		  substr($index, 0, 8), substr($index, 8, 8), $code,
-		  $comment1{$index};
-	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-# then generate SHIFT_JIS_2004 --> UTF-8 table
-
-$in_file = "sjis-0213-2004-std.txt";
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-reset 'array1';
-reset 'comment';
-reset 'comment1';
-
-while ($line = <FILE>)
-{
-	if ($line =~ /^0x(.*)[ \t]*U\+(.*)\+(.*)[ \t]*#(.*)$/)
-	{
-		$c               = $1;
-		$u1              = $2;
-		$u2              = $3;
-		$rest            = "U+" . $u1 . "+" . $u2 . $4;
-		$code            = hex($c);
-		$ucs             = hex($u1);
-		$utf1            = &ucs2utf($ucs);
-		$ucs             = hex($u2);
-		$utf2            = &ucs2utf($ucs);
-		$str             = sprintf "%08x%08x", $utf1, $utf2;
-		$array1{$code}   = $str;
-		$comment1{$code} = $rest;
-		$count1++;
 		next;
 	}
-	elsif ($line =~ /^0x(.*)[ \t]*U\+(.*)[ \t]*#(.*)$/)
+	elsif ($code < 0x80)
 	{
-		$c    = $1;
-		$u    = $2;
-		$rest = "U+" . $u . $3;
+		$direction = 'from_unicode';
 	}
-	else
-	{
-		next;
-	}
-
-	$ucs  = hex($u);
-	$code = hex($c);
-	$utf  = &ucs2utf($ucs);
-	if ($array{$code} ne "")
-	{
-		printf STDERR
-		  "Warning: duplicate UTF8: %08x UCS: %04x Shift JIS: %04x\n", $utf,
-		  $ucs, $code;
-		printf STDERR "Previous value: UTF8: %08x\n", $array{$utf};
-		next;
-	}
-	$count++;
-
-	$array{$code}  = $utf;
-	$comment{$utf} = $rest;
-}
-close(FILE);
-
-$file = "shift_jis_2004_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFTJIS_2004.pl\n";
-print FILE " */\n";
-print FILE "static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {\n";
-
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
+	elsif ($ucs < 0x80)
 	{
-		printf FILE "  {0x%04x, 0x%08x}	/* %s */\n", $index, $code,
-		  $comment{$code};
+		$direction = 'to_unicode';
 	}
 	else
 	{
-		printf FILE "  {0x%04x, 0x%08x},	/* %s */\n", $index, $code,
-		  $comment{$code};
+		$direction = 'both';
 	}
-}
-
-print FILE "};\n";
-close(FILE);
-
-$file = "shift_jis_2004_to_utf8_combined.map";
-open(FILE, "> $file") || die("cannot open $file");
-print FILE "/*\n";
-print FILE " * This file was generated by UCS_to_SHIFT_JIS_2004.pl\n";
-print FILE " */\n";
-print FILE
-"static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {\n";
 
-for $index (sort { $a <=> $b } keys(%array1))
-{
-	$code = $array1{$index};
-	$count1--;
-	if ($count1 == 0)
-	{
-		printf FILE "  {0x%04x, 0x%s, 0x%s}	/* %s */\n", $index,
-		  substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%s, 0x%s},	/* %s */\n", $index,
-		  substr($code, 0, 8), substr($code, 8, 8), $comment1{$index};
-	}
+	push @mapping, {
+		code => $code,
+		ucs => $ucs,
+		comment => $rest,
+		direction => $direction
+	};
 }
-
-print FILE "};\n";
 close(FILE);
+
+print_tables("SHIFT_JIS_2004", \@mapping, 1);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
index 10e54b157d2..c8ff712af8f 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
@@ -4,138 +4,45 @@
 #
 # src/backend/utils/mb/Unicode/UCS_to_SJIS.pl
 #
-# Generate UTF-8 <--> SJIS code conversion tables from
-# map files provided by Unicode organization.
-# Unfortunately it is prohibited by the organization
-# to distribute the map files. So if you try to use this script,
-# you have to obtain SHIFTJIS.TXT from
-# the organization's ftp site.
-#
-# SHIFTJIS.TXT format:
-#		 SHIFTJIS code in hex
-#		 UCS-2 code in hex
-#		 # and Unicode name (not used in this script)
-# Warning: SHIFTJIS.TXT contains only JIS0201 and JIS0208. no JIS0212.
-
-require "ucs2utf.pl";
-
-# first generate UTF-8 --> SJIS table
-
-$in_file = "CP932.TXT";
-$count   = 0;
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		if ((($code >= 0xed40) && ($code <= 0xeefc))
-			|| (   ($code >= 0x8754)
-				&& ($code <= 0x875d))
-			|| ($code == 0x878a)
-			|| ($code == 0x8782)
-			|| ($code == 0x8784)
-			|| ($code == 0xfa5b)
-			|| ($code == 0xfa54)
-			|| (   ($code >= 0x8790)
-				&& ($code <= 0x8792))
-			|| (   ($code >= 0x8795)
-				&& ($code <= 0x8797))
-			|| (   ($code >= 0x879a)
-				&& ($code <= 0x879c)))
-		{
-			printf STDERR "Warning: duplicate UTF8: UCS=0x%04x SJIS=0x%04x\n",
-			  $ucs,
-			  $code;
-			next;
-		}
-		$count++;
-		$array{$utf} = $code;
-	}
-}
+# Generate UTF-8 <=> SJIS code conversion tables from CP932.TXT, a map
+# file provided by the Unicode organization. Unfortunately it is
+# prohibited by the organization to distribute the map files. So if you
+# try to use this script, you have to obtain CP932.TXT from the
+# organization's ftp site.
 
-close(FILE);
+use strict;
+require "convutils.pm";
 
-$file = "utf8_to_sjis.map";
-open(FILE, "> $file") || die("cannot open $file");
+my $charset = read_source("CP932.TXT");
 
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_utf_to_local ULmapSJIS[ $count ] = {\n";
+# Drop these SJIS codes from the source for UTF8=>SJIS conversion
+my @reject_sjis =(
+	0xed40..0xeefc, 0x8754..0x875d, 0x878a, 0x8782,
+	0x8784, 0xfa5b, 0xfa54, 0x8790..0x8792, 0x8795..0x8797,
+	0x879a..0x879c
+);
 
-for $index (sort { $a <=> $b } keys(%array))
+foreach my $i (@$charset)
 {
-	$code = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-	}
-	else
-	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-	}
-}
-
-print FILE "};\n";
-close(FILE);
+	my $code = $i->{code};
+	my $ucs = $i->{ucs};
 
-#
-# then generate SJIS --> UTF8 table
-#
-
-open(FILE, $in_file) || die("cannot open $in_file");
-
-reset 'array';
-$count = 0;
-
-while (<FILE>)
-{
-	chop;
-	if (/^#/)
-	{
-		next;
-	}
-	($c, $u, $rest) = split;
-	$ucs  = hex($u);
-	$code = hex($c);
-	if ($code >= 0x80 && $ucs >= 0x0080)
-	{
-		$utf = &ucs2utf($ucs);
-		$count++;
-
-		$array{$code} = $utf;
-	}
-}
-close(FILE);
-
-$file = "sjis_to_utf8.map";
-open(FILE, "> $file") || die("cannot open $file");
-
-print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-print FILE "static const pg_local_to_utf LUmapSJIS[ $count ] = {\n";
-for $index (sort { $a <=> $b } keys(%array))
-{
-	$utf = $array{$index};
-	$count--;
-	if ($count == 0)
-	{
-		printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-	}
-	else
+	if (grep {$code == $_} @reject_sjis)
 	{
-		printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
+		$i->{direction} = "to_unicode";
 	}
 }
 
-print FILE "};\n";
-close(FILE);
+# Add these UTF8->SJIS pairs to the table.
+push @$charset, (
+	{direction => "from_unicode", ucs => 0x00a2,   code => 0x8191, comment => '# CENT SIGN'},
+	{direction => "from_unicode", ucs => 0x00a3,   code => 0x8192, comment => '# POUND SIGN'},
+	{direction => "from_unicode", ucs => 0x00a5,   code => 0x5c,   comment => '# YEN SIGN'},
+	{direction => "from_unicode", ucs => 0x00ac,   code => 0x81ca, comment => '# NOT SIGN'},
+	{direction => "from_unicode", ucs => 0x2016, code => 0x8161, comment => '# DOUBLE VERTICAL LINE'},
+	{direction => "from_unicode", ucs => 0x203e, code => 0x7e,   comment => '# OVERLINE'},
+	{direction => "from_unicode", ucs => 0x2212, code => 0x817c, comment => '# MINUS SIGN'},
+	{direction => "from_unicode", ucs => 0x301c, code => 0x8160, comment => '# WAVE DASH'}
+);
+
+print_tables("SJIS", $charset);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_UHC.pl b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
new file mode 100755
index 00000000000..b6bf3bd8f27
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/UCS_to_UHC.pl
@@ -0,0 +1,51 @@
+#! /usr/bin/perl
+#
+# Copyright (c) 2007-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/UCS_to_UHC.pl
+#
+# Generate UTF-8 <--> UHC code conversion tables from
+# "windows-949-2000.xml", obtained from
+# http://source.icu-project.org/repos/icu/data/trunk/charset/data/xml/
+#
+# The lines we care about in the source file look like
+#    <a u="009A" b="81 30 83 36"/>
+# where the "u" field is the Unicode code point in hex,
+# and the "b" field is the hex byte sequence for UHC
+
+require "convutils.pm";
+
+# Read the input
+
+$in_file = "windows-949-2000.xml";
+
+open(FILE, $in_file) || die("cannot open $in_file");
+
+my @mapping;
+
+while (<FILE>)
+{
+	next if (!m/<a u="([0-9A-F]+)" b="([0-9A-F ]+)"/);
+	$u = $1;
+	$c = $2;
+	$c =~ s/ //g;
+	$ucs  = hex($u);
+	$code = hex($c);
+
+	next if ($code == 0x0080 || $code == 0x00FF);
+
+	if ($code >= 0x80 && $ucs >= 0x0080)
+	{
+		push @mapping, {
+			ucs => $ucs,
+			code => $code,
+			direction => 'both'
+		}
+	}
+}
+close(FILE);
+
+# One extra character that's not in the source file.
+push @mapping, { direction => 'both', code => 0xa2e8, ucs => 0x327e, comment => 'CIRCLED HANGUL IEUNG U' };
+
+print_tables("UHC", \@mapping);
diff --git a/src/backend/utils/mb/Unicode/UCS_to_most.pl b/src/backend/utils/mb/Unicode/UCS_to_most.pl
index 125378f149a..a3cf436eefd 100755
--- a/src/backend/utils/mb/Unicode/UCS_to_most.pl
+++ b/src/backend/utils/mb/Unicode/UCS_to_most.pl
@@ -15,7 +15,7 @@
 #		 UCS-2 code in hex
 #		 # and Unicode name (not used in this script)
 
-require "ucs2utf.pl";
+require "convutils.pm";
 
 %filename = (
 	'WIN866'     => 'CP866.TXT',
@@ -44,121 +44,13 @@ require "ucs2utf.pl";
 	'ISO8859_16' => '8859-16.TXT',
 	'KOI8R'      => 'KOI8-R.TXT',
 	'KOI8U'      => 'KOI8-U.TXT',
-	'GBK'        => 'CP936.TXT',
-	'UHC'        => 'CP949.TXT',
-	'JOHAB'      => 'JOHAB.TXT',);
+	'GBK'        => 'CP936.TXT');
 
 @charsets = keys(%filename);
 @charsets = @ARGV if scalar(@ARGV);
 foreach $charset (@charsets)
 {
+	my $mapping = &read_source($filename{$charset});
 
-	#
-	# first, generate UTF8-> charset table
-	#
-	$in_file = $filename{$charset};
-
-	open(FILE, $in_file) || die("cannot open $in_file");
-
-	reset 'array';
-
-	while (<FILE>)
-	{
-		chop;
-		if (/^#/)
-		{
-			next;
-		}
-		($c, $u, $rest) = split;
-		$ucs  = hex($u);
-		$code = hex($c);
-		if ($code >= 0x80 && $ucs >= 0x0080)
-		{
-			$utf = &ucs2utf($ucs);
-			if ($array{$utf} ne "")
-			{
-				printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-				next;
-			}
-			$count++;
-			$array{$utf} = $code;
-		}
-	}
-	close(FILE);
-
-	$file = lc("utf8_to_${charset}.map");
-	open(FILE, "> $file") || die("cannot open $file");
-
-	print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-	print FILE "static const pg_utf_to_local ULmap${charset}[ $count ] = {\n";
-
-	for $index (sort { $a <=> $b } keys(%array))
-	{
-		$code = $array{$index};
-		$count--;
-		if ($count == 0)
-		{
-			printf FILE "  {0x%04x, 0x%04x}\n", $index, $code;
-		}
-		else
-		{
-			printf FILE "  {0x%04x, 0x%04x},\n", $index, $code;
-		}
-	}
-
-	print FILE "};\n";
-	close(FILE);
-
-	#
-	# then generate character set code ->UTF8 table
-	#
-	open(FILE, $in_file) || die("cannot open $in_file");
-
-	reset 'array';
-
-	while (<FILE>)
-	{
-		chop;
-		if (/^#/)
-		{
-			next;
-		}
-		($c, $u, $rest) = split;
-		$ucs  = hex($u);
-		$code = hex($c);
-		if ($code >= 0x80 && $ucs >= 0x0080)
-		{
-			$utf = &ucs2utf($ucs);
-			if ($array{$code} ne "")
-			{
-				printf STDERR "Warning: duplicate UTF8: %04x\n", $ucs;
-				next;
-			}
-			$count++;
-			$array{$code} = $utf;
-		}
-	}
-	close(FILE);
-
-	$file = lc("${charset}_to_utf8.map");
-	open(FILE, "> $file") || die("cannot open $file");
-
-	print FILE "/* src/backend/utils/mb/Unicode/$file */\n\n";
-	print FILE "static const pg_local_to_utf LUmap${charset}[ $count ] = {\n";
-	for $index (sort { $a <=> $b } keys(%array))
-	{
-		$utf = $array{$index};
-		$count--;
-		if ($count == 0)
-		{
-			printf FILE "  {0x%04x, 0x%04x}\n", $index, $utf;
-		}
-		else
-		{
-			printf FILE "  {0x%04x, 0x%04x},\n", $index, $utf;
-		}
-	}
-
-	print FILE "};\n";
-	close(FILE);
+	print_tables($charset, $mapping);
 }
diff --git a/src/backend/utils/mb/Unicode/convutils.pm b/src/backend/utils/mb/Unicode/convutils.pm
new file mode 100644
index 00000000000..d6a13e8c02c
--- /dev/null
+++ b/src/backend/utils/mb/Unicode/convutils.pm
@@ -0,0 +1,282 @@
+#
+# Copyright (c) 2001-2016, PostgreSQL Global Development Group
+#
+# src/backend/utils/mb/Unicode/convutils.pm
+
+use strict;
+
+#######################################################################
+# ucs2utf - convert a UCS-4 code point to UTF-8. The encoded bytes are
+# returned packed into one integer, lead byte in the highest-order byte.
+sub ucs2utf
+{
+	my ($ucs) = @_;
+	my $utf;
+
+	if ($ucs <= 0x007f)    # one byte: the value itself
+	{
+		$utf = $ucs;
+	}
+	elsif ($ucs > 0x007f && $ucs <= 0x07ff)    # two bytes: 0xc0 lead + one 0x80 continuation
+	{
+		$utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
+	}
+	elsif ($ucs > 0x07ff && $ucs <= 0xffff)    # three bytes: 0xe0 lead + two continuations
+	{
+		$utf =
+		  ((($ucs >> 12) | 0xe0) << 16) |
+		  (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
+	}
+	else    # anything above 0xffff: four bytes, 0xf0 lead + three continuations
+	{
+		$utf =
+		  ((($ucs >> 18) | 0xf0) << 24) |
+		  (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
+		  (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
+	}
+	return ($utf);
+}
+
+#######################################################################
+# read_source - read a "0xCODE 0xUCS # comment" mapping file.
+# fname ; input file name.  Returns a reference to a list of hashes (keys
+# f, l, code, ucs, comment, direction => "both"); dies, or exits with
+# status 1, if the file cannot be opened or a line is malformed.
+sub read_source
+{
+	my ($fname) = @_;
+	my @r;
+
+	open(my $in, '<', $fname) || die("cannot open $fname: $!");
+
+	while (<$in>)
+	{
+		next if (/^#/);
+		chomp;    # strip only a newline; chop would eat a real character on an unterminated last line
+		next if (/^$/); # Ignore empty lines
+
+		# Skip lines that have a code but no Unicode column (code, then comment).
+		next if (/^0x([0-9A-F]+)\s+(#.*)$/);
+
+		# Columns: code, UCS, optional third hex value (matched but unused), '#' comment.
+		if (!/^0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+(?:0x([0-9A-Fa-f]+)\s+)?(#.*)$/)
+		{
+			print STDERR "READ ERROR at line $. in $fname: $_\n";
+			exit 1;    # non-zero status, so the calling make rule fails instead of passing silently
+		}
+		my $out = {f => $fname, l => $.,
+				   code => hex($1),
+				   ucs => hex($2),
+				   comment => $4,
+				   direction => "both"
+				};
+
+		# Ignore pure ASCII mappings. PostgreSQL character conversion code
+		# never even passes these to the conversion code.
+		next if ($out->{code} < 0x80 || $out->{ucs} < 0x80);
+
+		push(@r, $out);
+	}
+	close($in);
+	return \@r;
+}
+
+##################################################################
+# print_tables : output mapping tables
+#
+# Arguments:
+#  charset - string name of the character set.
+#  table   - mapping table (see format below)
+#  verbose - if 1, output comment on each line,
+#            if 2, also output source file name and number
+#
+#
+#
+# Mapping table format:
+#
+# Mapping table is a list of hashes. Each hash has the following fields:
+#   direction  - Direction: 'both', 'from_unicode' or 'to_unicode'
+#   ucs        - Unicode code point
+#   ucs_second - Second Unicode code point, if this is a "combined" character.
+#   code       - Byte sequence in the "other" character set, as an integer
+#   comment    - Text representation of the character
+#   f          - Source filename
+#   l          - Line number in source file
+#
+#
+sub print_tables
+{
+	my ($charset, $table, $verbose) = @_;
+	# Split the mappings by direction, keeping "combined" entries (those with
+	# ucs_second) apart; a 'both' entry is added to both directions.
+	my @to_unicode;
+	my @to_unicode_combined;
+	my @from_unicode;
+	my @from_unicode_combined;
+
+	foreach my $i (@$table)
+	{
+		if (defined $i->{ucs_second})    # combined character: two code points
+		{
+			my $entry = {utf8 => ucs2utf($i->{ucs}),
+						 utf8_second => ucs2utf($i->{ucs_second}),
+						 code => $i->{code},
+						 comment => $i->{comment},
+						 f => $i->{f}, l => $i->{l}};
+			if ($i->{direction} eq "both" || $i->{direction} eq "to_unicode")
+			{
+				push @to_unicode_combined, $entry;
+			}
+			if ($i->{direction} eq "both" || $i->{direction} eq "from_unicode")
+			{
+				push @from_unicode_combined, $entry;
+			}
+		}
+		else    # ordinary single code point mapping
+		{
+			my $entry = {utf8 => ucs2utf($i->{ucs}),
+						 code => $i->{code},
+						 comment => $i->{comment},
+						 f => $i->{f}, l => $i->{l}};
+			if ($i->{direction} eq "both" || $i->{direction} eq "to_unicode")
+			{
+				push @to_unicode, $entry;
+			}
+			if ($i->{direction} eq "both" || $i->{direction} eq "from_unicode")
+			{
+				push @from_unicode, $entry;
+			}
+		}
+	}
+	# The plain maps are always written; the combined ones only when non-empty.
+	print_to_utf8_map($charset, \@to_unicode, $verbose);
+	print_to_utf8_combined_map($charset, \@to_unicode_combined, $verbose) if (scalar @to_unicode_combined > 0);
+	print_from_utf8_map($charset, \@from_unicode, $verbose);
+	print_from_utf8_combined_map($charset, \@from_unicode_combined, $verbose) if (scalar @from_unicode_combined > 0);
+}
+
+sub print_from_utf8_map
+{
+	my ($charset, $table, $verbose) = @_;
+	# Write utf8_to_<charset>.map: a pg_utf_to_local array sorted by UTF-8 value.
+	my $last_comment = "";
+
+	my $fname = lc("utf8_to_${charset}.map");
+	print "- Writing UTF8=>${charset} conversion table: $fname\n";
+	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+	printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
+		   "static const pg_utf_to_local ULmap${charset}[ %d ] = {",
+		   scalar(@$table));
+	my $first = 1;
+	foreach my $i (sort {$$a{utf8} <=> $$b{utf8}} @$table)
+    {
+		print($out ",") if (!$first);
+		$first = 0;
+		print($out "\t/* $last_comment */") if ($verbose);
+		# Comments lag one entry behind so each follows its entry's comma; the first is empty.
+		printf($out "\n  {0x%04x, 0x%04x}", $$i{utf8}, $$i{code});
+		if ($verbose >= 2)    # also record the source file name and line number
+		{
+			$last_comment = "$$i{f}:$$i{l} $$i{comment}";
+		}
+		else
+		{
+			$last_comment = $$i{comment};
+		}
+	}
+	print($out "\t/* $last_comment */") if ($verbose);    # comment for the final entry
+	print $out "\n};\n";
+	close($out);
+}
+
+sub print_from_utf8_combined_map
+{
+	my ($charset, $table, $verbose) = @_;
+	# Write utf8_to_<charset>_combined.map: a pg_utf_to_local_combined array.
+	my $last_comment = "";
+	# NOTE(review): the sort below orders by utf8 only, not utf8_second - confirm that suffices for the lookup code.
+	my $fname = lc("utf8_to_${charset}_combined.map");
+	print "- Writing UTF8=>${charset} conversion table: $fname\n";
+	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+	printf($out "/* src/backend/utils/mb/Unicode/$fname */\n\n".
+		   "static const pg_utf_to_local_combined ULmap${charset}_combined[ %d ] = {",
+		   scalar(@$table));
+	my $first = 1;
+	foreach my $i (sort {$$a{utf8} <=> $$b{utf8}} @$table)
+    {
+		print($out ",") if (!$first);
+		$first = 0;
+		print($out "\t/* $last_comment */") if ($verbose);
+		# Comments lag one entry behind so each follows its entry's comma; the first is empty.
+		printf($out "\n  {0x%08x, 0x%08x, 0x%04x}", $$i{utf8}, $$i{utf8_second}, $$i{code});
+		$last_comment = "$$i{comment}";
+	}
+	print($out "\t/* $last_comment */") if ($verbose);    # comment for the final entry
+	print $out "\n};\n";
+	close($out);
+}
+
+sub print_to_utf8_map
+{
+	my ($charset, $table, $verbose) = @_;
+	# Write <charset>_to_utf8.map: a pg_local_to_utf array sorted by charset code.
+	my $last_comment = "";
+
+	my $fname = lc("${charset}_to_utf8.map");
+
+	print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
+	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+	printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
+		   "static const pg_local_to_utf LUmap${charset}[ %d ] = {",
+		   scalar(@$table));
+	my $first = 1;
+	foreach my $i (sort {$$a{code} <=> $$b{code}} @$table)
+    {
+		print($out ",") if (!$first);
+		$first = 0;
+		print($out "\t/* $last_comment */") if ($verbose);
+		# Comments lag one entry behind so each follows its entry's comma; the first is empty.
+		printf($out "\n  {0x%04x, 0x%x}", $$i{code}, $$i{utf8});
+		if ($verbose >= 2)    # also record the source file name and line number
+		{
+			$last_comment = "$$i{f}:$$i{l} $$i{comment}";
+		}
+		else
+		{
+			$last_comment = $$i{comment};
+		}
+	}
+	print($out "\t/* $last_comment */") if ($verbose);    # comment for the final entry
+	print $out "\n};\n";
+	close($out);
+}
+
+sub print_to_utf8_combined_map
+{
+	my ($charset, $table, $verbose) = @_;
+	# Write <charset>_to_utf8_combined.map: a pg_local_to_utf_combined array
+	my $last_comment = "";
+	# sorted by charset code; each entry maps one code to two UTF-8 values.
+	my $fname = lc("${charset}_to_utf8_combined.map");
+
+	print "- Writing ${charset}=>UTF8 conversion table: $fname\n";
+	open(my $out, '>', $fname) || die "cannot open output file : $fname\n";
+	printf($out "/* src/backend/utils/mb/Unicode/${fname} */\n\n".
+		   "static const pg_local_to_utf_combined LUmap${charset}_combined[ %d ] = {",
+		   scalar(@$table));
+	my $first = 1;
+	foreach my $i (sort {$$a{code} <=> $$b{code}} @$table)
+    {
+		print($out ",") if (!$first);
+		$first = 0;
+		print($out "\t/* $last_comment */") if ($verbose);
+		# Comments lag one entry behind so each follows its entry's comma; the first is empty.
+		printf($out "\n  {0x%04x, 0x%08x, 0x%08x}", $$i{code}, $$i{utf8}, $$i{utf8_second});
+		$last_comment = "$$i{comment}";
+	}
+	print($out "\t/* $last_comment */") if ($verbose);    # comment for the final entry
+	print $out "\n};\n";
+	close($out);
+}
+
+1;
diff --git a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
index 2c3a607bf86..33fd42ac464 100644
--- a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8.map */
+
+static const pg_local_to_utf LUmapEUC_JIS_2004[ 11303 ] = {	/*  */
   {0x0080, 0xc280},	/* U+0080	 <control> */
   {0x0081, 0xc281},	/* U+0081	 <control> */
   {0x0082, 0xc282},	/* U+0082	 <control> */
@@ -205,7 +204,7 @@ static const pg_local_to_utf LUmapEUC_JIS_2004[] = {
   {0xa2ac, 0xe28691},	/* U+2191	 UPWARDS ARROW */
   {0xa2ad, 0xe28693},	/* U+2193	 DOWNWARDS ARROW */
   {0xa2ae, 0xe38093},	/* U+3013	 GETA MARK */
-  {0xa2af, 0xefbc87},	/* U+FF07	 FULLWIDTH APOSTROPHE	[2000] */
+  {0xa2af, 0xefbc87},	/* U+FF07	 FULLWIDTH APOSTROPHE */
   {0xa2b0, 0xefbc82},	/* U+FF02	 FULLWIDTH QUOTATION MARK	[2000] */
   {0xa2b1, 0xefbc8d},	/* U+FF0D	 FULLWIDTH HYPHEN-MINUS	[2000] */
   {0xa2b2, 0xefbd9e},	/* U+FF5E	 FULLWIDTH TILDE	[2000] */
diff --git a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
index 7a7f85b105d..2d8987b9908 100644
--- a/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
+++ b/src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/euc_jis_2004_to_utf8_combined.map */
+
+static const pg_local_to_utf_combined LUmapEUC_JIS_2004_combined[ 25 ] = {	/*  */
   {0xa4f7, 0x00e3818b, 0x00e3829a},	/* U+304B+309A	 	[2000] */
   {0xa4f8, 0x00e3818d, 0x00e3829a},	/* U+304D+309A	 	[2000] */
   {0xa4f9, 0x00e3818f, 0x00e3829a},	/* U+304F+309A	 	[2000] */
diff --git a/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map b/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
index db427cbb24c..eb17f9829c5 100644
--- a/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_jp_to_utf8.map
@@ -1,6 +1,6 @@
 /* src/backend/utils/mb/Unicode/euc_jp_to_utf8.map */
 
-static const pg_local_to_utf LUmapEUC_JP[] = {
+static const pg_local_to_utf LUmapEUC_JP[ 13197 ] = {
   {0x8ea1, 0xefbda1},
   {0x8ea2, 0xefbda2},
   {0x8ea3, 0xefbda3},
@@ -13197,5 +13197,5 @@ static const pg_local_to_utf LUmapEUC_JP[] = {
   {0x8ff4fb, 0xe9ab99},
   {0x8ff4fc, 0xe9adb2},
   {0x8ff4fd, 0xefa8ad},
-  {0x8ff4fe, 0xe9bb91},
+  {0x8ff4fe, 0xe9bb91}
 };
diff --git a/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map b/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
index e37152137d6..701a7a476ff 100644
--- a/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/euc_kr_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/euc_kr_to_utf8.map */
+
 static const pg_local_to_utf LUmapEUC_KR[ 8227 ] = {
   {0xa1a1, 0xe38080},
   {0xa1a2, 0xe38081},
diff --git a/src/backend/utils/mb/Unicode/johab_to_utf8.map b/src/backend/utils/mb/Unicode/johab_to_utf8.map
index 8110f6e8531..e31d24184c1 100644
--- a/src/backend/utils/mb/Unicode/johab_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/johab_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/johab_to_utf8.map */
+
 static const pg_local_to_utf LUmapJOHAB[ 17049 ] = {
   {0x8444, 0xe384b3},
   {0x8446, 0xe384b5},
diff --git a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
index 81c898c6be4..958dde7b83d 100644
--- a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFTJIS_2004.pl
- */
-static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8.map */
+
+static const pg_local_to_utf LUmapSHIFT_JIS_2004[ 11271 ] = {	/*  */
   {0x00a1, 0xefbda1},	/* U+FF61	 HALFWIDTH IDEOGRAPHIC FULL STOP */
   {0x00a2, 0xefbda2},	/* U+FF62	 HALFWIDTH LEFT CORNER BRACKET */
   {0x00a3, 0xefbda3},	/* U+FF63	 HALFWIDTH RIGHT CORNER BRACKET */
@@ -173,7 +172,7 @@ static const pg_local_to_utf LUmapSHIFT_JIS_2004[] = {
   {0x81aa, 0xe28691},	/* U+2191	 UPWARDS ARROW */
   {0x81ab, 0xe28693},	/* U+2193	 DOWNWARDS ARROW */
   {0x81ac, 0xe38093},	/* U+3013	 GETA MARK */
-  {0x81ad, 0xefbc87},	/* U+FF07	 FULLWIDTH APOSTROPHE	[2000] */
+  {0x81ad, 0xefbc87},	/* U+FF07	 FULLWIDTH APOSTROPHE */
   {0x81ae, 0xefbc82},	/* U+FF02	 FULLWIDTH QUOTATION MARK	[2000] */
   {0x81af, 0xefbc8d},	/* U+FF0D	 FULLWIDTH HYPHEN-MINUS	[2000] */
   {0x81b0, 0x7e},	/* U+007E	 TILDE	[2000]	Fullwidth: U+FF5E */
diff --git a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
index b1c7bced5fd..414e59dc404 100644
--- a/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
+++ b/src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/shift_jis_2004_to_utf8_combined.map */
+
+static const pg_local_to_utf_combined LUmapSHIFT_JIS_2004_combined[ 25 ] = {	/*  */
   {0x82f5, 0x00e3818b, 0x00e3829a},	/* U+304B+309A	 	[2000] */
   {0x82f6, 0x00e3818d, 0x00e3829a},	/* U+304D+309A	 	[2000] */
   {0x82f7, 0x00e3818f, 0x00e3829a},	/* U+304F+309A	 	[2000] */
diff --git a/src/backend/utils/mb/Unicode/ucs2utf.pl b/src/backend/utils/mb/Unicode/ucs2utf.pl
deleted file mode 100644
index e0f1fb226fd..00000000000
--- a/src/backend/utils/mb/Unicode/ucs2utf.pl
+++ /dev/null
@@ -1,35 +0,0 @@
-#
-# Copyright (c) 2001-2016, PostgreSQL Global Development Group
-#
-# src/backend/utils/mb/Unicode/ucs2utf.pl
-# convert UCS-4 to UTF-8
-#
-sub ucs2utf
-{
-	local ($ucs) = @_;
-	local $utf;
-
-	if ($ucs <= 0x007f)
-	{
-		$utf = $ucs;
-	}
-	elsif ($ucs > 0x007f && $ucs <= 0x07ff)
-	{
-		$utf = (($ucs & 0x003f) | 0x80) | ((($ucs >> 6) | 0xc0) << 8);
-	}
-	elsif ($ucs > 0x07ff && $ucs <= 0xffff)
-	{
-		$utf =
-		  ((($ucs >> 12) | 0xe0) << 16) |
-		  (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
-	}
-	else
-	{
-		$utf =
-		  ((($ucs >> 18) | 0xf0) << 24) |
-		  (((($ucs & 0x3ffff) >> 12) | 0x80) << 16) |
-		  (((($ucs & 0x0fc0) >> 6) | 0x80) << 8) | (($ucs & 0x003f) | 0x80);
-	}
-	return ($utf);
-}
-1;
diff --git a/src/backend/utils/mb/Unicode/uhc_to_utf8.map b/src/backend/utils/mb/Unicode/uhc_to_utf8.map
index 26a7b18f658..65c7e114a3a 100644
--- a/src/backend/utils/mb/Unicode/uhc_to_utf8.map
+++ b/src/backend/utils/mb/Unicode/uhc_to_utf8.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/uhc_to_utf8.map */
+
 static const pg_local_to_utf LUmapUHC[ 17237 ] = {
   {0x8141, 0xeab082},
   {0x8142, 0xeab083},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map b/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
index b28eb9cc0c7..3d64cd1a604 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_cn.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_cn.map */
+
 static const pg_utf_to_local ULmapEUC_CN[ 7445 ] = {
   {0xc2a4, 0xa1e8},
   {0xc2a7, 0xa1ec},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
index 51372012176..b50e232b6ce 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004.map */
+
+static const pg_utf_to_local ULmapEUC_JIS_2004[ 11303 ] = {	/*  */
   {0xc280, 0x0080},	/* U+0080	 <control> */
   {0xc281, 0x0081},	/* U+0081	 <control> */
   {0xc282, 0x0082},	/* U+0082	 <control> */
@@ -10849,7 +10848,7 @@ static const pg_utf_to_local ULmapEUC_JIS_2004[] = {
   {0xefbc84, 0xa1f0},	/* U+FF04	 FULLWIDTH DOLLAR SIGN */
   {0xefbc85, 0xa1f3},	/* U+FF05	 FULLWIDTH PERCENT SIGN */
   {0xefbc86, 0xa1f5},	/* U+FF06	 FULLWIDTH AMPERSAND */
-  {0xefbc87, 0xa2af},	/* U+FF07	 FULLWIDTH APOSTROPHE	[2000] */
+  {0xefbc87, 0xa2af},	/* U+FF07	 FULLWIDTH APOSTROPHE */
   {0xefbc88, 0xa1ca},	/* U+FF08	 FULLWIDTH LEFT PARENTHESIS */
   {0xefbc89, 0xa1cb},	/* U+FF09	 FULLWIDTH RIGHT PARENTHESIS */
   {0xefbc8a, 0xa1f6},	/* U+FF0A	 FULLWIDTH ASTERISK */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
index d8ff5c05868..0d57667a558 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_EUC_JIS_2004.pl
- */
-static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jis_2004_combined.map */
+
+static const pg_utf_to_local_combined ULmapEUC_JIS_2004_combined[ 25 ] = {	/*  */
   {0x0000c3a6, 0x0000cc80, 0xabc4},	/* U+00E6+0300	 	[2000] */
   {0x0000c994, 0x0000cc80, 0xabc8},	/* U+0254+0300	 	[2000] */
   {0x0000c994, 0x0000cc81, 0xabc9},	/* U+0254+0301	 	[2000] */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map b/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
index 137d4fdef61..eef6db65b34 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_jp.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_jp.map */
+
 static const pg_utf_to_local ULmapEUC_JP[ 13175 ] = {
   {0xc2a1, 0x8fa2c2},
   {0xc2a4, 0x8fa2f0},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map b/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
index 4a78b260ea4..a642b2154f2 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_euc_kr.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_euc_kr.map */
+
 static const pg_utf_to_local ULmapEUC_KR[ 8227 ] = {
   {0xc2a1, 0xa2ae},
   {0xc2a4, 0xa2b4},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_johab.map b/src/backend/utils/mb/Unicode/utf8_to_johab.map
index 869f8213d21..78997d82d04 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_johab.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_johab.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_johab.map */
+
 static const pg_utf_to_local ULmapJOHAB[ 17049 ] = {
   {0xc2a1, 0xd9ae},
   {0xc2a4, 0xd9b4},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
index 4fab64fc956..e9f9e638c66 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004.map */
+
+static const pg_utf_to_local ULmapSHIFT_JIS_2004[ 11271 ] = {	/*  */
   {0xc2a0, 0x8541},	/* U+00A0	 NO-BREAK SPACE	[2000] */
   {0xc2a1, 0x8542},	/* U+00A1	 INVERTED EXCLAMATION MARK	[2000] */
   {0xc2a2, 0x8191},	/* U+00A2	 CENT SIGN	Windows: U+FFE0 */
@@ -10817,7 +10816,7 @@ static const pg_utf_to_local ULmapSHIFT_JIS_2004[] = {
   {0xefbc84, 0x8190},	/* U+FF04	 FULLWIDTH DOLLAR SIGN */
   {0xefbc85, 0x8193},	/* U+FF05	 FULLWIDTH PERCENT SIGN */
   {0xefbc86, 0x8195},	/* U+FF06	 FULLWIDTH AMPERSAND */
-  {0xefbc87, 0x81ad},	/* U+FF07	 FULLWIDTH APOSTROPHE	[2000] */
+  {0xefbc87, 0x81ad},	/* U+FF07	 FULLWIDTH APOSTROPHE */
   {0xefbc88, 0x8169},	/* U+FF08	 FULLWIDTH LEFT PARENTHESIS */
   {0xefbc89, 0x816a},	/* U+FF09	 FULLWIDTH RIGHT PARENTHESIS */
   {0xefbc8a, 0x8196},	/* U+FF0A	 FULLWIDTH ASTERISK */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
index e55d4a2a6cf..3642851fd6a 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map
@@ -1,7 +1,6 @@
-/*
- * This file was generated by UCS_to_SHIFT_JIS_2004.pl
- */
-static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[] = {
+/* src/backend/utils/mb/Unicode/utf8_to_shift_jis_2004_combined.map */
+
+static const pg_utf_to_local_combined ULmapSHIFT_JIS_2004_combined[ 25 ] = {	/*  */
   {0x0000c3a6, 0x0000cc80, 0x8663},	/* U+00E6+0300	 	[2000] */
   {0x0000c994, 0x0000cc80, 0x8667},	/* U+0254+0300	 	[2000] */
   {0x0000c994, 0x0000cc81, 0x8668},	/* U+0254+0301	 	[2000] */
diff --git a/src/backend/utils/mb/Unicode/utf8_to_sjis.map b/src/backend/utils/mb/Unicode/utf8_to_sjis.map
index fb0566a1db0..cd6ea48ffc3 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_sjis.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_sjis.map
@@ -3,7 +3,7 @@
 static const pg_utf_to_local ULmapSJIS[ 7397 ] = {
   {0xc2a2, 0x8191},
   {0xc2a3, 0x8192},
-  {0xc2a5, 0x5c},
+  {0xc2a5, 0x005c},
   {0xc2a7, 0x8198},
   {0xc2a8, 0x814e},
   {0xc2ac, 0x81ca},
@@ -142,7 +142,7 @@ static const pg_utf_to_local ULmapSJIS[ 7397 ] = {
   {0xe280b2, 0x818c},
   {0xe280b3, 0x818d},
   {0xe280bb, 0x81a6},
-  {0xe280be, 0x7e},
+  {0xe280be, 0x007e},
   {0xe28483, 0x818e},
   {0xe28496, 0xfa59},
   {0xe284a1, 0xfa5a},
diff --git a/src/backend/utils/mb/Unicode/utf8_to_uhc.map b/src/backend/utils/mb/Unicode/utf8_to_uhc.map
index 15dfb56a099..dc04726364a 100644
--- a/src/backend/utils/mb/Unicode/utf8_to_uhc.map
+++ b/src/backend/utils/mb/Unicode/utf8_to_uhc.map
@@ -1,3 +1,5 @@
+/* src/backend/utils/mb/Unicode/utf8_to_uhc.map */
+
 static const pg_utf_to_local ULmapUHC[ 17237 ] = {
   {0xc2a1, 0xa2ae},
   {0xc2a4, 0xa2b4},