aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/mb/conv.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/mb/conv.c')
-rw-r--r--src/backend/utils/mb/conv.c253
1 files changed, 200 insertions, 53 deletions
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index ff7375df0dd..5a12f629cf1 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -6,7 +6,7 @@
* WIN1250 client encoding support contributed by Pavel Behal
* SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya
*
- * $Id: conv.c,v 1.19 2000/10/27 02:23:51 ishii Exp $
+ * $Id: conv.c,v 1.20 2000/10/30 10:40:28 ishii Exp $
*
*
*/
@@ -16,24 +16,33 @@
#include "mb/pg_wchar.h"
-/*
- * XXX dummy elog() function for frontend only. Note that elog would
- * never be called from frontend, but to avoid the linking errors we
- * have to do it anyway. In the future, we should consider reorganizing
- * sources in this directory to avoid this kind of ugliness...
-*/
-
-#ifdef FRONTEND
-static void
-elog(int lev, const char *fmt, ...) {}
-#endif
+#ifdef UNICODE_CONVERSION
/*
* for Unicode (UTF-8) support
*/
-#include "iso8859.map" /* UTF-8 <--> ISO8859 map */
-#include "UTF_to_EUC_JP.map" /* UTF-8 --> EUC_JP map */
-#include "EUC_JP_to_UTF.map" /* UTF-8 <-- EUC_JP map */
+#include "Unicode/utf8_to_iso8859_2.map"
+#include "Unicode/utf8_to_iso8859_3.map"
+#include "Unicode/utf8_to_iso8859_4.map"
+#include "Unicode/utf8_to_iso8859_5.map"
+#include "Unicode/iso8859_2_to_utf8.map"
+#include "Unicode/iso8859_3_to_utf8.map"
+#include "Unicode/iso8859_4_to_utf8.map"
+#include "Unicode/iso8859_5_to_utf8.map"
+#include "Unicode/utf8_to_euc_jp.map"
+#include "Unicode/euc_jp_to_utf8.map"
+#include "Unicode/utf8_to_euc_cn.map"
+#include "Unicode/euc_cn_to_utf8.map"
+#include "Unicode/utf8_to_euc_kr.map"
+#include "Unicode/euc_kr_to_utf8.map"
+#include "Unicode/utf8_to_euc_tw.map"
+#include "Unicode/euc_tw_to_utf8.map"
+#include "Unicode/utf8_to_sjis.map"
+#include "Unicode/sjis_to_utf8.map"
+#include "Unicode/utf8_to_big5.map"
+#include "Unicode/big5_to_utf8.map"
+
+#endif /* UNICODE_CONVERSION */
/*
* SJIS alternative code.
@@ -1119,6 +1128,8 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len)
mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250);
}
+#ifdef UNICODE_CONVERSION
+
/*
* UNICODE(UTF-8) support
*/
@@ -1223,21 +1234,20 @@ static int compare2(const void *p1, const void *p2)
/*
* UTF-8 ---> local code
+ *
+ * utf: input UTF-8 string. Its length is limited by "len" parameter
+ * or a null terminater.
+ * iso: pointer to the output.
+ * map: the conversion map.
+ * size: the size of the conversion map.
*/
static void
utf_to_local(unsigned char *utf, unsigned char *iso,
- pg_utf_to_local *map, int size, int encoding, int len)
+ pg_utf_to_local *map, int size, int len)
{
unsigned int iutf;
int l;
pg_utf_to_local *p;
- pg_encoding_conv_tbl *e;
-
- e = pg_get_enc_ent(encoding);
- if (e == 0)
- {
- elog(ERROR, "Invalid encoding number %d", encoding);
- }
for (;len > 0 && *utf; len -= l)
{
@@ -1260,14 +1270,9 @@ utf_to_local(unsigned char *utf, unsigned char *iso,
}
p = bsearch(&iutf, map, size,
sizeof(pg_utf_to_local), compare1);
- if (p == NULL || p->encoding != encoding)
+ if (p == NULL)
{
- elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored",
- iutf, e->name);
- /*
- printf("utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored",
- iutf, e->name);
- */
+ elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) Ignored", iutf);
continue;
}
if (p->code & 0xff000000)
@@ -1287,9 +1292,8 @@ utf_to_local(unsigned char *utf, unsigned char *iso,
*/
static void
utf_to_latin2(unsigned char *utf, unsigned char *iso, int len)
-
{
- utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN2, len);
+ utf_to_local(utf, iso, ULmapISO8859_2, sizeof(ULmapISO8859_2)/sizeof(pg_utf_to_local), len);
}
/*
@@ -1297,9 +1301,8 @@ utf_to_latin2(unsigned char *utf, unsigned char *iso, int len)
*/
static void
utf_to_latin3(unsigned char *utf, unsigned char *iso, int len)
-
{
- utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN3, len);
+ utf_to_local(utf, iso, ULmapISO8859_3, sizeof(ULmapISO8859_3)/sizeof(pg_utf_to_local), len);
}
/*
@@ -1308,7 +1311,7 @@ utf_to_latin3(unsigned char *utf, unsigned char *iso, int len)
static void
utf_to_latin4(unsigned char *utf, unsigned char *iso, int len)
{
- utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN4, len);
+ utf_to_local(utf, iso, ULmapISO8859_4, sizeof(ULmapISO8859_4)/sizeof(pg_utf_to_local), len);
}
/*
@@ -1318,7 +1321,7 @@ static void
utf_to_latin5(unsigned char *utf, unsigned char *iso, int len)
{
- utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN5, len);
+ utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5)/sizeof(pg_utf_to_local), len);
}
/*
@@ -1348,7 +1351,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
continue;
}
- l = pg_mblen_with_encoding(iso, encoding);
+ l = pg_encoding_mblen(encoding, iso);
if (l == 1)
{
@@ -1378,10 +1381,6 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
{
elog(NOTICE, "local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored",
iiso, e->name);
- /*
- printf("local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored",
- iiso, e->name);
- */
continue;
}
if (p->utf & 0xff000000)
@@ -1402,7 +1401,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf,
static void
latin2_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
- local_to_utf(iso, utf, ISO8859_2, sizeof(ISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len);
+ local_to_utf(iso, utf, LUmapISO8859_2, sizeof(LUmapISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len);
}
/*
@@ -1411,7 +1410,7 @@ latin2_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin3_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
- local_to_utf(iso, utf, ISO8859_3, sizeof(ISO8859_3)/sizeof(pg_local_to_utf), LATIN2, len);
+ local_to_utf(iso, utf, LUmapISO8859_3, sizeof(LUmapISO8859_3)/sizeof(pg_local_to_utf), LATIN3, len);
}
/*
@@ -1420,7 +1419,7 @@ latin3_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin4_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
- local_to_utf(iso, utf, ISO8859_4, sizeof(ISO8859_4)/sizeof(pg_local_to_utf), LATIN2, len);
+ local_to_utf(iso, utf, LUmapISO8859_4, sizeof(LUmapISO8859_4)/sizeof(pg_local_to_utf), LATIN4, len);
}
/*
@@ -1429,7 +1428,7 @@ latin4_to_utf(unsigned char *iso, unsigned char *utf, int len)
static void
latin5_to_utf(unsigned char *iso, unsigned char *utf, int len)
{
- local_to_utf(iso, utf, ISO8859_5, sizeof(ISO8859_5)/sizeof(pg_local_to_utf), LATIN2, len);
+ local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5)/sizeof(pg_local_to_utf), LATIN5, len);
}
/*
@@ -1439,8 +1438,8 @@ static void
utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len)
{
- utf_to_local(utf, euc, mapUTF_to_EUC_JP,
- sizeof(mapUTF_to_EUC_JP)/sizeof(pg_utf_to_local), EUC_JP, len);
+ utf_to_local(utf, euc, ULmapEUC_JP,
+ sizeof(ULmapEUC_JP)/sizeof(pg_utf_to_local), len);
}
/*
@@ -1449,8 +1448,113 @@ utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len)
static void
euc_jp_to_utf(unsigned char *euc, unsigned char *utf, int len)
{
- local_to_utf(euc, utf, mapEUC_JP_to_UTF,
- sizeof(mapEUC_JP_to_UTF)/sizeof(pg_local_to_utf), EUC_JP, len);
+ local_to_utf(euc, utf, LUmapEUC_JP,
+ sizeof(LUmapEUC_JP)/sizeof(pg_local_to_utf), EUC_JP, len);
+}
+
+/*
+ * UTF-8 ---> EUC_CN
+ */
+static void
+utf_to_euc_cn(unsigned char *utf, unsigned char *euc, int len)
+
+{
+ utf_to_local(utf, euc, ULmapEUC_CN,
+ sizeof(ULmapEUC_CN)/sizeof(pg_utf_to_local), len);
+}
+
+/*
+ * EUC_CN ---> UTF-8
+ */
+static void
+euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+ local_to_utf(euc, utf, LUmapEUC_CN,
+ sizeof(LUmapEUC_CN)/sizeof(pg_local_to_utf), EUC_CN, len);
+}
+
+/*
+ * UTF-8 ---> EUC_KR
+ */
+static void
+utf_to_euc_kr(unsigned char *utf, unsigned char *euc, int len)
+
+{
+ utf_to_local(utf, euc, ULmapEUC_KR,
+ sizeof(ULmapEUC_KR)/sizeof(pg_utf_to_local), len);
+}
+
+/*
+ * EUC_KR ---> UTF-8
+ */
+static void
+euc_kr_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+ local_to_utf(euc, utf, LUmapEUC_KR,
+ sizeof(LUmapEUC_KR)/sizeof(pg_local_to_utf), EUC_KR, len);
+}
+
+/*
+ * UTF-8 ---> EUC_TW
+ */
+static void
+utf_to_euc_tw(unsigned char *utf, unsigned char *euc, int len)
+
+{
+ utf_to_local(utf, euc, ULmapEUC_TW,
+ sizeof(ULmapEUC_TW)/sizeof(pg_utf_to_local), len);
+}
+
+/*
+ * EUC_TW ---> UTF-8
+ */
+static void
+euc_tw_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+ local_to_utf(euc, utf, LUmapEUC_TW,
+ sizeof(LUmapEUC_TW)/sizeof(pg_local_to_utf), EUC_TW, len);
+}
+
+/*
+ * UTF-8 ---> SJIS
+ */
+static void
+utf_to_sjis(unsigned char *utf, unsigned char *euc, int len)
+
+{
+ utf_to_local(utf, euc, ULmapSJIS,
+ sizeof(ULmapSJIS)/sizeof(pg_utf_to_local), len);
+}
+
+/*
+ * SJIS ---> UTF-8
+ */
+static void
+sjis_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+ local_to_utf(euc, utf, LUmapSJIS,
+ sizeof(LUmapSJIS)/sizeof(pg_local_to_utf), SJIS, len);
+}
+
+/*
+ * UTF-8 ---> BIG5
+ */
+static void
+utf_to_big5(unsigned char *utf, unsigned char *euc, int len)
+
+{
+ utf_to_local(utf, euc, ULmapBIG5,
+ sizeof(ULmapBIG5)/sizeof(pg_utf_to_local), len);
+}
+
+/*
+ * BIG5 ---> UTF-8
+ */
+static void
+big5_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+ local_to_utf(euc, utf, LUmapBIG5,
+ sizeof(LUmapBIG5)/sizeof(pg_local_to_utf), BIG5, len);
}
/*-----------------------------------------------------------------*/
@@ -1460,9 +1564,12 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
ascii2utf, utf2ascii}, /* SQL/ASCII */
{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp,
euc_jp_to_utf, utf_to_euc_jp}, /* EUC_JP */
- {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn}, /* EUC_CN */
- {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr}, /* EUC_KR */
- {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw}, /* EUC_TW */
+ {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn,
+ euc_cn_to_utf, utf_to_euc_cn}, /* EUC_CN */
+ {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr,
+ euc_kr_to_utf, utf_to_euc_kr}, /* EUC_KR */
+ {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw,
+ euc_tw_to_utf, utf_to_euc_tw}, /* EUC_TW */
{UNICODE, "UNICODE", 0, 0, 0}, /* UNICODE */
{MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0}, /* MULE_INTERNAL */
{LATIN1, "LATIN1", 0, latin12mic, mic2latin1,
@@ -1482,10 +1589,50 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
{ALT, "ALT", 0, alt2mic, mic2alt,
0,0}, /* CP866 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis,
- 0,0}, /* SJIS */
+ sjis_to_utf, utf_to_sjis}, /* SJIS */
+ {BIG5, "BIG5", 1, big52mic, mic2big5,
+ big5_to_utf, utf_to_big5}, /* Big5 */
+ {WIN1250, "WIN1250", 1, win12502mic, mic2win1250,
+ 0,0}, /* WIN 1250 */
+ {-1, "", 0, 0, 0, 0} /* end mark */
+};
+
+#else
+
+pg_encoding_conv_tbl pg_conv_tbl[] = {
+ {SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii,
+ 0, 0}, /* SQL/ASCII */
+ {EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp,
+ 0, 0}, /* EUC_JP */
+ {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn,
+ 0, 0}, /* EUC_CN */
+ {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr}, /* EUC_KR */
+ {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw}, /* EUC_TW */
+ {UNICODE, "UNICODE", 0, 0, 0}, /* UNICODE */
+ {MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0}, /* MULE_INTERNAL */
+ {LATIN1, "LATIN1", 0, latin12mic, mic2latin1,
+ 0, 0}, /* ISO 8859 Latin 1 */
+ {LATIN2, "LATIN2", 0, latin22mic, mic2latin2,
+ 0, 0}, /* ISO 8859 Latin 2 */
+ {LATIN3, "LATIN3", 0, latin32mic, mic2latin3,
+ 0, 0}, /* ISO 8859 Latin 3 */
+ {LATIN4, "LATIN4", 0, latin42mic, mic2latin4,
+ 0, 0}, /* ISO 8859 Latin 4 */
+ {LATIN5, "LATIN5", 0, iso2mic, mic2iso,
+ 0, 0}, /* ISO 8859 Latin 5 */
+ {KOI8, "KOI8", 0, koi2mic, mic2koi,
+ 0, 0}, /* KOI8-R */
+ {WIN, "WIN", 0, win2mic, mic2win,
+ 0,0}, /* CP1251 */
+ {ALT, "ALT", 0, alt2mic, mic2alt,
+ 0,0}, /* CP866 */
+ {SJIS, "SJIS", 1, sjis2mic, mic2sjis,
+ 0, 0}, /* SJIS */
{BIG5, "BIG5", 1, big52mic, mic2big5,
0,0}, /* Big5 */
{WIN1250, "WIN1250", 1, win12502mic, mic2win1250,
0,0}, /* WIN 1250 */
{-1, "", 0, 0, 0, 0} /* end mark */
};
+
+#endif /* UNICODE_CONVERSION */