diff options
Diffstat (limited to 'src/backend/utils/mb/conv.c')
-rw-r--r-- | src/backend/utils/mb/conv.c | 253 |
1 files changed, 200 insertions, 53 deletions
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c index ff7375df0dd..5a12f629cf1 100644 --- a/src/backend/utils/mb/conv.c +++ b/src/backend/utils/mb/conv.c @@ -6,7 +6,7 @@ * WIN1250 client encoding support contributed by Pavel Behal * SJIS UDC (NEC selection IBM kanji) support contributed by Eiji Tokuya * - * $Id: conv.c,v 1.19 2000/10/27 02:23:51 ishii Exp $ + * $Id: conv.c,v 1.20 2000/10/30 10:40:28 ishii Exp $ * * */ @@ -16,24 +16,33 @@ #include "mb/pg_wchar.h" -/* - * XXX dummy elog() function for frontend only. Note that elog would - * never be called from frontend, but to avoid the linking errors we - * have to do it anyway. In the future, we should consider reorganizing - * sources in this directory to avoid this kind of ugliness... -*/ - -#ifdef FRONTEND -static void -elog(int lev, const char *fmt, ...) {} -#endif +#ifdef UNICODE_CONVERSION /* * for Unicode (UTF-8) support */ -#include "iso8859.map" /* UTF-8 <--> ISO8859 map */ -#include "UTF_to_EUC_JP.map" /* UTF-8 --> EUC_JP map */ -#include "EUC_JP_to_UTF.map" /* UTF-8 <-- EUC_JP map */ +#include "Unicode/utf8_to_iso8859_2.map" +#include "Unicode/utf8_to_iso8859_3.map" +#include "Unicode/utf8_to_iso8859_4.map" +#include "Unicode/utf8_to_iso8859_5.map" +#include "Unicode/iso8859_2_to_utf8.map" +#include "Unicode/iso8859_3_to_utf8.map" +#include "Unicode/iso8859_4_to_utf8.map" +#include "Unicode/iso8859_5_to_utf8.map" +#include "Unicode/utf8_to_euc_jp.map" +#include "Unicode/euc_jp_to_utf8.map" +#include "Unicode/utf8_to_euc_cn.map" +#include "Unicode/euc_cn_to_utf8.map" +#include "Unicode/utf8_to_euc_kr.map" +#include "Unicode/euc_kr_to_utf8.map" +#include "Unicode/utf8_to_euc_tw.map" +#include "Unicode/euc_tw_to_utf8.map" +#include "Unicode/utf8_to_sjis.map" +#include "Unicode/sjis_to_utf8.map" +#include "Unicode/utf8_to_big5.map" +#include "Unicode/big5_to_utf8.map" + +#endif /* UNICODE_CONVERSION */ /* * SJIS alternative code. @@ -1119,6 +1128,8 @@ mic2win1250(unsigned char *mic, unsigned char *p, int len) mic2latin_with_table(mic, p, len, LC_ISO8859_2, iso88592_2_win1250); } +#ifdef UNICODE_CONVERSION + /* * UNICODE(UTF-8) support */ @@ -1223,21 +1234,20 @@ static int compare2(const void *p1, const void *p2) /* * UTF-8 ---> local code + * + * utf: input UTF-8 string. Its length is limited by "len" parameter + * or a null terminater. + * iso: pointer to the output. + * map: the conversion map. + * size: the size of the conversion map. */ static void utf_to_local(unsigned char *utf, unsigned char *iso, - pg_utf_to_local *map, int size, int encoding, int len) + pg_utf_to_local *map, int size, int len) { unsigned int iutf; int l; pg_utf_to_local *p; - pg_encoding_conv_tbl *e; - - e = pg_get_enc_ent(encoding); - if (e == 0) - { - elog(ERROR, "Invalid encoding number %d", encoding); - } for (;len > 0 && *utf; len -= l) { @@ -1260,14 +1270,9 @@ utf_to_local(unsigned char *utf, unsigned char *iso, } p = bsearch(&iutf, map, size, sizeof(pg_utf_to_local), compare1); - if (p == NULL || p->encoding != encoding) + if (p == NULL) { - elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored", - iutf, e->name); - /* - printf("utf_to_latin: could not convert UTF-8 (0x%04x) to %s. Ignored", - iutf, e->name); - */ + elog(NOTICE, "utf_to_latin: could not convert UTF-8 (0x%04x) Ignored", iutf); continue; } if (p->code & 0xff000000) @@ -1287,9 +1292,8 @@ utf_to_local(unsigned char *utf, unsigned char *iso, */ static void utf_to_latin2(unsigned char *utf, unsigned char *iso, int len) - { - utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN2, len); + utf_to_local(utf, iso, ULmapISO8859_2, sizeof(ULmapISO8859_2)/sizeof(pg_utf_to_local), len); } /* @@ -1297,9 +1301,8 @@ utf_to_latin2(unsigned char *utf, unsigned char *iso, int len) */ static void utf_to_latin3(unsigned char *utf, unsigned char *iso, int len) - { - utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN3, len); + utf_to_local(utf, iso, ULmapISO8859_3, sizeof(ULmapISO8859_3)/sizeof(pg_utf_to_local), len); } /* @@ -1308,7 +1311,7 @@ utf_to_latin3(unsigned char *utf, unsigned char *iso, int len) static void utf_to_latin4(unsigned char *utf, unsigned char *iso, int len) { - utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN4, len); + utf_to_local(utf, iso, ULmapISO8859_4, sizeof(ULmapISO8859_4)/sizeof(pg_utf_to_local), len); } /* @@ -1318,7 +1321,7 @@ static void utf_to_latin5(unsigned char *utf, unsigned char *iso, int len) { - utf_to_local(utf, iso, mapISO8859, sizeof(mapISO8859)/sizeof(pg_utf_to_local), LATIN5, len); + utf_to_local(utf, iso, ULmapISO8859_5, sizeof(ULmapISO8859_5)/sizeof(pg_utf_to_local), len); } /* @@ -1348,7 +1351,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf, continue; } - l = pg_mblen_with_encoding(iso, encoding); + l = pg_encoding_mblen(encoding, iso); if (l == 1) { @@ -1378,10 +1381,6 @@ local_to_utf(unsigned char *iso, unsigned char *utf, { elog(NOTICE, "local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored", iiso, e->name); - /* - printf("local_to_utf: could not convert (0x%04x) %s to UTF-8. Ignored", - iiso, e->name); - */ continue; } if (p->utf & 0xff000000) @@ -1402,7 +1401,7 @@ local_to_utf(unsigned char *iso, unsigned char *utf, static void latin2_to_utf(unsigned char *iso, unsigned char *utf, int len) { - local_to_utf(iso, utf, ISO8859_2, sizeof(ISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len); + local_to_utf(iso, utf, LUmapISO8859_2, sizeof(LUmapISO8859_2)/sizeof(pg_local_to_utf), LATIN2, len); } /* @@ -1411,7 +1410,7 @@ latin2_to_utf(unsigned char *iso, unsigned char *utf, int len) static void latin3_to_utf(unsigned char *iso, unsigned char *utf, int len) { - local_to_utf(iso, utf, ISO8859_3, sizeof(ISO8859_3)/sizeof(pg_local_to_utf), LATIN2, len); + local_to_utf(iso, utf, LUmapISO8859_3, sizeof(LUmapISO8859_3)/sizeof(pg_local_to_utf), LATIN3, len); } /* @@ -1420,7 +1419,7 @@ latin3_to_utf(unsigned char *iso, unsigned char *utf, int len) static void latin4_to_utf(unsigned char *iso, unsigned char *utf, int len) { - local_to_utf(iso, utf, ISO8859_4, sizeof(ISO8859_4)/sizeof(pg_local_to_utf), LATIN2, len); + local_to_utf(iso, utf, LUmapISO8859_4, sizeof(LUmapISO8859_4)/sizeof(pg_local_to_utf), LATIN4, len); } /* @@ -1429,7 +1428,7 @@ latin4_to_utf(unsigned char *iso, unsigned char *utf, int len) static void latin5_to_utf(unsigned char *iso, unsigned char *utf, int len) { - local_to_utf(iso, utf, ISO8859_5, sizeof(ISO8859_5)/sizeof(pg_local_to_utf), LATIN2, len); + local_to_utf(iso, utf, LUmapISO8859_5, sizeof(LUmapISO8859_5)/sizeof(pg_local_to_utf), LATIN5, len); } /* @@ -1439,8 +1438,8 @@ static void utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len) { - utf_to_local(utf, euc, mapUTF_to_EUC_JP, - sizeof(mapUTF_to_EUC_JP)/sizeof(pg_utf_to_local), EUC_JP, len); + utf_to_local(utf, euc, ULmapEUC_JP, + sizeof(ULmapEUC_JP)/sizeof(pg_utf_to_local), len); } /* @@ -1449,8 +1448,113 @@ utf_to_euc_jp(unsigned char *utf, unsigned char *euc, int len) static void euc_jp_to_utf(unsigned char *euc, unsigned char *utf, int len) { - local_to_utf(euc, utf, mapEUC_JP_to_UTF, - sizeof(mapEUC_JP_to_UTF)/sizeof(pg_local_to_utf), EUC_JP, len); + local_to_utf(euc, utf, LUmapEUC_JP, + sizeof(LUmapEUC_JP)/sizeof(pg_local_to_utf), EUC_JP, len); +} + +/* + * UTF-8 ---> EUC_CN + */ +static void +utf_to_euc_cn(unsigned char *utf, unsigned char *euc, int len) + +{ + utf_to_local(utf, euc, ULmapEUC_CN, + sizeof(ULmapEUC_CN)/sizeof(pg_utf_to_local), len); +} + +/* + * EUC_CN ---> UTF-8 + */ +static void +euc_cn_to_utf(unsigned char *euc, unsigned char *utf, int len) +{ + local_to_utf(euc, utf, LUmapEUC_CN, + sizeof(LUmapEUC_CN)/sizeof(pg_local_to_utf), EUC_CN, len); +} + +/* + * UTF-8 ---> EUC_KR + */ +static void +utf_to_euc_kr(unsigned char *utf, unsigned char *euc, int len) + +{ + utf_to_local(utf, euc, ULmapEUC_KR, + sizeof(ULmapEUC_KR)/sizeof(pg_utf_to_local), len); +} + +/* + * EUC_KR ---> UTF-8 + */ +static void +euc_kr_to_utf(unsigned char *euc, unsigned char *utf, int len) +{ + local_to_utf(euc, utf, LUmapEUC_KR, + sizeof(LUmapEUC_KR)/sizeof(pg_local_to_utf), EUC_KR, len); +} + +/* + * UTF-8 ---> EUC_TW + */ +static void +utf_to_euc_tw(unsigned char *utf, unsigned char *euc, int len) + +{ + utf_to_local(utf, euc, ULmapEUC_TW, + sizeof(ULmapEUC_TW)/sizeof(pg_utf_to_local), len); +} + +/* + * EUC_TW ---> UTF-8 + */ +static void +euc_tw_to_utf(unsigned char *euc, unsigned char *utf, int len) +{ + local_to_utf(euc, utf, LUmapEUC_TW, + sizeof(LUmapEUC_TW)/sizeof(pg_local_to_utf), EUC_TW, len); +} + +/* + * UTF-8 ---> SJIS + */ +static void +utf_to_sjis(unsigned char *utf, unsigned char *euc, int len) + +{ + utf_to_local(utf, euc, ULmapSJIS, + sizeof(ULmapSJIS)/sizeof(pg_utf_to_local), len); +} + +/* + * SJIS ---> UTF-8 + */ +static void +sjis_to_utf(unsigned char *euc, unsigned char *utf, int len) +{ + local_to_utf(euc, utf, LUmapSJIS, + sizeof(LUmapSJIS)/sizeof(pg_local_to_utf), SJIS, len); +} + +/* + * UTF-8 ---> BIG5 + */ +static void +utf_to_big5(unsigned char *utf, unsigned char *euc, int len) + +{ + utf_to_local(utf, euc, ULmapBIG5, + sizeof(ULmapBIG5)/sizeof(pg_utf_to_local), len); +} + +/* + * BIG5 ---> UTF-8 + */ +static void +big5_to_utf(unsigned char *euc, unsigned char *utf, int len) +{ + local_to_utf(euc, utf, LUmapBIG5, + sizeof(LUmapBIG5)/sizeof(pg_local_to_utf), BIG5, len); } /*-----------------------------------------------------------------*/ @@ -1460,9 +1564,12 @@ pg_encoding_conv_tbl pg_conv_tbl[] = { ascii2utf, utf2ascii}, /* SQL/ASCII */ {EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp, euc_jp_to_utf, utf_to_euc_jp}, /* EUC_JP */ - {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn}, /* EUC_CN */ - {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr}, /* EUC_KR */ - {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw}, /* EUC_TW */ + {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn, + euc_cn_to_utf, utf_to_euc_cn}, /* EUC_CN */ + {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr, + euc_kr_to_utf, utf_to_euc_kr}, /* EUC_KR */ + {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw, + euc_tw_to_utf, utf_to_euc_tw}, /* EUC_TW */ {UNICODE, "UNICODE", 0, 0, 0}, /* UNICODE */ {MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0}, /* MULE_INTERNAL */ {LATIN1, "LATIN1", 0, latin12mic, mic2latin1, @@ -1482,10 +1589,50 @@ pg_encoding_conv_tbl pg_conv_tbl[] = { {ALT, "ALT", 0, alt2mic, mic2alt, 0,0}, /* CP866 */ {SJIS, "SJIS", 1, sjis2mic, mic2sjis, - 0,0}, /* SJIS */ + sjis_to_utf, utf_to_sjis}, /* SJIS */ + {BIG5, "BIG5", 1, big52mic, mic2big5, + big5_to_utf, utf_to_big5}, /* Big5 */ + {WIN1250, "WIN1250", 1, win12502mic, mic2win1250, + 0,0}, /* WIN 1250 */ + {-1, "", 0, 0, 0, 0} /* end mark */ +}; + +#else + +pg_encoding_conv_tbl pg_conv_tbl[] = { + {SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii, + 0, 0}, /* SQL/ASCII */ + {EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp, + 0, 0}, /* EUC_JP */ + {EUC_CN, "EUC_CN", 0, euc_cn2mic, mic2euc_cn, + 0, 0}, /* EUC_CN */ + {EUC_KR, "EUC_KR", 0, euc_kr2mic, mic2euc_kr}, /* EUC_KR */ + {EUC_TW, "EUC_TW", 0, euc_tw2mic, mic2euc_tw}, /* EUC_TW */ + {UNICODE, "UNICODE", 0, 0, 0}, /* UNICODE */ + {MULE_INTERNAL, "MULE_INTERNAL", 0, 0, 0}, /* MULE_INTERNAL */ + {LATIN1, "LATIN1", 0, latin12mic, mic2latin1, + 0, 0}, /* ISO 8859 Latin 1 */ + {LATIN2, "LATIN2", 0, latin22mic, mic2latin2, + 0, 0}, /* ISO 8859 Latin 2 */ + {LATIN3, "LATIN3", 0, latin32mic, mic2latin3, + 0, 0}, /* ISO 8859 Latin 3 */ + {LATIN4, "LATIN4", 0, latin42mic, mic2latin4, + 0, 0}, /* ISO 8859 Latin 4 */ + {LATIN5, "LATIN5", 0, iso2mic, mic2iso, + 0, 0}, /* ISO 8859 Latin 5 */ + {KOI8, "KOI8", 0, koi2mic, mic2koi, + 0, 0}, /* KOI8-R */ + {WIN, "WIN", 0, win2mic, mic2win, + 0,0}, /* CP1251 */ + {ALT, "ALT", 0, alt2mic, mic2alt, + 0,0}, /* CP866 */ + {SJIS, "SJIS", 1, sjis2mic, mic2sjis, + 0, 0}, /* SJIS */ {BIG5, "BIG5", 1, big52mic, mic2big5, 0,0}, /* Big5 */ {WIN1250, "WIN1250", 1, win12502mic, mic2win1250, 0,0}, /* WIN 1250 */ {-1, "", 0, 0, 0, 0} /* end mark */ }; + +#endif /* UNICODE_CONVERSION */ |