aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/utils/mb/conv.c 263
-rw-r--r-- src/backend/utils/mb/wchar.c 70
2 files changed, 296 insertions, 37 deletions
diff --git a/src/backend/utils/mb/conv.c b/src/backend/utils/mb/conv.c
index 9aac4e96a92..e2b6ff0c9dd 100644
--- a/src/backend/utils/mb/conv.c
+++ b/src/backend/utils/mb/conv.c
@@ -2,7 +2,7 @@
* conversion between client encoding and server internal encoding
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
- * $Id: conv.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
+ * $Id: conv.c,v 1.6 1999/03/24 07:02:16 ishii Exp $
*/
#include <stdio.h>
#include <string.h>
@@ -588,6 +588,262 @@ mic2ascii(unsigned char *mic, unsigned char *p, int len)
*p = '\0';
}
+/*
+ * Cyrillic support
+ * currently supported Cyrillic encodings:
+ *
+ * KOI8-R (this is the charset for the mule internal code
+ * for Cyrillic)
+ * ISO-8859-5
+ * Microsoft's CP1251(windows-1251)
+ * Alternativny Variant (MS-DOS CP866)
+ */
+
+/*
+ * koi2mic: KOI8-R to Mule internal code.
+ *
+ * KOI8-R is the charset used by the mule internal code for Cyrillic,
+ * so no conversion table is involved: the arguments are forwarded
+ * unchanged to latin2mic() with LC_KOI8_R as the leading character.
+ */
+static void
+koi2mic(unsigned char *l, unsigned char *p, int len)
+{
+    latin2mic(l, p, len, LC_KOI8_R);
+}
+
+/*
+ * mic2koi: Mule internal code to KOI8-R.
+ *
+ * KOI8-R is the charset used by the mule internal code for Cyrillic,
+ * so no conversion table is involved: the arguments are forwarded
+ * unchanged to mic2latin() with LC_KOI8_R as the leading character.
+ */
+static void
+mic2koi(unsigned char *mic, unsigned char *p, int len)
+{
+    mic2latin(mic, p, len, LC_KOI8_R);
+}
+
+/*
+ * latin2mic_with_table: table-driven conversion of a single byte
+ * local charset string into the mule internal code.
+ *
+ * Bytes below 0x80 are copied unchanged.  A byte c >= 0x80 is looked
+ * up as tab[c - 0x80]: a non-zero entry is written as the two byte
+ * sequence (lc, entry), a zero entry means "no equivalent" and is
+ * written as a single space.  Conversion stops after len source bytes
+ * or at the first NUL byte, whichever comes first; the output is
+ * always NUL terminated.
+ */
+static void
+latin2mic_with_table(
+    unsigned char *l,       /* local charset string (source) */
+    unsigned char *p,       /* pointer to store mule internal code
+                               (destination) */
+    int len,                /* length of l */
+    int lc,                 /* leading character of p */
+    unsigned char *tab      /* code conversion table */
+    )
+{
+    unsigned char src;
+    unsigned char mapped;
+
+    for (; len > 0; len--) {
+        src = *l++;
+        if (src == '\0') {
+            break;              /* end of source string */
+        }
+        if (src < 128) {
+            *p++ = src;         /* plain ASCII passes through */
+            continue;
+        }
+        mapped = tab[src - 128];
+        if (mapped == 0) {
+            *p++ = ' ';         /* cannot convert */
+            continue;
+        }
+        *p++ = lc;
+        *p++ = mapped;
+    }
+    *p = '\0';
+}
+
+/*
+ * mic2latin_with_table: a generic single byte charset encoding
+ * conversion from the mule internal code to a local charset
+ * with an encoding conversion table.
+ * the table is ordered according to the second byte of the mule
+ * internal code starting from 128 (0x80).
+ * each entry in the table
+ * holds the corresponding code point for the local code.
+ *
+ * Bytes below 0x80 are copied unchanged.  The leading character lc
+ * must be followed by a byte >= 0x80, which is translated through
+ * tab; a zero table entry is written as a space.  Any other byte
+ * >= 0x80 is written as a space.  A leading character that is not
+ * followed by a valid second byte (source exhausted, NUL terminator,
+ * or an ASCII byte) is itself written as a space, and the following
+ * byte, if any, is then processed normally.  This keeps the table
+ * lookup inside tab[0..127] and keeps the scan from running past the
+ * end of the source.  The output is always NUL terminated.
+ */
+static void
+mic2latin_with_table(
+    unsigned char *mic,     /* mule internal code (source) */
+    unsigned char *p,       /* local code (destination) */
+    int len,                /* length of mic */
+    int lc,                 /* leading character */
+    unsigned char *tab      /* code conversion table */
+    )
+{
+
+    unsigned char c1,c2;
+
+    while (len-- > 0 && (c1 = *mic++)) {
+        if (c1 < 128) {
+            *p++ = c1;
+        } else if (c1 == lc) {
+            /*
+             * Validate the second byte before consuming it: without
+             * this check a truncated or malformed sequence would
+             * index tab[] with a negative offset and could step over
+             * the terminating NUL.
+             */
+            if (len <= 0 || *mic < 128) {
+                *p++ = ' ';     /* dangling leading character */
+                continue;
+            }
+            c1 = *mic++;
+            len--;
+            c2 = tab[c1 - 128];
+            if (c2) {
+                *p++ = c2;
+            } else {
+                *p++ = ' ';     /* cannot convert */
+            }
+        } else {
+            *p++ = ' ';         /* bogus character */
+        }
+    }
+    *p = '\0';
+}
+
+/*
+ * iso2mic: ISO-8859-5 to Mule internal code
+ *
+ * iso2koi[] is indexed by (ISO-8859-5 byte - 0x80) and holds the
+ * matching KOI8-R code; 0x00 marks a byte with no entry, which
+ * latin2mic_with_table() replaces with a space.  Only the range
+ * 0xb0-0xef has entries.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ */
+static void
+iso2mic(unsigned char *l, unsigned char *p, int len)
+{
+    static unsigned char iso2koi[] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+        0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+        0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+        0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+        0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+        0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+        0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+        0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+    latin2mic_with_table(l, p, len, LC_KOI8_R, iso2koi);
+}
+
+/*
+ * mic2iso: Mule internal code to ISO8859-5
+ *
+ * koi2iso[] is indexed by (KOI8-R byte - 0x80) and holds the matching
+ * ISO-8859-5 code; 0x00 marks a byte with no entry, which
+ * mic2latin_with_table() replaces with a space.  Only the range
+ * 0xc0-0xff has entries.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ */
+static void
+mic2iso(unsigned char *mic, unsigned char *p, int len)
+{
+    static unsigned char koi2iso[] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xee, 0xd0, 0xd1, 0xe6, 0xd4, 0xd5, 0xe4, 0xd3,
+        0xe5, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde,
+        0xdf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xd6, 0xd2,
+        0xec, 0xeb, 0xd7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
+        0xce, 0xb0, 0xb1, 0xc6, 0xb4, 0xb5, 0xc4, 0xb3,
+        0xc5, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe,
+        0xbf, 0xcf, 0xc0, 0xc1, 0xc2, 0xc3, 0xb6, 0xb2,
+        0xcc, 0xcb, 0xb7, 0xc8, 0xcd, 0xc9, 0xc7, 0xca
+    };
+
+    mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2iso);
+}
+
+/*
+ * win2mic: CP1251 to Mule internal code
+ *
+ * win2koi[] is indexed by (CP1251 byte - 0x80) and holds the matching
+ * KOI8-R code; 0x00 marks a byte with no entry, which
+ * latin2mic_with_table() replaces with a space.  0x80-0x9f have no
+ * entries, 0xa0-0xbf are only partly covered, 0xc0-0xff are fully
+ * covered.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ */
+static void
+win2mic(unsigned char *l, unsigned char *p, int len)
+{
+    static unsigned char win2koi[] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+        0xb3, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, 0xb7,
+        0x00, 0x00, 0xb6, 0xa6, 0xad, 0x00, 0x00, 0x00,
+        0xa3, 0x00, 0xa4, 0x00, 0x00, 0x00, 0x00, 0xa7,
+        0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+        0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+        0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+        0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+        0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+        0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+        0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+        0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1
+    };
+    latin2mic_with_table(l, p, len, LC_KOI8_R, win2koi);
+}
+
+/*
+ * mic2win: Mule internal code to CP1251
+ *
+ * koi2win[] is indexed by (KOI8-R byte - 0x80) and holds the matching
+ * CP1251 code; 0x00 marks a byte with no entry, which
+ * mic2latin_with_table() replaces with a space.  0x80-0x9f have no
+ * entries, 0xa0-0xbf are only partly covered, 0xc0-0xff are fully
+ * covered.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ */
+static void
+mic2win(unsigned char *mic, unsigned char *p, int len)
+{
+    static unsigned char koi2win[] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0xb8, 0xba, 0x00, 0xb3, 0xbf,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0xa8, 0xaa, 0x00, 0xb2, 0xaf,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xa5, 0x00, 0x00,
+        0xfe, 0xe0, 0xe1, 0xf6, 0xe4, 0xe5, 0xf4, 0xe3,
+        0xf5, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee,
+        0xef, 0xff, 0xf0, 0xf1, 0xf2, 0xf3, 0xe6, 0xe2,
+        0xfc, 0xfb, 0xe7, 0xf8, 0xfd, 0xf9, 0xf7, 0xfa,
+        0xde, 0xc0, 0xc1, 0xd6, 0xc4, 0xc5, 0xd4, 0xc3,
+        0xd5, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+        0xcf, 0xdf, 0xd0, 0xd1, 0xd2, 0xd3, 0xc6, 0xc2,
+        0xdc, 0xdb, 0xc7, 0xd8, 0xdd, 0xd9, 0xd7, 0xda
+    };
+    mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2win);
+}
+
+/*
+ * alt2mic: CP866 to Mule internal code
+ *
+ * alt2koi[] is indexed by (CP866 byte - 0x80) and holds the matching
+ * KOI8-R code; 0x00 marks a byte with no entry, which
+ * latin2mic_with_table() replaces with a space.  0x80-0xaf and
+ * 0xe0-0xef are fully covered; 0xb0-0xdf has a single entry (0xbd)
+ * and 0xf0-0xff is only partly covered.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ */
+static void
+alt2mic(unsigned char *l, unsigned char *p, int len)
+{
+    static unsigned char alt2koi[] = {
+        0xe1, 0xe2, 0xf7, 0xe7, 0xe4, 0xe5, 0xf6, 0xfa,
+        0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0,
+        0xf2, 0xf3, 0xf4, 0xf5, 0xe6, 0xe8, 0xe3, 0xfe,
+        0xfb, 0xfd, 0xff, 0xf9, 0xf8, 0xfc, 0xe0, 0xf1,
+        0xc1, 0xc2, 0xd7, 0xc7, 0xc4, 0xc5, 0xd6, 0xda,
+        0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0xd2, 0xd3, 0xd4, 0xd5, 0xc6, 0xc8, 0xc3, 0xde,
+        0xdb, 0xdd, 0xdf, 0xd9, 0xd8, 0xdc, 0xc0, 0xd1,
+        0xb3, 0xa3, 0xb4, 0xa4, 0xb7, 0xa7, 0x00, 0x00,
+        0xb6, 0xa6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+    };
+    latin2mic_with_table(l, p, len, LC_KOI8_R, alt2koi);
+}
+
+/*
+ * mic2alt: Mule internal code to CP866
+ *
+ * koi2alt[] is indexed by (KOI8-R byte - 0x80) and holds the matching
+ * CP866 code; 0x00 marks a byte with no entry, which
+ * mic2latin_with_table() replaces with a space.  0x80-0x9f have no
+ * entries, 0xa0-0xbf are only partly covered, 0xc0-0xff are fully
+ * covered.
+ *
+ * The table is unsigned char: its values exceed 0x7f and it is
+ * handed to a function taking "unsigned char *".
+ *
+ * NOTE(review): the entries for 0xad and 0xce both yield 0xad, so
+ * the mapping is not one-to-one there -- confirm the 0xad (and 0xbd)
+ * entries against the KOI8 and CP866 code charts.
+ */
+static void
+mic2alt(unsigned char *mic, unsigned char *p, int len)
+{
+    static unsigned char koi2alt[] = {
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0xf1, 0xf3, 0x00, 0xf9, 0xf5,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xad, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0xf0, 0xf2, 0x00, 0xf8, 0xf4,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0xbd, 0x00, 0x00,
+        0xee, 0xa0, 0xa1, 0xe6, 0xa4, 0xa5, 0xe4, 0xa3,
+        0xe5, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae,
+        0xaf, 0xef, 0xe0, 0xe1, 0xe2, 0xe3, 0xa6, 0xa2,
+        0xec, 0xeb, 0xa7, 0xe8, 0xed, 0xe9, 0xe7, 0xea,
+        0x9e, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83,
+        0x95, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
+        0x8f, 0x9f, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82,
+        0x9c, 0x9b, 0x87, 0x98, 0x9d, 0x99, 0x97, 0x9a
+    };
+    mic2latin_with_table(mic, p, len, LC_KOI8_R, koi2alt);
+}
+
+/*
+ * end of Cyrillic support
+ */
+
pg_encoding_conv_tbl pg_conv_tbl[] = {
{SQL_ASCII, "SQL_ASCII", 0, ascii2mic, mic2ascii}, /* SQL/ACII */
{EUC_JP, "EUC_JP", 0, euc_jp2mic, mic2euc_jp}, /* EUC_JP */
@@ -600,7 +856,10 @@ pg_encoding_conv_tbl pg_conv_tbl[] = {
{LATIN2, "LATIN2", 0, latin22mic, mic2latin2}, /* ISO 8859 Latin 2 */
{LATIN3, "LATIN3", 0, latin32mic, mic2latin3}, /* ISO 8859 Latin 3 */
{LATIN4, "LATIN4", 0, latin42mic, mic2latin4}, /* ISO 8859 Latin 4 */
- {LATIN5, "LATIN5", 0, latin52mic, mic2latin5}, /* ISO 8859 Latin 5 */
+ {LATIN5, "LATIN5", 0, iso2mic, mic2iso}, /* ISO 8859 Latin 5 */
+ {KOI8, "KOI8", 0, koi2mic, mic2koi}, /* KOI8-R */
+ {WIN, "WIN", 0, win2mic, mic2win}, /* CP1251 */
+ {ALT, "ALT", 0, alt2mic, mic2alt}, /* CP866 */
{SJIS, "SJIS", 1, sjis2mic, mic2sjis}, /* SJIS */
{BIG5, "BIG5", 1, big52mic, mic2big5}, /* Big5 */
{-1, "", 0, 0, 0} /* end mark */
diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 2a1141fbadb..78f22c15ebc 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
/*
* conversion functions between pg_wchar and multi-byte streams.
* Tatsuo Ishii
- * $Id: wchar.c,v 1.5 1999/02/02 18:51:23 momjian Exp $
+ * $Id: wchar.c,v 1.6 1999/03/24 07:02:17 ishii Exp $
*/
#include "mb/pg_wchar.h"
@@ -416,40 +416,40 @@ pg_big5_mblen(const unsigned char *s)
}
pg_wchar_tbl pg_wchar_table[] = {
- {pg_ascii2wchar_with_len, pg_ascii_mblen},
- {pg_eucjp2wchar_with_len, pg_eucjp_mblen},
- {pg_euccn2wchar_with_len, pg_euccn_mblen},
- {pg_euckr2wchar_with_len, pg_euckr_mblen},
- {pg_euctw2wchar_with_len, pg_euctw_mblen},
- {pg_utf2wchar_with_len, pg_utf_mblen},
- {pg_mule2wchar_with_len, pg_mule_mblen},
- {pg_latin12wchar_with_len, pg_latin1_mblen},
- {pg_latin12wchar_with_len, pg_latin1_mblen},
- {pg_latin12wchar_with_len, pg_latin1_mblen},
- {pg_latin12wchar_with_len, pg_latin1_mblen},
- {pg_latin12wchar_with_len, pg_latin1_mblen},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, 0},
- {0, pg_sjis_mblen},
- {0, pg_big5_mblen}
+ /*
+  * Each entry pairs a "<encoding>2wchar_with_len" function with an
+  * "<encoding>_mblen" function; the trailing number is the entry's
+  * position in this table (presumably the encoding id used to index
+  * it -- confirm against mb/pg_wchar.h).  Positions 7-31 all use the
+  * latin1 handlers, including 12-31 which were previously empty.
+  * Positions 32 and 33 (sjis, big5) supply only the mblen function.
+  */
+ {pg_ascii2wchar_with_len, pg_ascii_mblen}, /* 0 */
+ {pg_eucjp2wchar_with_len, pg_eucjp_mblen}, /* 1 */
+ {pg_euccn2wchar_with_len, pg_euccn_mblen}, /* 2 */
+ {pg_euckr2wchar_with_len, pg_euckr_mblen}, /* 3 */
+ {pg_euctw2wchar_with_len, pg_euctw_mblen}, /* 4 */
+ {pg_utf2wchar_with_len, pg_utf_mblen}, /* 5 */
+ {pg_mule2wchar_with_len, pg_mule_mblen}, /* 6 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 7 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 8 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 9 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 10 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 11 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 12 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 13 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 14 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 15 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 16 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 17 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 18 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 19 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 20 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 21 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 22 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 23 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 24 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 25 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 26 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 27 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 28 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 29 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 30 */
+ {pg_latin12wchar_with_len, pg_latin1_mblen}, /* 31 */
+ {0, pg_sjis_mblen}, /* 32 */
+ {0, pg_big5_mblen} /* 33 */
};
/* returns the byte length of a word for mule internal code */