diff options
author | Bruce Momjian <bruce@momjian.us> | 1998-06-16 07:29:54 +0000 |
---|---|---|
committer | Bruce Momjian <bruce@momjian.us> | 1998-06-16 07:29:54 +0000 |
commit | cb7cbc16fa4b5933fb5d63052568e3ed6859857b (patch) | |
tree | bed17594c4880549288373de4d400512cbe2f82d /src/backend | |
parent | 0d8e7f6381291b85ad6264365e01143357d70a75 (diff) | |
download | postgresql-cb7cbc16fa4b5933fb5d63052568e3ed6859857b.tar.gz postgresql-cb7cbc16fa4b5933fb5d63052568e3ed6859857b.zip |
Hi, here are the patches to enhance the existing MB (multi-byte) handling. This time
I have implemented a framework for encoding translation between the
backend and the frontend. Also, I have added a new variable setting
command:
SET CLIENT_ENCODING TO 'encoding';
Other features include:
Latin1 support, more 8-bit cleanness
See doc/README.mb for more details. Note that the patches are
against the May 30 snapshot.
Tatsuo Ishii
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/access/common/Makefile | 6 | ||||
-rw-r--r-- | src/backend/access/common/printtup.c | 19 | ||||
-rw-r--r-- | src/backend/commands/Makefile | 10 | ||||
-rw-r--r-- | src/backend/commands/variable.c | 58 | ||||
-rw-r--r-- | src/backend/libpq/Makefile | 6 | ||||
-rw-r--r-- | src/backend/libpq/pqcomm.c | 44 | ||||
-rw-r--r-- | src/backend/parser/scan.c | 30 | ||||
-rw-r--r-- | src/backend/parser/scan.l | 8 | ||||
-rw-r--r-- | src/backend/regex/engine.c | 3 | ||||
-rw-r--r-- | src/backend/regex/regcomp.c | 2 | ||||
-rw-r--r-- | src/backend/regex/utils.c | 388 | ||||
-rw-r--r-- | src/backend/tcop/Makefile | 6 | ||||
-rw-r--r-- | src/backend/tcop/postgres.c | 21 |
13 files changed, 345 insertions, 256 deletions
diff --git a/src/backend/access/common/Makefile b/src/backend/access/common/Makefile index 73b1b3ea82e..76974644fa6 100644 --- a/src/backend/access/common/Makefile +++ b/src/backend/access/common/Makefile @@ -4,7 +4,7 @@ # Makefile for access/common # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/access/common/Makefile,v 1.10 1998/04/06 00:20:44 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/access/common/Makefile,v 1.11 1998/06/16 07:29:18 momjian Exp $ # #------------------------------------------------------------------------- @@ -13,6 +13,10 @@ include ../../../Makefile.global CFLAGS+=-I../.. +ifdef MB +CFLAGS+= -DMB=$(MB) +endif + OBJS = heaptuple.o heapvalid.o indextuple.o indexvalid.o printtup.o \ scankey.o tupdesc.o diff --git a/src/backend/access/common/printtup.c b/src/backend/access/common/printtup.c index 8b27415cf68..ccebe243825 100644 --- a/src/backend/access/common/printtup.c +++ b/src/backend/access/common/printtup.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.28 1998/05/14 17:18:12 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/common/printtup.c,v 1.29 1998/06/16 07:29:18 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,10 @@ #include <libpq/libpq.h> #include <utils/syscache.h> +#ifdef MB +#include <commands/variable.h> +#endif + /* ---------------------------------------------------------------- * printtup / debugtup support * ---------------------------------------------------------------- @@ -80,6 +84,9 @@ printtup(HeapTuple tuple, TupleDesc typeinfo) Datum attr; bool isnull; Oid typoutput; +#ifdef MB + unsigned char *p; +#endif /* ---------------- * tell the frontend to expect new tuple data @@ -125,8 +132,14 @@ printtup(HeapTuple tuple, TupleDesc typeinfo) outputstr = fmgr(typoutput, attr, gettypelem(typeinfo->attrs[i]->atttypid), typeinfo->attrs[i]->atttypmod); +#ifdef MB + p = 
pg_server_to_client(outputstr, strlen(outputstr)); + pq_putint(strlen(p) + VARHDRSZ, VARHDRSZ); + pq_putnchar(p, strlen(p)); +#else pq_putint(strlen(outputstr) + VARHDRSZ, VARHDRSZ); pq_putnchar(outputstr, strlen(outputstr)); +#endif pfree(outputstr); } } @@ -268,8 +281,12 @@ printtup_internal(HeapTuple tuple, TupleDesc typeinfo) /* variable length, assume a varlena structure */ len = VARSIZE(attr) - VARHDRSZ; +#ifdef MB + pq_putncharlen(VARDATA(attr), len); +#else pq_putint(len, VARHDRSZ); pq_putnchar(VARDATA(attr), len); +#endif #ifdef IPORTAL_DEBUG { char *d = VARDATA(attr); diff --git a/src/backend/commands/Makefile b/src/backend/commands/Makefile index 7e4fe415c3b..fc2b7199ab0 100644 --- a/src/backend/commands/Makefile +++ b/src/backend/commands/Makefile @@ -4,7 +4,7 @@ # Makefile for commands # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/commands/Makefile,v 1.12 1998/04/06 00:22:19 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/commands/Makefile,v 1.13 1998/06/16 07:29:20 momjian Exp $ # #------------------------------------------------------------------------- @@ -13,11 +13,19 @@ include ../../Makefile.global CFLAGS += -I.. +ifdef MB +CFLAGS += -DMB=$(MB) +endif + OBJS = async.o creatinh.o command.o copy.o defind.o define.o \ remove.o rename.o vacuum.o version.o view.o cluster.o \ recipe.o explain.o sequence.o trigger.o user.o proclang.o \ dbcommands.o variable.o +ifdef MB +OBJS += mbutils.o +endif + all: SUBSYS.o SUBSYS.o: $(OBJS) diff --git a/src/backend/commands/variable.c b/src/backend/commands/variable.c index 9e138b82a91..51f9d871bdf 100644 --- a/src/backend/commands/variable.c +++ b/src/backend/commands/variable.c @@ -2,7 +2,7 @@ * Routines for handling of 'SET var TO', * 'SHOW var' and 'RESET var' statements. 
* - * $Id: variable.c,v 1.6 1998/06/15 19:28:17 momjian Exp $ + * $Id: variable.c,v 1.7 1998/06/16 07:29:21 momjian Exp $ * */ @@ -15,6 +15,9 @@ #include "commands/variable.h" #include "utils/builtins.h" #include "optimizer/internal.h" +#ifdef MB +#include "regex/pg_wchar.h" +#endif extern Cost _cpu_page_wight_; extern Cost _cpu_index_page_wight_; @@ -519,6 +522,54 @@ reset_timezone() return TRUE; } /* reset_timezone() */ +#ifdef MB +/*-----------------------------------------------------------------------*/ +bool +parse_client_encoding(const char *value) +{ + int encoding; + + encoding = pg_valid_client_encoding(value); + if (encoding < 0) { + elog(ERROR, "Client encoding %s is not supported", value); + } else { + if (pg_set_client_encoding(encoding)) { + elog(ERROR, "Conversion between %s and %s is not supported", + value, pg_encoding_to_char(MB)); + } + } + return TRUE; +} + +bool +show_client_encoding() +{ + elog(NOTICE, "Current client encoding is %s", + pg_encoding_to_char(pg_get_client_encoding())); + return TRUE; +} + +bool +reset_client_encoding() +{ + int encoding; + char *env = getenv("PGCLIENTENCODING"); + + if (env) { + encoding = pg_char_to_encoding(env); + if (encoding < 0) { + encoding = MB; + } + } else { + encoding = MB; + } + pg_set_client_encoding(encoding); + return TRUE; +} + +/*-----------------------------------------------------------------------*/ +#endif + /*-----------------------------------------------------------------------*/ struct VariableParsers { @@ -547,6 +598,11 @@ struct VariableParsers { "r_plans", parse_r_plans, show_r_plans, reset_r_plans }, +#ifdef MB + { + "client_encoding", parse_client_encoding, show_client_encoding, reset_client_encoding + }, +#endif { NULL, NULL, NULL, NULL } diff --git a/src/backend/libpq/Makefile b/src/backend/libpq/Makefile index a608044facf..5176f2996a8 100644 --- a/src/backend/libpq/Makefile +++ b/src/backend/libpq/Makefile @@ -4,7 +4,7 @@ # Makefile for libpq subsystem (backend half of libpq 
interface) # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/libpq/Makefile,v 1.11 1998/04/06 00:22:39 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/libpq/Makefile,v 1.12 1998/06/16 07:29:22 momjian Exp $ # #------------------------------------------------------------------------- @@ -19,6 +19,10 @@ CFLAGS+= $(KRBFLAGS) LDFLAGS+= $(KRBLIBS) endif +ifdef MB +CFLAGS+= -DMB=$(MB) +endif + OBJS = be-dumpdata.o be-fsstubs.o be-pqexec.o pqcomprim.o\ auth.o hba.o crypt.o pqcomm.o portal.o util.o portalbuf.o pqpacket.o pqsignal.o \ password.o diff --git a/src/backend/libpq/pqcomm.c b/src/backend/libpq/pqcomm.c index a3ff7bcffd1..9b699a22e4b 100644 --- a/src/backend/libpq/pqcomm.c +++ b/src/backend/libpq/pqcomm.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/libpq/pqcomm.c,v 1.44 1998/06/15 19:28:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/libpq/pqcomm.c,v 1.45 1998/06/16 07:29:23 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -23,6 +23,9 @@ * pq_putstr - send a null terminated string to connection * pq_putnchar - send n characters to connection * pq_putint - send an integer to connection + * pq_putncharlen - send n characters to connection + * (also send an int header indicating + * the length) * pq_getinaddr - initialize address from host and port number * pq_getinserv - initialize address from host and service name * pq_connect - create remote input / output connection @@ -66,6 +69,9 @@ #include "libpq/auth.h" #include "libpq/libpq.h" /* where the declarations go */ #include "storage/ipc.h" +#ifdef MB +#include "commands/variable.h" +#endif /* ---------------- * declarations @@ -180,6 +186,14 @@ pq_getstr(char *s, int maxlen) { int c = '\0'; +#ifdef MB + unsigned char *p, *ps; + int len; + + ps = s; + len = maxlen; +#endif + if (Pfin == (FILE *) NULL) { /* elog(DEBUG, "Input descriptor is null"); */ @@ -190,6 +204,13 @@ pq_getstr(char *s, int maxlen) *s++ = c; *s 
= '\0'; +#ifdef MB + p = pg_client_to_server(ps, len); + if (ps != p) { /* actual conversion has been done? */ + strcpy(ps, p); + } +#endif + /* ----------------- * If EOF reached let caller know. * (This will only happen if we hit EOF before the string @@ -325,7 +346,14 @@ pq_getint(int b) void pq_putstr(char *s) { +#ifdef MB + unsigned char *p; + + p = pg_server_to_client(s, strlen(s)); + if (pqPutString(p, Pfout)) +#else if (pqPutString(s, Pfout)) +#endif { sprintf(PQerrormsg, "FATAL: pq_putstr: fputs() failed: errno=%d\n", errno); @@ -788,3 +816,17 @@ StreamOpen(char *hostName, short portName, Port *port) return (STATUS_OK); } + +#ifdef MB +void +pq_putncharlen(char *s, int n) +{ + unsigned char *p; + int len; + + p = pg_server_to_client(s, n); + len = strlen(p); + pq_putint(len, sizeof(int)); + pq_putnchar(p, len); +} +#endif diff --git a/src/backend/parser/scan.c b/src/backend/parser/scan.c index dd0304b97f0..83ae575e894 100644 --- a/src/backend/parser/scan.c +++ b/src/backend/parser/scan.c @@ -1,7 +1,7 @@ /* A lexical scanner generated by flex */ /* Scanner skeleton version: - * $Header: /cvsroot/pgsql/src/backend/parser/Attic/scan.c,v 1.21 1998/06/15 19:28:56 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/Attic/scan.c,v 1.22 1998/06/16 07:29:25 momjian Exp $ */ #define FLEX_SCANNER @@ -555,7 +555,7 @@ char *yytext; * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/Attic/scan.c,v 1.21 1998/06/15 19:28:56 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/Attic/scan.c,v 1.22 1998/06/16 07:29:25 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -1178,7 +1178,8 @@ YY_RULE_SETUP BEGIN(xm); for(i = 0; yytext[i]; i++) - if (isupper(yytext[i])) + if (isascii((unsigned char)yytext[i]) && + isupper(yytext[i])) yytext[i] = tolower(yytext[i]); keyword = ScanKeywordLookup((char*)yytext); @@ -1194,7 +1195,7 @@ YY_RULE_SETUP YY_BREAK case 34: YY_RULE_SETUP -#line 336 "scan.l" +#line 
337 "scan.l" { char* endptr; @@ -1216,7 +1217,7 @@ YY_RULE_SETUP YY_BREAK case 35: YY_RULE_SETUP -#line 354 "scan.l" +#line 355 "scan.l" { char* endptr; @@ -1231,7 +1232,7 @@ YY_RULE_SETUP YY_BREAK case 36: YY_RULE_SETUP -#line 365 "scan.l" +#line 366 "scan.l" { char* endptr; @@ -1252,7 +1253,7 @@ YY_RULE_SETUP YY_BREAK case 37: YY_RULE_SETUP -#line 382 "scan.l" +#line 383 "scan.l" { char* endptr; @@ -1266,13 +1267,14 @@ YY_RULE_SETUP YY_BREAK case 38: YY_RULE_SETUP -#line 394 "scan.l" +#line 395 "scan.l" { int i; ScanKeyword *keyword; for(i = 0; yytext[i]; i++) - if (isupper(yytext[i])) + if (isascii((unsigned char)yytext[i]) && + isupper(yytext[i])) yytext[i] = tolower(yytext[i]); keyword = ScanKeywordLookup((char*)yytext); @@ -1288,20 +1290,20 @@ YY_RULE_SETUP YY_BREAK case 39: YY_RULE_SETUP -#line 412 "scan.l" +#line 414 "scan.l" { /* ignore */ } YY_BREAK case 40: YY_RULE_SETUP -#line 414 "scan.l" +#line 416 "scan.l" { return (yytext[0]); } YY_BREAK case 41: YY_RULE_SETUP -#line 416 "scan.l" +#line 418 "scan.l" ECHO; YY_BREAK -#line 1305 "lex.yy.c" +#line 1307 "lex.yy.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(xb): case YY_STATE_EOF(xc): @@ -2187,7 +2189,7 @@ int main() return 0; } #endif -#line 416 "scan.l" +#line 418 "scan.l" void yyerror(char message[]) diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 6fe3af8369c..bf18df010c2 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.39 1998/05/09 23:15:20 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/scan.l,v 1.40 1998/06/16 07:29:27 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -320,7 +320,8 @@ other . 
BEGIN(xm); for(i = 0; yytext[i]; i++) - if (isupper(yytext[i])) + if (isascii((unsigned char)yytext[i]) && + isupper(yytext[i])) yytext[i] = tolower(yytext[i]); keyword = ScanKeywordLookup((char*)yytext); @@ -396,7 +397,8 @@ other . ScanKeyword *keyword; for(i = 0; yytext[i]; i++) - if (isupper(yytext[i])) + if (isascii((unsigned char)yytext[i]) && + isupper(yytext[i])) yytext[i] = tolower(yytext[i]); keyword = ScanKeywordLookup((char*)yytext); diff --git a/src/backend/regex/engine.c b/src/backend/regex/engine.c index 6381d5990c9..b95b6491eb6 100644 --- a/src/backend/regex/engine.c +++ b/src/backend/regex/engine.c @@ -127,6 +127,9 @@ extern "C" # elif MB == UNICODE # define NONCHAR(c) ((c) > USHRT_MAX) # define NNONCHAR (CODEMAX-USHRT_MAX) +# else /* assume 1 byte code such as ISO8859-1 */ +# define NONCHAR(c) ((c) > UCHAR_MAX) +# define NNONCHAR (CODEMAX-UCHAR_MAX) # endif #else # define NONCHAR(c) ((c) > CHAR_MAX) diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 01b95427c62..4eb71eb525e 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1344,7 +1344,7 @@ cset *cs; for (i = 0; i < css; i++) if (CHIN(cs, i)) - return ((char) i); + return (i); assert(never); return (0); /* arbitrary */ } diff --git a/src/backend/regex/utils.c b/src/backend/regex/utils.c index 67b9f2a737a..0308140118b 100644 --- a/src/backend/regex/utils.c +++ b/src/backend/regex/utils.c @@ -1,202 +1,97 @@ /* * misc conversion functions between pg_wchar and other encodings. * Tatsuo Ishii - * $Id: utils.c,v 1.2 1998/04/27 17:07:53 scrappy Exp $ + * $Id: utils.c,v 1.3 1998/06/16 07:29:29 momjian Exp $ */ #include <regex/pg_wchar.h> + /* - * convert EUC to pg_wchar (EUC process code) - * caller should allocate enough space for "to" + * conversion to pg_wchar is done by "table driven." + * to add an encoding support, define mb2wchar_with_len(), mblen() + * for the particular encoding. 
Note that if the encoding is only + * supported in the client, you don't need to define + * mb2wchar_with_len() function (SJIS is the case). */ -static void pg_euc2wchar(const unsigned char *from, pg_wchar *to) -{ - while (*from) { - if (*from == SS2) { - from++; - *to = *from++; - } else if (*from == SS3) { - from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; - } else if (*from & 0x80) { - *to = *from++ << 8; - *to |= *from++; - } else { - *to = *from++; - } - to++; - } - *to = 0; -} - -static void pg_eucjp2wchar(const unsigned char *from, pg_wchar *to) -{ - pg_euc2wchar(from,to); -} - -static void pg_euckr2wchar(const unsigned char *from, pg_wchar *to) -{ - pg_euc2wchar(from,to); -} +typedef struct { + void (*mb2wchar_with_len)(); /* convert a multi-byte string to a wchar */ + int (*mblen)(); /* returns the length of a multi-byte word */ +} pg_wchar_tbl; -static void pg_eucch2wchar(const unsigned char *from, pg_wchar *to) +static void pg_euc2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) { - while (*from) { + while (*from && len > 0) { if (*from == SS2) { from++; - *to = 0x3f00 & (*from++ << 8); - *to = *from++; + len--; + *to = 0xff & *from++; + len--; } else if (*from == SS3) { from++; *to = *from++ << 8; *to |= 0x3f & *from++; + len -= 3; } else if (*from & 0x80) { *to = *from++ << 8; *to |= *from++; + len -= 2; } else { *to = *from++; + len--; } to++; } *to = 0; } -static void pg_euccn2wchar(const unsigned char *from, pg_wchar *to) +static int pg_euc_mblen(const unsigned char *s) { - while (*from) { - if (*from == SS2) { - from++; - *to = *from++ << 16; - *to |= *from++ << 8; - *to |= *from++; - } else if (*from == SS3) { - from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; - } else if (*from & 0x80) { - *to = *from++ << 8; - *to |= *from++; - } else { - *to = *from++; - } - to++; + int len; + + if (*s == SS2) { + len = 2; + } else if (*s == SS3) { + len = 3; + } else if (*s & 0x80) { + len = 2; + } else { + len = 1; } - *to = 0; + 
return(len); } /* - * convert UTF-8 to pg_wchar (UCS-2) - * caller should allocate enough space for "to" + * EUC_JP */ -static void pg_utf2wchar(const unsigned char *from, pg_wchar *to) +static void pg_eucjp2wchar_with_len +(const unsigned char *from, pg_wchar *to, int len) { - unsigned char c1,c2,c3; - while (*from) { - if ((*from & 0x80) == 0) { - *to = *from++; - } else if ((*from & 0xe0) == 0xc0) { - c1 = *from++ & 0x1f; - c2 = *from++ & 0x3f; - *to = c1 << 6; - *to |= c2; - } else if ((*from & 0xe0) == 0xe0) { - c1 = *from++ & 0x0f; - c2 = *from++ & 0x3f; - c3 = *from++ & 0x3f; - *to = c1 << 12; - *to |= c2 << 6; - *to |= c3; - } - to++; - } - *to = 0; + pg_euc2wchar_with_len(from,to,len); } -/* - * convert mule internal code to pg_wchar. - * in this case pg_wchar consists of following 4 bytes: - * - * 0x00(unused) - * 0x00(ASCII)|leading character (one of LC1, LC12, LC2 or LC22) - * 0x00(ASCII,1 byte code)|other than 0x00(2 byte code) - * the lowest byte of the code - * - * note that Type N (variable length byte encoding) cannot be represented by - * this schema. sorry. - * caller should allocate enough space for "to" - */ -static void pg_mule2wchar(const unsigned char *from, pg_wchar *to) +static int pg_eucjp_mblen(const unsigned char *s) { - while (*from) { - if (IS_LC1(*from)) { - *to = *from++ << 16; - *to |= *from++; - } else if (IS_LCPRV1(*from)) { - from++; - *to = *from++ << 16; - *to |= *from++; - } else if (IS_LC2(*from)) { - *to = *from++ << 16; - *to |= *from++ << 8; - *to |= *from++; - } else if (IS_LCPRV2(*from)) { - from++; - *to = *from++ << 16; - *to |= *from++ << 8; - *to |= *from++; - } else { /* assume ASCII */ - *to = *from++; - } - to++; - } - *to = 0; + return(pg_euc_mblen(s)); } /* - * convert EUC to pg_wchar (EUC process code) - * caller should allocate enough space for "to" - * len: length of from. - * "from" not necessarily null terminated. 
+ * EUC_KR */ -static void pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) -{ - while (*from && len > 0) { - if (*from == SS2) { - from++; - len--; - *to = 0xff & *from++; - len--; - } else if (*from == SS3) { - from++; - *to = *from++ << 8; - *to |= 0x3f & *from++; - len -= 3; - } else if (*from & 0x80) { - *to = *from++ << 8; - *to |= *from++; - len -= 2; - } else { - *to = *from++; - len--; - } - to++; - } - *to = 0; -} - -static void pg_eucjp2wchar_with_len +static void pg_euckr2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { pg_euc2wchar_with_len(from,to,len); } -static void pg_euckr2wchar_with_len -(const unsigned char *from, pg_wchar *to, int len) +static int pg_euckr_mblen(const unsigned char *s) { - pg_euc2wchar_with_len(from,to,len); + return(pg_euc_mblen(s)); } -static void pg_eucch2wchar_with_len +/* + * EUC_CN + */ +static void pg_euccn2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { while (*from && len > 0) { @@ -224,7 +119,26 @@ static void pg_eucch2wchar_with_len *to = 0; } -static void pg_euccn2wchar_with_len +static int pg_euccn_mblen(const unsigned char *s) +{ + int len; + + if (*s == SS2) { + len = 3; + } else if (*s == SS3) { + len = 3; + } else if (*s & 0x80) { + len = 2; + } else { + len = 1; + } + return(len); +} + +/* + * EUC_TW + */ +static void pg_euctw2wchar_with_len (const unsigned char *from, pg_wchar *to, int len) { while (*from && len > 0) { @@ -253,6 +167,22 @@ static void pg_euccn2wchar_with_len *to = 0; } +static int pg_euctw_mblen(const unsigned char *s) +{ + int len; + + if (*s == SS2) { + len = 4; + } else if (*s == SS3) { + len = 3; + } else if (*s & 0x80) { + len = 2; + } else { + len = 1; + } + return(len); +} + /* * convert UTF-8 to pg_wchar (UCS-2) * caller should allocate enough space for "to" @@ -286,6 +216,20 @@ static void pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int l *to = 0; } +static int pg_utf_mblen(const unsigned char *s) +{ + 
int len = 1; + + if ((*s & 0x80) == 0) { + len = 1; + } else if ((*s & 0xe0) == 0xc0) { + len = 2; + } else if ((*s & 0xe0) == 0xe0) { + len = 3; + } + return(len); +} + /* * convert mule internal code to pg_wchar * caller should allocate enough space for "to" @@ -324,115 +268,89 @@ static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int *to = 0; } -static int pg_euc_mblen(const unsigned char *s) +static int pg_mule_mblen(const unsigned char *s) { int len; - if (*s == SS2) { - len = 2; - } else if (*s == SS3) { - len = 3; - } else if (*s & 0x80) { + if (IS_LC1(*s)) { len = 2; - } else { - len = 1; - } - return(len); -} - -static int pg_eucjp_mblen(const unsigned char *s) -{ - return(pg_euc_mblen(s)); -} - -static int pg_euckr_mblen(const unsigned char *s) -{ - return(pg_euc_mblen(s)); -} - -static int pg_eucch_mblen(const unsigned char *s) -{ - int len; - - if (*s == SS2) { + } else if (IS_LCPRV1(*s)) { len = 3; - } else if (*s == SS3) { + } else if (IS_LC2(*s)) { len = 3; - } else if (*s & 0x80) { - len = 2; - } else { + } else if (IS_LCPRV2(*s)) { + len = 4; + } else { /* assume ASCII */ len = 1; } return(len); } -static int pg_euccn_mblen(const unsigned char *s) +/* + * ISO8859-1 + */ +static void pg_latin12wchar_with_len(const unsigned char *from, pg_wchar *to, int len) { - int len; - - if (*s == SS2) { - len = 4; - } else if (*s == SS3) { - len = 3; - } else if (*s & 0x80) { - len = 2; - } else { - len = 1; + while (*from && len-- > 0) { + *to++ = *from++; } - return(len); + *to = 0; } -static int pg_utf_mblen(const unsigned char *s) +static int pg_latin1_mblen(const unsigned char *s) { - int len = 1; - - if ((*s & 0x80) == 0) { - len = 1; - } else if ((*s & 0xe0) == 0xc0) { - len = 2; - } else if ((*s & 0xe0) == 0xe0) { - len = 3; - } - return(len); + return(1); } -static int pg_mule_mblen(const unsigned char *s) +/* + * SJIS + */ +static int pg_sjis_mblen(const unsigned char *s) { int len; - if (IS_LC1(*s)) { + if (*s >= 0xa1 && *s <= 
0xdf) { /* 1 byte kana? */ + len = 1; + } else if (*s > 0x7f) { /* kanji? */ len = 2; - } else if (IS_LCPRV1(*s)) { - len = 3; - } else if (IS_LC2(*s)) { - len = 3; - } else if (IS_LCPRV2(*s)) { - len = 4; - } else { /* assume ASCII */ + } else { /* should be ASCII */ len = 1; } return(len); } -typedef struct { - void (*mb2wchar)(); /* convert a multi-byte string to a wchar */ - void (*mb2wchar_with_len)(); /* convert a multi-byte string to a wchar - with a limited length */ - int (*mblen)(); /* returns the length of a multi-byte word */ -} pg_wchar_tbl; - static pg_wchar_tbl pg_wchar_table[] = { - {pg_eucjp2wchar, pg_eucjp2wchar_with_len, pg_eucjp_mblen}, - {pg_eucch2wchar, pg_eucch2wchar_with_len, pg_eucch_mblen}, - {pg_euckr2wchar, pg_euckr2wchar_with_len, pg_euckr_mblen}, - {pg_euccn2wchar, pg_euccn2wchar_with_len, pg_euccn_mblen}, - {pg_utf2wchar, pg_utf2wchar_with_len, pg_utf_mblen}, - {pg_mule2wchar, pg_mule2wchar_with_len, pg_mule_mblen}}; + {pg_eucjp2wchar_with_len, pg_eucjp_mblen}, + {pg_euccn2wchar_with_len, pg_euccn_mblen}, + {pg_euckr2wchar_with_len, pg_euckr_mblen}, + {pg_euctw2wchar_with_len, pg_euctw_mblen}, + {pg_utf2wchar_with_len, pg_utf_mblen}, + {pg_mule2wchar_with_len, pg_mule_mblen}, + {pg_latin12wchar_with_len, pg_latin1_mblen}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, pg_sjis_mblen} +}; + +/* + *######################################################################## + * + * Public functions + * + *######################################################################## + */ /* convert a multi-byte string to a wchar */ void pg_mb2wchar(const unsigned char *from, pg_wchar *to) { - (*pg_wchar_table[MB].mb2wchar)(from,to); + (*pg_wchar_table[MB].mb2wchar_with_len)(from,to,strlen(from)); } /* convert a multi-byte string to a wchar with a limited length */ @@ -447,6 +365,18 @@ int pg_mblen(const unsigned char *mbstr) return((*pg_wchar_table[MB].mblen)(mbstr)); } +/* returns the byte length 
of a multi-byte word for an encoding */ +int pg_encoding_mblen(int encoding, const unsigned char *mbstr) +{ + return((*pg_wchar_table[encoding].mblen)(mbstr)); +} + +/* returns the byte length of a word for mule internal code */ +int pg_mic_mblen(const unsigned char *mbstr) +{ + return(pg_mule_mblen(mbstr)); +} + /* returns the length (counted as a wchar) of a multi-byte string */ int pg_mbstrlen(const unsigned char *mbstr) { diff --git a/src/backend/tcop/Makefile b/src/backend/tcop/Makefile index 45cddf43aaa..e8e88d84392 100644 --- a/src/backend/tcop/Makefile +++ b/src/backend/tcop/Makefile @@ -4,7 +4,7 @@ # Makefile for tcop # # IDENTIFICATION -# $Header: /cvsroot/pgsql/src/backend/tcop/Makefile,v 1.16 1998/04/06 00:26:05 momjian Exp $ +# $Header: /cvsroot/pgsql/src/backend/tcop/Makefile,v 1.17 1998/06/16 07:29:30 momjian Exp $ # #------------------------------------------------------------------------- @@ -13,6 +13,10 @@ include ../../Makefile.global CFLAGS+= -I.. +ifdef MB +CFLAGS+= -DMB=$(MB) +endif + ifeq ($(CC), gcc) CFLAGS+= -Wno-error endif diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 9670f326404..2c457bf05e0 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.76 1998/06/15 19:29:27 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.77 1998/06/16 07:29:30 momjian Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -83,6 +83,10 @@ #include "nodes/memnodes.h" #endif +#ifdef MB +#include "commands/variable.h" +#endif + /* ---------------- * global variables * ---------------- @@ -1270,6 +1274,19 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[]) InitPostgres(DBName); +#ifdef MB + /* set default client encoding */ + if (!Quiet) + { + puts("\treset_client_encoding().."); + } + reset_client_encoding(); + if (!Quiet) + { + 
puts("\treset_client_encoding() done."); + } +#endif + /* ---------------- * if an exception is encountered, processing resumes here * so we abort the current transaction and start a new one. @@ -1308,7 +1325,7 @@ PostgresMain(int argc, char *argv[], int real_argc, char *real_argv[]) if (!IsUnderPostmaster) { puts("\nPOSTGRES backend interactive interface"); - puts("$Revision: 1.76 $ $Date: 1998/06/15 19:29:27 $"); + puts("$Revision: 1.77 $ $Date: 1998/06/16 07:29:30 $"); } /* ---------------- |