aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/mb/mbutils.c
diff options
context:
space:
mode:
authorMarc G. Fournier <scrappy@hub.org>1998-07-24 03:32:46 +0000
committerMarc G. Fournier <scrappy@hub.org>1998-07-24 03:32:46 +0000
commitbf00bbb0c4940b80b46b7e5b379cd64184f2262f (patch)
treebf32bf3bafe6f367ee97249c83afb4c9e9a637af /src/backend/utils/mb/mbutils.c
parent6e66468f3a160878111578a93be2852635eb4f4d (diff)
downloadpostgresql-bf00bbb0c4940b80b46b7e5b379cd64184f2262f.tar.gz
postgresql-bf00bbb0c4940b80b46b7e5b379cd64184f2262f.zip
I really hope that I haven't missed anything in this one...
From: t-ishii@sra.co.jp Attached are patches to enhance the multi-byte support. (patches are against 7/18 snapshot) * determine encoding at initdb/createdb rather than compile time Now initdb/createdb has an option to specify the encoding. Also, I modified the syntax of CREATE DATABASE to accept an encoding option. See README.mb for more details. For this purpose I have added a new column "encoding" to pg_database. Also pg_attribute and pg_class are changed to catch up with the modification to pg_database. Actually I have added pg_database_mb.h, pg_attribute_mb.h and pg_class_mb.h. These are used only when MB is enabled. The reason for having separate files is that I couldn't find a way to use ifdef or whatever in those files. I have to admit it looks ugly. No way. * support for PGCLIENTENCODING when issuing COPY command commands/copy.c modified. * support for SQL92 syntax "SET NAMES" See gram.y. * support for LATIN2-5 * add UNICODE regression test case * new test suite for MB New directory test/mb added. * clean up source files Basic idea is to have MB's own subdirectory for easier maintenance. These are include/mb and backend/utils/mb.
Diffstat (limited to 'src/backend/utils/mb/mbutils.c')
-rw-r--r--src/backend/utils/mb/mbutils.c216
1 files changed, 216 insertions, 0 deletions
diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c
new file mode 100644
index 00000000000..eb6cbc5262d
--- /dev/null
+++ b/src/backend/utils/mb/mbutils.c
@@ -0,0 +1,216 @@
+/*
+ * This file contains public functions for conversion between
+ * client encoding and server internal encoding.
+ * (currently mule internal code (mic) is used)
+ * Tatsuo Ishii
+ * $Id: mbutils.c,v 1.1 1998/07/24 03:31:56 scrappy Exp $ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "mb/pg_wchar.h"
+
+static int client_encoding = -1; /* -1 until an encoding is set; "int" spelled out (implicit int is gone since C99) */
+static void (*client_to_mic)(); /* client encoding to MIC, 0 if unused */
+static void (*client_from_mic)(); /* MIC to client encoding, 0 if unused */
+static void (*server_to_mic)(); /* server encoding to MIC, 0 if unused */
+static void (*server_from_mic)(); /* MIC to server encoding, 0 if unused */
+
+/*
+ * Look up the conversion table entry for the given encoding id.
+ * Walks pg_conv_tbl up to its terminating entry (negative encoding) and
+ * returns the matching entry, or 0 when the encoding is not listed.
+ */
+static pg_encoding_conv_tbl *get_enc_ent(int encoding)
+{
+  pg_encoding_conv_tbl *ent;
+  for (ent = pg_conv_tbl; ent->encoding >= 0; ent++) {
+    if (ent->encoding == encoding) break;
+  }
+  return (ent->encoding >= 0) ? ent : 0;
+}
+
+/*
+ * Set the client encoding and pick the routines that convert between it
+ * and the database (server) encoding by way of MULE_INTERNAL (MIC).
+ * Identical client and server encodings disable conversion altogether.
+ *
+ * Returns 0 on success.  Returns -1 if an encoding that needs conversion
+ * has no entry in the conversion table, or its entry lacks a to_mic or
+ * from_mic routine; the previous settings are then left untouched.
+ */
+int pg_set_client_encoding(int encoding)
+{
+  int current_server_encoding = GetDatabaseEncoding();
+  pg_encoding_conv_tbl *client_ent = 0; /* stays 0 if the client side needs no conversion */
+  pg_encoding_conv_tbl *server_ent = 0; /* stays 0 if the server side needs no conversion */
+
+  /* server == client needs no conversion at all */
+  if (encoding != current_server_encoding) {
+    /* a side that already is MULE_INTERNAL needs no routines of its own */
+    if (encoding != MULE_INTERNAL) {
+      client_ent = get_enc_ent(encoding);
+      /* get_enc_ent() returns 0 for an unknown encoding: check before use */
+      if (client_ent == 0 || client_ent->to_mic == 0 || client_ent->from_mic == 0) {
+        return(-1);
+      }
+    }
+    if (current_server_encoding != MULE_INTERNAL) {
+      server_ent = get_enc_ent(current_server_encoding);
+      if (server_ent == 0 || server_ent->to_mic == 0 || server_ent->from_mic == 0) {
+        return(-1);
+      }
+    }
+  }
+
+  /* everything validated: commit the new state in one go */
+  client_encoding = encoding;
+  client_to_mic = client_ent ? client_ent->to_mic : 0;
+  client_from_mic = client_ent ? client_ent->from_mic : 0;
+  server_to_mic = server_ent ? server_ent->to_mic : 0;
+  server_from_mic = server_ent ? server_ent->from_mic : 0;
+  return(0);
+}
+
+/*
+ * Report the client encoding currently in effect.
+ * As long as none has been set (client_encoding is still -1), the
+ * database encoding is adopted as the client encoding first.
+ */
+int pg_get_client_encoding()
+{
+  /* unset (-1): fall back to the database encoding */
+  client_encoding = (client_encoding == -1) ? GetDatabaseEncoding() : client_encoding;
+  return(client_encoding);
+}
+
+/*
+ * Convert len bytes at s from the client encoding to the server encoding.
+ * Returns s itself when the two encodings are equal or when no conversion
+ * routine is installed; otherwise returns a static buffer that a later call
+ * reuses, so copy the result if it must survive.
+ * NOTE(review): nothing here bounds-checks the fixed-size buffers.
+ */
+unsigned char *pg_client_to_server(unsigned char *s, int len)
+{
+  static unsigned char mic_buf[MAX_PARSE_BUFFER*4]; /* client -> MIC output */
+  static unsigned char out_buf[MAX_PARSE_BUFFER*4]; /* MIC -> server output */
+  unsigned char *cur = s; /* output of the latest stage */
+  if (GetDatabaseEncoding() != client_encoding) {
+    if (client_to_mic) {
+      (*client_to_mic)(cur, mic_buf, len);
+      cur = mic_buf;
+      len = strlen((const char *) cur); /* the next stage consumes the MIC string */
+    }
+    if (server_from_mic) {
+      (*server_from_mic)(cur, out_buf, len);
+      cur = out_buf;
+    }
+  }
+  return(cur);
+}
+
+/*
+ * Convert len bytes at s from the server encoding to the client encoding.
+ * Returns s itself when the two encodings are equal or when no conversion
+ * routine is installed; otherwise returns a static buffer that a later call
+ * reuses, so copy the result if it must survive.
+ * NOTE(review): nothing here bounds-checks the fixed-size buffers.
+ */
+unsigned char *pg_server_to_client(unsigned char *s, int len)
+{
+  static unsigned char mic_buf[MAX_PARSE_BUFFER*4]; /* server -> MIC output */
+  static unsigned char out_buf[MAX_PARSE_BUFFER*4]; /* MIC -> client output */
+  unsigned char *cur = s; /* output of the latest stage */
+  if (GetDatabaseEncoding() != client_encoding) {
+    if (server_to_mic) {
+      (*server_to_mic)(cur, mic_buf, len);
+      cur = mic_buf;
+      len = strlen((const char *) cur); /* the next stage consumes the MIC string */
+    }
+    if (client_from_mic) {
+      (*client_from_mic)(cur, out_buf, len);
+      cur = out_buf;
+    }
+  }
+  return(cur);
+}
+
+/* convert the NUL-terminated multi-byte string "from" into wchars at "to", using the database encoding's table entry */
+void pg_mb2wchar(const unsigned char *from, pg_wchar *to)
+{
+  pg_wchar_table[GetDatabaseEncoding()].mb2wchar_with_len(from, to, strlen((const char *) from));
+}
+
+/* like pg_mb2wchar, but the caller supplies the length of "from" as len instead of relying on a terminating NUL */
+void pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
+{
+  pg_wchar_table[GetDatabaseEncoding()].mb2wchar_with_len(from, to, len);
+}
+
+/* byte length of the multi-byte character that starts at mbstr, as reported by the database encoding's mblen routine */
+int pg_mblen(const unsigned char *mbstr)
+{
+  return pg_wchar_table[GetDatabaseEncoding()].mblen(mbstr);
+}
+
+/* count the characters (not bytes) of a NUL-terminated multi-byte string, stepping pg_mblen() bytes per character */
+int pg_mbstrlen(const unsigned char *mbstr)
+{
+  const unsigned char *cur;
+  int nchars = 0;
+  for (cur = mbstr; *cur; cur += pg_mblen(cur)) {
+    nchars++;
+  }
+  return(nchars);
+}
+
+/* returns the length (counted as wchars) of a multi-byte string of at most
+   limit bytes (not necessarily NUL-terminated) */
+int pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
+{
+  int len = 0;
+  /* check limit before *mbstr: with no bytes left, *mbstr may be past the end of the buffer */
+  while (limit > 0 && *mbstr) {
+    int l = pg_mblen(mbstr);
+    limit -= l;
+    mbstr += l;
+    len++;
+  }
+  return(len);
+}
+
+/*
+ * functions for utils/init: file-local database encoding (initially MB) with its setter and getter
+ */
+static int DatabaseEncoding = MB;
+void
+SetDatabaseEncoding(int encoding)
+{
+  DatabaseEncoding = encoding; /* remember the new database encoding */
+}
+
+int
+GetDatabaseEncoding()
+{
+  return DatabaseEncoding;
+}
+
+/* for builtin-function: the string pg_encoding_to_char() yields for the database encoding */
+const char *
+getdatabaseencoding()
+{
+  return pg_encoding_to_char(GetDatabaseEncoding());
+}
+
+/* template1 database encoding: file-local storage plus its setter and getter */
+static int templateEncoding; /* zero-initialized until SetTemplateEncoding() is called */
+void SetTemplateEncoding(int encoding)
+{
+  templateEncoding = encoding;
+}
+
+int GetTemplateEncoding()
+{
+  return templateEncoding;
+}