aboutsummaryrefslogtreecommitdiff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r--src/backend/regex/utils.c144
-rw-r--r--src/backend/utils/adt/oracle_compat.c12
-rw-r--r--src/backend/utils/adt/varchar.c54
-rw-r--r--src/backend/utils/adt/varlena.c86
4 files changed, 277 insertions, 19 deletions
diff --git a/src/backend/regex/utils.c b/src/backend/regex/utils.c
index 1f904e338ce..67b9f2a737a 100644
--- a/src/backend/regex/utils.c
+++ b/src/backend/regex/utils.c
@@ -1,7 +1,7 @@
/*
* misc conversion functions between pg_wchar and other encodings.
* Tatsuo Ishii
- * $Id: utils.c,v 1.1 1998/03/15 07:38:39 scrappy Exp $
+ * $Id: utils.c,v 1.2 1998/04/27 17:07:53 scrappy Exp $
*/
#include <regex/pg_wchar.h>
/*
@@ -324,25 +324,151 @@ static void pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int
*to = 0;
}
+/*
+ * pg_euc_mblen - byte length of the EUC character that starts at s.
+ *
+ * Only the lead byte is inspected: SS2 gives 2, SS3 gives 3, any other
+ * byte with the high bit set gives 2, and everything else gives 1.
+ */
+static int pg_euc_mblen(const unsigned char *s)
+{
+    const unsigned char lead = *s;
+
+    if (lead == SS2)
+        return 2;
+    if (lead == SS3)
+        return 3;
+    return (lead & 0x80) ? 2 : 1;
+}
+
+/* mblen entry used for the pg_eucjp* row of pg_wchar_table; defers to pg_euc_mblen(). */
+static int pg_eucjp_mblen(const unsigned char *s)
+{
+    int len = pg_euc_mblen(s);
+
+    return len;
+}
+
+/* mblen entry used for the pg_euckr* row of pg_wchar_table; defers to pg_euc_mblen(). */
+static int pg_euckr_mblen(const unsigned char *s)
+{
+    int len = pg_euc_mblen(s);
+
+    return len;
+}
+
+/*
+ * pg_eucch_mblen - byte length of the character that starts at s.
+ *
+ * Both SS2 and SS3 lead bytes give 3; any other byte with the high bit
+ * set gives 2; everything else gives 1.
+ */
+static int pg_eucch_mblen(const unsigned char *s)
+{
+    const unsigned char lead = *s;
+
+    if (lead == SS2 || lead == SS3)
+        return 3;
+    if (lead & 0x80)
+        return 2;
+    return 1;
+}
+
+/*
+ * pg_euccn_mblen - byte length of the character that starts at s.
+ *
+ * An SS2 lead byte gives 4, SS3 gives 3, any other byte with the high
+ * bit set gives 2, and everything else gives 1.
+ */
+static int pg_euccn_mblen(const unsigned char *s)
+{
+    const unsigned char lead = *s;
+
+    if (lead == SS2)
+        return 4;
+    if (lead == SS3)
+        return 3;
+    if (lead & 0x80)
+        return 2;
+    return 1;
+}
+
+/*
+ * pg_utf_mblen - byte length of the UTF-8 sequence whose lead byte is *s.
+ *
+ * Only the lead byte is examined; continuation bytes are not validated.
+ *   0xxxxxxx -> 1,  110xxxxx -> 2,  1110xxxx -> 3,  11110xxx -> 4
+ * A stray continuation byte or any other invalid lead byte is reported
+ * as length 1 so that callers always make progress.
+ *
+ * NOTE(review): the matching converter pg_utf2wchar is not visible in
+ * this hunk; confirm it consumes the same number of bytes for 4-byte
+ * sequences.
+ */
+static int pg_utf_mblen(const unsigned char *s)
+{
+    if ((*s & 0x80) == 0)
+        return 1;
+    if ((*s & 0xe0) == 0xc0)
+        return 2;
+    /* mask with 0xf0 (not 0xe0) so 0xF0..0xFF are not taken for 3-byte leads */
+    if ((*s & 0xf0) == 0xe0)
+        return 3;
+    if ((*s & 0xf8) == 0xf0)
+        return 4;
+    return 1;
+}
+
+/*
+ * pg_mule_mblen - byte length of the character that starts at s.
+ *
+ * The lead byte is classified with IS_LC1, IS_LCPRV1, IS_LC2 and
+ * IS_LCPRV2, tried in that order, giving 2, 3, 3 and 4 respectively.
+ * A byte matching none of them is assumed to be ASCII.
+ */
+static int pg_mule_mblen(const unsigned char *s)
+{
+    if (IS_LC1(*s))
+        return 2;
+    if (IS_LCPRV1(*s) || IS_LC2(*s))
+        return 3;
+    if (IS_LCPRV2(*s))
+        return 4;
+    return 1;    /* assume ASCII */
+}
+
typedef struct {
- void (*mb2wchar)();
- void (*mb2wchar_with_len)();
+ void (*mb2wchar)(); /* convert a multi-byte string to a wchar */
+ void (*mb2wchar_with_len)(); /* convert a multi-byte string to a wchar
+ with a limited length */
+ int (*mblen)(); /* returns the length of a multi-byte word */
} pg_wchar_tbl;
static pg_wchar_tbl pg_wchar_table[] = {
- {pg_eucjp2wchar, pg_eucjp2wchar_with_len},
- {pg_eucch2wchar, pg_eucch2wchar_with_len},
- {pg_euckr2wchar, pg_euckr2wchar_with_len},
- {pg_euccn2wchar, pg_euccn2wchar_with_len},
- {pg_utf2wchar, pg_utf2wchar_with_len},
- {pg_mule2wchar, pg_mule2wchar_with_len}};
+ {pg_eucjp2wchar, pg_eucjp2wchar_with_len, pg_eucjp_mblen},
+ {pg_eucch2wchar, pg_eucch2wchar_with_len, pg_eucch_mblen},
+ {pg_euckr2wchar, pg_euckr2wchar_with_len, pg_euckr_mblen},
+ {pg_euccn2wchar, pg_euccn2wchar_with_len, pg_euccn_mblen},
+ {pg_utf2wchar, pg_utf2wchar_with_len, pg_utf_mblen},
+ {pg_mule2wchar, pg_mule2wchar_with_len, pg_mule_mblen}};
+/* convert a multi-byte string to a wchar */
void pg_mb2wchar(const unsigned char *from, pg_wchar *to)
{
(*pg_wchar_table[MB].mb2wchar)(from,to);
}
+/* convert a multi-byte string to a wchar with a limited length */
void pg_mb2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
{
(*pg_wchar_table[MB].mb2wchar_with_len)(from,to,len);
}
+
+/*
+ * pg_mblen - byte length of the multi-byte character that starts at
+ * mbstr, as reported by the mblen handler in the pg_wchar_table row
+ * selected by MB.
+ */
+int pg_mblen(const unsigned char *mbstr)
+{
+    const pg_wchar_tbl *enc = &pg_wchar_table[MB];
+
+    return (*enc->mblen)(mbstr);
+}
+
+/*
+ * pg_mbstrlen - number of characters in a NUL-terminated multi-byte string.
+ *
+ * Each character's byte length comes from pg_mblen(), which looks only at
+ * the lead byte.  If the final character is truncated (fewer bytes follow
+ * than its lead byte announces) the scan stops at the terminator instead
+ * of stepping over it; the truncated character is still counted once.
+ */
+int pg_mbstrlen(const unsigned char *mbstr)
+{
+    int len = 0;
+
+    while (*mbstr) {
+        int l = pg_mblen(mbstr);
+
+        /* always advance at least one byte so the loop terminates */
+        if (l < 1)
+            l = 1;
+        /* consume the character byte by byte, never passing the NUL */
+        while (l-- > 0 && *mbstr)
+            mbstr++;
+        len++;
+    }
+    return(len);
+}
+
+/*
+ * pg_mbstrlen_with_len - number of characters in a multi-byte string that
+ * is not necessarily NUL-terminated.
+ *
+ * Scanning stops at the first NUL byte or once `limit` bytes have been
+ * consumed, whichever comes first.  A character that starts inside the
+ * limit is counted even if pg_mblen() reports it as extending beyond it.
+ */
+int pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
+{
+    int len = 0;
+
+    /* test limit first: mbstr[limit] may lie outside the caller's buffer */
+    while (limit > 0 && *mbstr) {
+        int l = pg_mblen(mbstr);
+
+        limit -= l;
+        mbstr += l;
+        len++;
+    }
+    return(len);
+}
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
index 3680b1e2194..3324108250a 100644
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -1,7 +1,7 @@
/*
* Edmund Mergl <E.Mergl@bawue.de>
*
- * $Id: oracle_compat.c,v 1.12 1998/02/26 04:37:19 momjian Exp $
+ * $Id: oracle_compat.c,v 1.13 1998/04/27 17:08:19 scrappy Exp $
*
*/
@@ -55,7 +55,7 @@ lower(text *string)
while (m--)
{
- *ptr_ret++ = tolower(*ptr++);
+ *ptr_ret++ = tolower((unsigned char)*ptr++);
}
return ret;
@@ -95,7 +95,7 @@ upper(text *string)
while (m--)
{
- *ptr_ret++ = toupper(*ptr++);
+ *ptr_ret++ = toupper((unsigned char)*ptr++);
}
return ret;
@@ -135,18 +135,18 @@ initcap(text *string)
ptr = VARDATA(string);
ptr_ret = VARDATA(ret);
- *ptr_ret++ = toupper(*ptr++);
+ *ptr_ret++ = toupper((unsigned char)*ptr++);
--m;
while (m--)
{
if (*(ptr_ret - 1) == ' ' || *(ptr_ret - 1) == ' ')
{
- *ptr_ret++ = toupper(*ptr++);
+ *ptr_ret++ = toupper((unsigned char)*ptr++);
}
else
{
- *ptr_ret++ = tolower(*ptr++);
+ *ptr_ret++ = tolower((unsigned char)*ptr++);
}
}
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
index 796cc3099ac..cbd113bba85 100644
--- a/src/backend/utils/adt/varchar.c
+++ b/src/backend/utils/adt/varchar.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.29 1998/02/26 04:37:24 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.30 1998/04/27 17:08:26 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
@@ -21,6 +21,8 @@ char *convertstr(char *, int, int);
#endif
+#include "regex/pg_wchar.h"
+
/*
* CHAR() and VARCHAR() types are part of the ANSI SQL standard. CHAR()
* is for blank-padded string whose length is specified in CREATE TABLE.
@@ -214,6 +216,31 @@ bcTruelen(char *arg)
int32
bpcharlen(char *arg)
{
+#ifdef MB
+ unsigned char *s;
+ int len, l, wl;
+#endif
+ if (!PointerIsValid(arg))
+ elog(ERROR, "Bad (null) char() external representation", NULL);
+#ifdef MB
+ l = bcTruelen(arg);
+ len = 0;
+ s = VARDATA(arg);
+ while (l > 0) {
+ wl = pg_mblen(s);
+ l -= wl;
+ s += wl;
+ len++;
+ }
+ return(len);
+#else
+ return (bcTruelen(arg));
+#endif
+}
+
+int32
+bpcharoctetlen(char *arg)
+{
if (!PointerIsValid(arg))
elog(ERROR, "Bad (null) char() external representation", NULL);
@@ -354,9 +381,34 @@ bpcharcmp(char *arg1, char *arg2)
int32
varcharlen(char *arg)
{
+#ifdef MB
+ unsigned char *s;
+ int len, l, wl;
+#endif
if (!PointerIsValid(arg))
elog(ERROR, "Bad (null) varchar() external representation", NULL);
+#ifdef MB
+ len = 0;
+ s = VARDATA(arg);
+ l = VARSIZE(arg) - VARHDRSZ;
+ while (l > 0) {
+ wl = pg_mblen(s);
+ l -= wl;
+ s += wl;
+ len++;
+ }
+ return(len);
+#else
+ return (VARSIZE(arg) - VARHDRSZ);
+#endif
+}
+
+int32
+varcharoctetlen(char *arg)
+{
+ if (!PointerIsValid(arg))
+ elog(ERROR, "Bad (null) varchar() external representation", NULL);
return (VARSIZE(arg) - VARHDRSZ);
}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index c45a5d9a2e3..d094924db14 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.32 1998/03/15 08:07:01 scrappy Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.33 1998/04/27 17:08:28 scrappy Exp $
*
*-------------------------------------------------------------------------
*/
@@ -18,6 +18,8 @@
#include "utils/palloc.h"
#include "utils/builtins.h" /* where function declarations go */
+#include "regex/pg_wchar.h"
+
/*****************************************************************************
* USER I/O ROUTINES *
*****************************************************************************/
@@ -198,19 +200,53 @@ textout(text *vlena)
/*
* textlen -
- * returns the actual length of a text*
+ * returns the logical length of a text*
* (which is less than the VARSIZE of the text*)
*/
int32
textlen(text *t)
{
+#ifdef MB
+ unsigned char *s;
+ int len, l, wl;
+#endif
+
if (!PointerIsValid(t))
elog(ERROR, "Null input to textlen");
+#ifdef MB
+ len = 0;
+ s = VARDATA(t);
+ l = VARSIZE(t) - VARHDRSZ;
+ while (l > 0) {
+ wl = pg_mblen(s);
+ l -= wl;
+ s += wl;
+ len++;
+ }
+ return(len);
+#else
return (VARSIZE(t) - VARHDRSZ);
+#endif
+
} /* textlen() */
/*
+ * textoctetlen -
+ * returns the physical length of a text*
+ * (which is less than the VARSIZE of the text*)
+ */
+int32
+textoctetlen(text *t)
+{
+    int32 octets;
+
+    /* reject an invalid pointer before touching the header */
+    if (!PointerIsValid(t))
+        elog(ERROR, "Null input to textoctetlen");
+
+    /* payload size in bytes: total VARSIZE minus the VARHDRSZ header */
+    octets = VARSIZE(t) - VARHDRSZ;
+    return octets;
+
+}    /* textoctetlen() */
+
+/*
* textcat -
* takes two text* and returns a text* that is the concatentation of
* the two.
@@ -278,17 +314,27 @@ textcat(text *t1, text *t2)
*
* Note that the arguments operate on octet length,
* so not aware of multi-byte character sets.
+ *
+ * Added multi-byte support.
+ * - Tatsuo Ishii 1998-4-21
*/
text *
text_substr(text *string, int32 m, int32 n)
{
text *ret;
int len;
+#ifdef MB
+ int i;
+ char *p;
+#endif
if ((string == (text *) NULL) || (m <= 0))
return string;
len = VARSIZE(string) - VARHDRSZ;
+#ifdef MB
+ len = pg_mbstrlen_with_len(VARDATA(string),len);
+#endif
/* m will now become a zero-based starting position */
if (m > len)
@@ -303,6 +349,17 @@ text_substr(text *string, int32 m, int32 n)
n = (len - m);
}
+#ifdef MB
+ p = VARDATA(string);
+ for (i=0;i<m;i++) {
+ p += pg_mblen(p);
+ }
+ m = p - VARDATA(string);
+ for (i=0;i<n;i++) {
+ p += pg_mblen(p);
+ }
+ n = p - (VARDATA(string) + m);
+#endif
ret = (text *) palloc(VARHDRSZ + n);
VARSIZE(ret) = VARHDRSZ + n;
@@ -317,6 +374,9 @@ text_substr(text *string, int32 m, int32 n)
* Implements the SQL92 POSITION() function.
* Ref: A Guide To The SQL Standard, Date & Darwen, 1997
* - thomas 1997-07-27
+ *
+ * Added multi-byte support.
+ * - Tatsuo Ishii 1998-4-21
*/
int32
textpos(text *t1, text *t2)
@@ -326,8 +386,11 @@ textpos(text *t1, text *t2)
p;
int len1,
len2;
- char *p1,
+ pg_wchar *p1,
*p2;
+#ifdef MB
+ pg_wchar *ps1, *ps2;
+#endif
if (!PointerIsValid(t1) || !PointerIsValid(t2))
return (0);
@@ -337,19 +400,36 @@ textpos(text *t1, text *t2)
len1 = (VARSIZE(t1) - VARHDRSZ);
len2 = (VARSIZE(t2) - VARHDRSZ);
+#ifdef MB
+ ps1 = p1 = (pg_wchar *) palloc((len1 + 1)*sizeof(pg_wchar));
+ (void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t1),p1,len1);
+ len1 = pg_wchar_strlen(p1);
+ ps2 = p2 = (pg_wchar *) palloc((len2 + 1)*sizeof(pg_wchar));
+ (void)pg_mb2wchar_with_len((unsigned char *)VARDATA(t2),p2,len2);
+ len2 = pg_wchar_strlen(p2);
+#else
p1 = VARDATA(t1);
p2 = VARDATA(t2);
+#endif
pos = 0;
px = (len1 - len2);
for (p = 0; p <= px; p++)
{
+#ifdef MB
+ if ((*p2 == *p1) && (pg_wchar_strncmp(p1, p2, len2) == 0))
+#else
if ((*p2 == *p1) && (strncmp(p1, p2, len2) == 0))
+#endif
{
pos = p + 1;
break;
};
p1++;
};
+#ifdef MB
+ pfree(ps1);
+ pfree(ps2);
+#endif
return (pos);
} /* textpos() */