From f554af0a9fdbe0e9636fce36d6c809e81ce1539c Mon Sep 17 00:00:00 2001 From: "Marc G. Fournier" Date: Mon, 27 Apr 1998 17:10:50 +0000 Subject: From: t-ishii@sra.co.jp Hi, here are patches I promised (against 6.3.2): * character_length(), position(), substring() are now aware of multi-byte characters * add octet_length() * add --with-mb option to configure * new regression tests for EUC_KR (contributed by "Soonmyung. Hong" ) * add some test cases to the EUC_JP regression test * fix problem in regress/regress.sh in case of System V * fix toupper(), tolower() to handle 8bit chars note that: o patches for both configure.in and configure are included. maybe the one for configure is not necessary. o pg_proc.h was modified to add octet_length(). I used OIDs (1374-1379) for that. Please let me know if these numbers are not appropriate. --- src/backend/utils/adt/varlena.c | 86 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 83 insertions(+), 3 deletions(-) (limited to 'src/backend/utils/adt/varlena.c') diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index c45a5d9a2e3..d094924db14 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.32 1998/03/15 08:07:01 scrappy Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.33 1998/04/27 17:08:28 scrappy Exp $ * *------------------------------------------------------------------------- */ @@ -18,6 +18,8 @@ #include "utils/palloc.h" #include "utils/builtins.h" /* where function declarations go */ +#include "regex/pg_wchar.h" + /***************************************************************************** * USER I/O ROUTINES * *****************************************************************************/ @@ -198,18 +200,52 @@ textout(text *vlena) /* * textlen - - * returns the actual length of a text* + * returns the logical length of a text* * (which is less than the VARSIZE of the text*) */ int32 textlen(text *t) { +#ifdef MB + unsigned char *s; + int len, l, wl; +#endif + if (!PointerIsValid(t)) elog(ERROR, "Null input to textlen"); +#ifdef MB + len = 0; + s = VARDATA(t); + l = VARSIZE(t) - VARHDRSZ; + while (l > 0) { + wl = pg_mblen(s); + l -= wl; + s += wl; + len++; + } + return(len); +#else return (VARSIZE(t) - VARHDRSZ); +#endif + } /* textlen() */ +/* + * textoctetlen - + * returns the physical length of a text* + * (which is less than the VARSIZE of the text*) + */ +int32 +textoctetlen(text *t) +{ + if (!PointerIsValid(t)) + elog(ERROR, "Null input to textoctetlen"); + + return (VARSIZE(t) - VARHDRSZ); + +} /* textoctetlen() */ + /* * textcat - * takes two text* and returns a text* that is the concatentation of @@ -278,17 +314,27 @@ textcat(text *t1, text *t2) * * Note that the arguments operate on octet length, * so not aware of multi-byte character sets. + * + * Added multi-byte support. + * - Tatsuo Ishii 1998-4-21 */ text * text_substr(text *string, int32 m, int32 n) { text *ret; int len; +#ifdef MB + int i; + char *p; +#endif if ((string == (text *) NULL) || (m <= 0)) return string; len = VARSIZE(string) - VARHDRSZ; +#ifdef MB + len = pg_mbstrlen_with_len(VARDATA(string),len); +#endif /* m will now become a zero-based starting position */ if (m > len) @@ -303,6 +349,17 @@ text_substr(text *string, int32 m, int32 n) n = (len - m); } +#ifdef MB + p = VARDATA(string); + for (i=0;i