diff options
-rw-r--r-- | src/backend/utils/adt/like.c | 45 | ||||
-rw-r--r-- | src/backend/utils/adt/like_match.c | 27 |
2 files changed, 55 insertions, 17 deletions
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 1603a4e61ae..4c4ca2c1936 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.70 2007/09/21 22:52:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.71 2007/09/22 03:58:34 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -36,6 +36,8 @@ static text *MB_do_like_escape(text *, text *); static int UTF8_MatchText(char *t, int tlen, char *p, int plen); +static int SB_IMatchText(char *t, int tlen, char *p, int plen); + static int GenericMatchText(char *s, int slen, char* p, int plen); static int Generic_Text_IC_like(text *str, text *pat); @@ -104,6 +106,12 @@ wchareq(char *p1, char *p2) #include "like_match.c" +/* setup to compile like_match.c for single byte case insensitive matches */ +#define MATCH_LOWER +#define NextChar(p, plen) NextByte((p), (plen)) +#define MatchText SB_IMatchText + +#include "like_match.c" /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ @@ -132,16 +140,33 @@ Generic_Text_IC_like(text *str, text *pat) int slen, plen; - /* Force inputs to lower case to achieve case insensitivity */ - str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str))); - pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat))); - /* lower's result is never packed, so OK to use old macros here */ - s = VARDATA(str); - slen = (VARSIZE(str) - VARHDRSZ); - p = VARDATA(pat); - plen = (VARSIZE(pat) - VARHDRSZ); + /* For efficiency reasons, in the single byte case we don't call + * lower() on the pattern and text, but instead call tolower() on each + * character. 
In the multi-byte case we don't have much choice :-( + */ - return GenericMatchText(s, slen, p, plen); + if (pg_database_encoding_max_length() > 1) + { + /* lower's result is never packed, so OK to use old macros here */ + pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat))); + p = VARDATA(pat); + plen = (VARSIZE(pat) - VARHDRSZ); + str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str))); + s = VARDATA(str); + slen = (VARSIZE(str) - VARHDRSZ); + if (GetDatabaseEncoding() == PG_UTF8) + return UTF8_MatchText(s, slen, p, plen); + else + return MB_MatchText(s, slen, p, plen); + } + else + { + p = VARDATA_ANY(pat); + plen = VARSIZE_ANY_EXHDR(pat); + s = VARDATA_ANY(str); + slen = VARSIZE_ANY_EXHDR(str); + return SB_IMatchText(s, slen, p, plen); + } } /* diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c index 7ab29623f3c..f2ee0bae0ec 100644 --- a/src/backend/utils/adt/like_match.c +++ b/src/backend/utils/adt/like_match.c @@ -3,8 +3,9 @@ * like_match.c * like expression handling internal code. * - * This file is included by like.c three times, to provide natching code for - * single-byte encodings, UTF8, and for other multi-byte encodings. + * This file is included by like.c four times, to provide matching code for + * single-byte encodings, UTF8, and for other multi-byte encodings, + * and case-insensitive matches for single-byte encodings. * UTF8 is a special case because we can use a much more efficient version * of NextChar than can be used for other multi-byte encodings. 
* @@ -13,11 +14,12 @@ * NextChar * MatchText - to name of function wanted * do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar + * MATCH_LOWER - define iff using tolower() on text chars * * Copyright (c) 1996-2007, PostgreSQL Global Development Group * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.17 2007/09/21 22:52:52 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.18 2007/09/22 03:58:34 adunstan Exp $ * *------------------------------------------------------------------------- */ @@ -68,6 +70,12 @@ *-------------------- */ +#ifdef MATCH_LOWER +#define TCHAR(t) tolower((t)) +#else +#define TCHAR(t) (t) +#endif + static int MatchText(char *t, int tlen, char *p, int plen) { @@ -143,13 +151,13 @@ MatchText(char *t, int tlen, char *p, int plen) else { - char firstpat = *p ; + char firstpat = TCHAR(*p) ; if (*p == '\\') { if (plen < 2) return LIKE_FALSE; - firstpat = p[1]; + firstpat = TCHAR(p[1]); } while (tlen > 0) @@ -158,7 +166,7 @@ MatchText(char *t, int tlen, char *p, int plen) * Optimization to prevent most recursion: don't recurse * unless first pattern byte matches first text byte. */ - if (*t == firstpat) + if (TCHAR(*t) == firstpat) { int matched = MatchText(t, tlen, p, plen); @@ -183,7 +191,7 @@ MatchText(char *t, int tlen, char *p, int plen) NextByte(p, plen); continue; } - else if (*t != *p) + else if (TCHAR(*t) != TCHAR(*p)) { /* * Not the single-character wildcard and no explicit match? Then @@ -338,3 +346,8 @@ do_like_escape(text *pat, text *esc) #undef do_like_escape #endif +#undef TCHAR + +#ifdef MATCH_LOWER +#undef MATCH_LOWER +#endif |