aboutsummaryrefslogtreecommitdiff
path: root/src/backend/utils/adt/regexp.c
diff options
context:
space:
mode:
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-10 18:02:17 -0400
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-04-10 18:03:09 -0400
commit: 1e16a8107db9a50435b39e09c6f9c52c45e63e1a (patch)
tree: bf2231fc078b46004c7814ba871e3c38c1d8d52d /src/backend/utils/adt/regexp.c
parent: 210f95f1cd59c6fdfe0f84b922c19d8498ac377d (diff)
download: postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.tar.gz
postgresql-1e16a8107db9a50435b39e09c6f9c52c45e63e1a.zip
Teach regular expression operators to honor collations.
This involves getting the character classification and case-folding functions in the regex library to use the collations infrastructure. Most of this work had been done already in connection with the upper/lower and LIKE logic, so it was a simple matter of transposition. While at it, split out these functions into a separate source file regc_pg_locale.c, so that they can be correctly labeled with the Postgres project's license rather than the Scriptics license. These functions are 100% Postgres-written code whereas what remains in regc_locale.c is still mostly not ours, so lumping them both under the same copyright notice was getting more and more misleading.
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- src/backend/utils/adt/regexp.c 42
1 files changed, 32 insertions, 10 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index a4cb87915bb..0dbbd6715c9 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -96,6 +96,7 @@ typedef struct cached_re_str
char *cre_pat; /* original RE (not null terminated!) */
int cre_pat_len; /* length of original RE, in bytes */
int cre_flags; /* compile flags: extended,icase etc */
+ Oid cre_collation; /* collation to use */
regex_t cre_re; /* the compiled regular expression */
} cached_re_str;
@@ -106,6 +107,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
/* Local functions */
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
text *flags,
+ Oid collation,
bool force_glob,
bool use_subpatterns,
bool ignore_degenerate);
@@ -121,12 +123,13 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
*
* text_re --- the pattern, expressed as a TEXT object
* cflags --- compile options for the pattern
+ * collation --- collation to use for LC_CTYPE-dependent behavior
*
* Pattern is given in the database encoding. We internally convert to
* an array of pg_wchar, which is what Spencer's regex package wants.
*/
static regex_t *
-RE_compile_and_cache(text *text_re, int cflags)
+RE_compile_and_cache(text *text_re, int cflags, Oid collation)
{
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
char *text_re_val = VARDATA_ANY(text_re);
@@ -146,6 +149,7 @@ RE_compile_and_cache(text *text_re, int cflags)
{
if (re_array[i].cre_pat_len == text_re_len &&
re_array[i].cre_flags == cflags &&
+ re_array[i].cre_collation == collation &&
memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
{
/*
@@ -176,7 +180,8 @@ RE_compile_and_cache(text *text_re, int cflags)
regcomp_result = pg_regcomp(&re_temp.cre_re,
pattern,
pattern_len,
- cflags);
+ cflags,
+ collation);
pfree(pattern);
@@ -207,6 +212,7 @@ RE_compile_and_cache(text *text_re, int cflags)
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
re_temp.cre_pat_len = text_re_len;
re_temp.cre_flags = cflags;
+ re_temp.cre_collation = collation;
/*
* Okay, we have a valid new item in re_temp; insert it into the storage
@@ -313,6 +319,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
* dat --- the data to match against (need not be null-terminated)
* dat_len --- the length of the data string
* cflags --- compile options for the pattern
+ * collation --- collation to use for LC_CTYPE-dependent behavior
* nmatch, pmatch --- optional return area for match details
*
* Both pattern and data are given in the database encoding. We internally
@@ -320,12 +327,13 @@ RE_execute(regex_t *re, char *dat, int dat_len,
*/
static bool
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
- int cflags, int nmatch, regmatch_t *pmatch)
+ int cflags, Oid collation,
+ int nmatch, regmatch_t *pmatch)
{
regex_t *re;
/* Compile RE */
- re = RE_compile_and_cache(text_re, cflags);
+ re = RE_compile_and_cache(text_re, cflags, collation);
return RE_execute(re, dat, dat_len, nmatch, pmatch);
}
@@ -424,6 +432,7 @@ nameregexeq(PG_FUNCTION_ARGS)
NameStr(*n),
strlen(NameStr(*n)),
REG_ADVANCED,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -437,6 +446,7 @@ nameregexne(PG_FUNCTION_ARGS)
NameStr(*n),
strlen(NameStr(*n)),
REG_ADVANCED,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -450,6 +460,7 @@ textregexeq(PG_FUNCTION_ARGS)
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
REG_ADVANCED,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -463,6 +474,7 @@ textregexne(PG_FUNCTION_ARGS)
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
REG_ADVANCED,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -483,6 +495,7 @@ nameicregexeq(PG_FUNCTION_ARGS)
NameStr(*n),
strlen(NameStr(*n)),
REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -496,6 +509,7 @@ nameicregexne(PG_FUNCTION_ARGS)
NameStr(*n),
strlen(NameStr(*n)),
REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -509,6 +523,7 @@ texticregexeq(PG_FUNCTION_ARGS)
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -522,6 +537,7 @@ texticregexne(PG_FUNCTION_ARGS)
VARDATA_ANY(s),
VARSIZE_ANY_EXHDR(s),
REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
0, NULL));
}
@@ -541,7 +557,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
eo;
/* Compile RE */
- re = RE_compile_and_cache(p, REG_ADVANCED);
+ re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
/*
* We pass two regmatch_t structs to get info about the overall match and
@@ -597,7 +613,7 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
text *r = PG_GETARG_TEXT_PP(2);
regex_t *re;
- re = RE_compile_and_cache(p, REG_ADVANCED);
+ re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
}
@@ -618,7 +634,7 @@ textregexreplace(PG_FUNCTION_ARGS)
parse_re_flags(&flags, opt);
- re = RE_compile_and_cache(p, flags.cflags);
+ re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
}
@@ -781,7 +797,9 @@ regexp_matches(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
- flags, false, true, false);
+ flags,
+ PG_GET_COLLATION(),
+ false, true, false);
/* Pre-create workspace that build_regexp_matches_result needs */
matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
@@ -830,6 +848,7 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
*/
static regexp_matches_ctx *
setup_regexp_matches(text *orig_str, text *pattern, text *flags,
+ Oid collation,
bool force_glob, bool use_subpatterns,
bool ignore_degenerate)
{
@@ -868,7 +887,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
}
/* set up the compiled pattern */
- cpattern = RE_compile_and_cache(pattern, re_flags.cflags);
+ cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
/* do we want to remember subpatterns? */
if (use_subpatterns && cpattern->re_nsub > 0)
@@ -1039,7 +1058,9 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
- flags, true, false, true);
+ flags,
+ PG_GET_COLLATION(),
+ true, false, true);
MemoryContextSwitchTo(oldcontext);
funcctx->user_fctx = (void *) splitctx;
@@ -1083,6 +1104,7 @@ regexp_split_to_array(PG_FUNCTION_ARGS)
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
PG_GETARG_TEXT_PP(1),
PG_GETARG_TEXT_PP_IF_EXISTS(2),
+ PG_GET_COLLATION(),
true, false, true);
while (splitctx->next_match <= splitctx->nmatches)