diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2002-09-22 17:27:25 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2002-09-22 17:27:25 +0000 |
commit | 9946b83dedb629b9eff1c063b9fbcaab25c209df (patch) | |
tree | b95a72601c503b1139154c1d95561c37b9dfabf8 /src/backend/utils/adt/regexp.c | |
parent | ac355d558e27dd6b11b1d202de887a6d62d22ac6 (diff) | |
download | postgresql-9946b83dedb629b9eff1c063b9fbcaab25c209df.tar.gz postgresql-9946b83dedb629b9eff1c063b9fbcaab25c209df.zip |
Bring SIMILAR TO and SUBSTRING into some semblance of conformance with
the SQL99 standard. (I'm not sure that the character-class features are
quite right, but that can be fixed later.) Document SQL99 and POSIX
regexps as being different features; provide variants of SUBSTRING for
each.
Diffstat (limited to 'src/backend/utils/adt/regexp.c')
-rw-r--r-- | src/backend/utils/adt/regexp.c | 127 |
1 file changed, 118 insertions, 9 deletions
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index b64d6ede65a..ebbca8f0401 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS) char *sterm; int len; bool match; - int nmatch = 1; - regmatch_t pmatch; + regmatch_t pmatch[2]; /* be sure sterm is null-terminated */ len = VARSIZE(s) - VARHDRSZ; @@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS) sterm[len] = '\0'; /* - * We need the match info back from the pattern match to be able to - * actually extract the substring. It seems to be adequate to pass in - * a structure to return only one result. + * We pass two regmatch_t structs to get info about the overall match + * and the match for the first parenthesized subexpression (if any). + * If there is a parenthesized subexpression, we return what it matched; + * else return what the whole regexp matched. */ - match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch); + match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch); + pfree(sterm); /* match? 
then return the substring matching the pattern */ if (match) { + int so, + eo; + + so = pmatch[1].rm_so; + eo = pmatch[1].rm_eo; + if (so < 0 || eo < 0) + { + /* no parenthesized subexpression */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + } + return (DirectFunctionCall3(text_substr, PointerGetDatum(s), - Int32GetDatum(pmatch.rm_so + 1), - Int32GetDatum(pmatch.rm_eo - pmatch.rm_so))); + Int32GetDatum(so + 1), + Int32GetDatum(eo - so))); } PG_RETURN_NULL(); } + +/* similar_escape() + * Convert a SQL99 regexp pattern to POSIX style, so it can be used by + * our regexp engine. + */ +Datum +similar_escape(PG_FUNCTION_ARGS) +{ + text *pat_text; + text *esc_text; + text *result; + unsigned char *p, + *e, + *r; + int plen, + elen; + bool afterescape = false; + int nquotes = 0; + + /* This function is not strict, so must test explicitly */ + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + pat_text = PG_GETARG_TEXT_P(0); + p = VARDATA(pat_text); + plen = (VARSIZE(pat_text) - VARHDRSZ); + if (PG_ARGISNULL(1)) + { + /* No ESCAPE clause provided; default to backslash as escape */ + e = "\\"; + elen = 1; + } + else + { + esc_text = PG_GETARG_TEXT_P(1); + e = VARDATA(esc_text); + elen = (VARSIZE(esc_text) - VARHDRSZ); + if (elen == 0) + e = NULL; /* no escape character */ + else if (elen != 1) + elog(ERROR, "ESCAPE string must be empty or one character"); + } + + /* We need room for ^, $, and up to 2 output bytes per input byte */ + result = (text *) palloc(VARHDRSZ + 2 + 2 * plen); + r = VARDATA(result); + + *r++ = '^'; + + while (plen > 0) + { + unsigned char pchar = *p; + + if (afterescape) + { + if (pchar == '"') /* for SUBSTRING patterns */ + *r++ = ((nquotes++ % 2) == 0) ? 
'(' : ')'; + else + { + *r++ = '\\'; + *r++ = pchar; + } + afterescape = false; + } + else if (e && pchar == *e) + { + /* SQL99 escape character; do not send to output */ + afterescape = true; + } + else if (pchar == '%') + { + *r++ = '.'; + *r++ = '*'; + } + else if (pchar == '_') + { + *r++ = '.'; + } + else if (pchar == '\\' || pchar == '.' || pchar == '?' || + pchar == '{') + { + *r++ = '\\'; + *r++ = pchar; + } + else + { + *r++ = pchar; + } + p++, plen--; + } + + *r++ = '$'; + + VARATT_SIZEP(result) = r - ((unsigned char *) result); + + PG_RETURN_TEXT_P(result); +} |