diff options
Diffstat (limited to 'src/backend')
-rw-r--r-- | src/backend/parser/gram.y | 28 | ||||
-rw-r--r-- | src/backend/utils/adt/regexp.c | 127 |
2 files changed, 141 insertions, 14 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 43597306d44..be45d7bde16 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.367 2002/09/18 21:35:21 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.368 2002/09/22 17:27:23 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -5644,22 +5644,40 @@ a_expr: c_expr { $$ = $1; } } | a_expr SIMILAR TO a_expr %prec SIMILAR - { $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); } + { + A_Const *c = makeNode(A_Const); + FuncCall *n = makeNode(FuncCall); + c->val.type = T_Null; + n->funcname = SystemFuncName("similar_escape"); + n->args = makeList2($4, (Node *) c); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n); + } | a_expr SIMILAR TO a_expr ESCAPE a_expr { FuncCall *n = makeNode(FuncCall); - n->funcname = SystemFuncName("like_escape"); + n->funcname = SystemFuncName("similar_escape"); n->args = makeList2($4, $6); n->agg_star = FALSE; n->agg_distinct = FALSE; $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n); } | a_expr NOT SIMILAR TO a_expr %prec SIMILAR - { $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); } + { + A_Const *c = makeNode(A_Const); + FuncCall *n = makeNode(FuncCall); + c->val.type = T_Null; + n->funcname = SystemFuncName("similar_escape"); + n->args = makeList2($5, (Node *) c); + n->agg_star = FALSE; + n->agg_distinct = FALSE; + $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n); + } | a_expr NOT SIMILAR TO a_expr ESCAPE a_expr { FuncCall *n = makeNode(FuncCall); - n->funcname = SystemFuncName("like_escape"); + n->funcname = SystemFuncName("similar_escape"); n->args = makeList2($5, $7); n->agg_star = FALSE; n->agg_distinct = FALSE; diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index b64d6ede65a..ebbca8f0401 100644 --- a/src/backend/utils/adt/regexp.c +++ 
b/src/backend/utils/adt/regexp.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $ * * Alistair Crooks added the code for the regex caching * agc - cached the regular expressions used - there's a good chance @@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS) char *sterm; int len; bool match; - int nmatch = 1; - regmatch_t pmatch; + regmatch_t pmatch[2]; /* be sure sterm is null-terminated */ len = VARSIZE(s) - VARHDRSZ; @@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS) sterm[len] = '\0'; /* - * We need the match info back from the pattern match to be able to - * actually extract the substring. It seems to be adequate to pass in - * a structure to return only one result. + * We pass two regmatch_t structs to get info about the overall match + * and the match for the first parenthesized subexpression (if any). + * If there is a parenthesized subexpression, we return what it matched; + * else return what the whole regexp matched. */ - match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch); + match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch); + pfree(sterm); /* match? then return the substring matching the pattern */ if (match) { + int so, + eo; + + so = pmatch[1].rm_so; + eo = pmatch[1].rm_eo; + if (so < 0 || eo < 0) + { + /* no parenthesized subexpression */ + so = pmatch[0].rm_so; + eo = pmatch[0].rm_eo; + } + return (DirectFunctionCall3(text_substr, PointerGetDatum(s), - Int32GetDatum(pmatch.rm_so + 1), - Int32GetDatum(pmatch.rm_eo - pmatch.rm_so))); + Int32GetDatum(so + 1), + Int32GetDatum(eo - so))); } PG_RETURN_NULL(); } + +/* similar_escape() + * Convert a SQL99 regexp pattern to POSIX style, so it can be used by + * our regexp engine. 
+ */
+/*
+ * Argument 0 is the SIMILAR TO pattern; a NULL pattern yields a NULL result.
+ * Argument 1 is the ESCAPE string: NULL selects backslash as the escape
+ * character, an empty string disables escaping altogether, and any length
+ * other than one byte is rejected with an error.
+ *
+ * The result is a freshly palloc'd text value holding the translated
+ * pattern, anchored with a leading '^' and a trailing '$'.
+ *
+ * Translation rules, applied byte by byte:
+ *	escape + '"'	alternately '(' and ')' (for SUBSTRING patterns)
+ *	escape + other	backslash followed by that byte
+ *	'%'				".*"
+ *	'_'				"."
+ *	'\\' '.' '?' '{'	backslash followed by that byte
+ *	anything else	copied unchanged
+ *
+ * An escape character that is the last byte of the pattern has nothing
+ * left to quote and is silently dropped.
+ *
+ * NOTE(review): bytes are translated without tracking [...] bracket
+ * expressions, and '^' / '$' / '|' from the input are copied through
+ * unchanged -- confirm this matches the intended SIMILAR TO semantics.
+ * NOTE(review): the scan is byte-wise; presumably a multibyte escape
+ * character is not supported -- verify against the encodings in use.
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+	text	   *pat_text;
+	text	   *result;
+	const unsigned char *p;		/* read cursor over the input pattern */
+	const unsigned char *e;		/* escape byte, or NULL if escaping is off */
+	unsigned char *r;			/* write cursor into the result buffer */
+	int			plen,
+				elen;
+	bool		afterescape = false;
+	int			nquotes = 0;
+
+	/* This function is not strict, so must test explicitly */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+
+	pat_text = PG_GETARG_TEXT_P(0);
+	p = (const unsigned char *) VARDATA(pat_text);
+	plen = (VARSIZE(pat_text) - VARHDRSZ);
+
+	if (PG_ARGISNULL(1))
+	{
+		/* No ESCAPE clause provided; default to backslash as escape */
+		e = (const unsigned char *) "\\";
+		elen = 1;
+	}
+	else
+	{
+		text	   *esc_text = PG_GETARG_TEXT_P(1);
+
+		e = (const unsigned char *) VARDATA(esc_text);
+		elen = (VARSIZE(esc_text) - VARHDRSZ);
+		if (elen == 0)
+			e = NULL;			/* no escape character */
+		else if (elen != 1)
+			elog(ERROR, "ESCAPE string must be empty or one character");
+	}
+
+	/*
+	 * We need room for ^, $, and up to 2 output bytes per input byte.
+	 * (An escape byte emits nothing itself, and the byte it quotes emits
+	 * at most two, so the bound holds for escaped sequences as well.)
+	 */
+	result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
+	r = (unsigned char *) VARDATA(result);
+
+	*r++ = '^';
+
+	for (; plen > 0; p++, plen--)
+	{
+		unsigned char pchar = *p;
+
+		if (afterescape)
+		{
+			if (pchar == '"')	/* for SUBSTRING patterns */
+			{
+				/* even-numbered quotes open a group, odd ones close it */
+				*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
+			}
+			else
+			{
+				*r++ = '\\';
+				*r++ = pchar;
+			}
+			afterescape = false;
+		}
+		else if (e && pchar == *e)
+		{
+			/* SQL99 escape character; do not send to output */
+			afterescape = true;
+		}
+		else if (pchar == '%')
+		{
+			*r++ = '.';
+			*r++ = '*';
+		}
+		else if (pchar == '_')
+		{
+			*r++ = '.';
+		}
+		else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
+				 pchar == '{')
+		{
+			/* quote these bytes so they reach the regexp engine as literals */
+			*r++ = '\\';
+			*r++ = pchar;
+		}
+		else
+		{
+			*r++ = pchar;
+		}
+	}
+
+	*r++ = '$';
+
+	/* total size = header plus everything written so far */
+	VARATT_SIZEP(result) = r - ((unsigned char *) result);
+
+	PG_RETURN_TEXT_P(result);
+}