about summary refs log tree commit diff
path: root/src/backend
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend')
-rw-r--r-- src/backend/parser/gram.y 28
-rw-r--r-- src/backend/utils/adt/regexp.c 127
2 files changed, 141 insertions, 14 deletions
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 43597306d44..be45d7bde16 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.367 2002/09/18 21:35:21 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.368 2002/09/22 17:27:23 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -5644,22 +5644,40 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr SIMILAR TO a_expr %prec SIMILAR
- { $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, $4); }
+ {
+ A_Const *c = makeNode(A_Const);
+ FuncCall *n = makeNode(FuncCall);
+ c->val.type = T_Null;
+ n->funcname = SystemFuncName("similar_escape");
+ n->args = makeList2($4, (Node *) c);
+ n->agg_star = FALSE;
+ n->agg_distinct = FALSE;
+ $$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
+ }
| a_expr SIMILAR TO a_expr ESCAPE a_expr
{
FuncCall *n = makeNode(FuncCall);
- n->funcname = SystemFuncName("like_escape");
+ n->funcname = SystemFuncName("similar_escape");
n->args = makeList2($4, $6);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
$$ = (Node *) makeSimpleA_Expr(OP, "~", $1, (Node *) n);
}
| a_expr NOT SIMILAR TO a_expr %prec SIMILAR
- { $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, $5); }
+ {
+ A_Const *c = makeNode(A_Const);
+ FuncCall *n = makeNode(FuncCall);
+ c->val.type = T_Null;
+ n->funcname = SystemFuncName("similar_escape");
+ n->args = makeList2($5, (Node *) c);
+ n->agg_star = FALSE;
+ n->agg_distinct = FALSE;
+ $$ = (Node *) makeSimpleA_Expr(OP, "!~", $1, (Node *) n);
+ }
| a_expr NOT SIMILAR TO a_expr ESCAPE a_expr
{
FuncCall *n = makeNode(FuncCall);
- n->funcname = SystemFuncName("like_escape");
+ n->funcname = SystemFuncName("similar_escape");
n->args = makeList2($5, $7);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
index b64d6ede65a..ebbca8f0401 100644
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
char *sterm;
int len;
bool match;
- int nmatch = 1;
- regmatch_t pmatch;
+ regmatch_t pmatch[2];
/* be sure sterm is null-terminated */
len = VARSIZE(s) - VARHDRSZ;
@@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS)
sterm[len] = '\0';
/*
- * We need the match info back from the pattern match to be able to
- * actually extract the substring. It seems to be adequate to pass in
- * a structure to return only one result.
+ * We pass two regmatch_t structs to get info about the overall match
+ * and the match for the first parenthesized subexpression (if any).
+ * If there is a parenthesized subexpression, we return what it matched;
+ * else return what the whole regexp matched.
*/
- match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
+ match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch);
+
pfree(sterm);
/* match? then return the substring matching the pattern */
if (match)
{
+ int so,
+ eo;
+
+ so = pmatch[1].rm_so;
+ eo = pmatch[1].rm_eo;
+ if (so < 0 || eo < 0)
+ {
+ /* no parenthesized subexpression */
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+ }
+
return (DirectFunctionCall3(text_substr,
PointerGetDatum(s),
- Int32GetDatum(pmatch.rm_so + 1),
- Int32GetDatum(pmatch.rm_eo - pmatch.rm_so)));
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so)));
}
PG_RETURN_NULL();
}
+
+/* similar_escape()
+ * Convert a SQL99 regexp pattern to POSIX style, so it can be used by
+ * our regexp engine.
+ *
+ * Arguments (fmgr calling convention):
+ *	0: the SQL99 pattern (text).  A NULL pattern yields a NULL result.
+ *	1: the ESCAPE string (text).  NULL selects backslash as the escape
+ *	   character, an empty string means "no escape character", and any
+ *	   string longer than one byte raises an error.
+ *
+ * Returns a newly palloc'd text value containing the translated pattern
+ * wrapped in ^ ... $, so that the regexp must match the whole string.
+ *
+ * Translation rules, applied byte by byte:
+ *	escape followed by "	-> alternately ( and )  (for SUBSTRING patterns)
+ *	escape followed by c	-> \c
+ *	%						-> .*
+ *	_						-> .
+ *	\  .  ?  {				-> preceded by a backslash
+ *	^  $  (outside [...])	-> preceded by a backslash
+ *	anything else			-> copied unchanged
+ *
+ * ^ and $ are ordinary characters in a SQL99 pattern but anchors in a POSIX
+ * regexp, so they must be quoted or a pattern such as 'a$b' could never
+ * match the string 'a$b'.  Inside a bracket expression they are left alone,
+ * because there a leading ^ means negation and $ is already literal.
+ *
+ * An escape character that is the last byte of the pattern is dropped
+ * without producing any output.
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+	text	   *pat_text;
+	text	   *esc_text;
+	text	   *result;
+	unsigned char *p,
+			   *e,
+			   *r;
+	int			plen,
+				elen;
+	bool		afterescape = false;
+	int			nquotes = 0;
+	int			bracket_depth = 0;	/* > 0 while inside [...] */
+
+	/* This function is not strict, so must test explicitly */
+	if (PG_ARGISNULL(0))
+		PG_RETURN_NULL();
+	pat_text = PG_GETARG_TEXT_P(0);
+	p = (unsigned char *) VARDATA(pat_text);
+	plen = (VARSIZE(pat_text) - VARHDRSZ);
+	if (PG_ARGISNULL(1))
+	{
+		/* No ESCAPE clause provided; default to backslash as escape */
+		e = (unsigned char *) "\\";
+		elen = 1;
+	}
+	else
+	{
+		esc_text = PG_GETARG_TEXT_P(1);
+		e = (unsigned char *) VARDATA(esc_text);
+		elen = (VARSIZE(esc_text) - VARHDRSZ);
+		if (elen == 0)
+			e = NULL;			/* no escape character */
+		else if (elen != 1)
+			elog(ERROR, "ESCAPE string must be empty or one character");
+	}
+
+	/*
+	 * We need room for ^, $, and up to 2 output bytes per input byte.
+	 * Every branch of the loop below emits at most two bytes for the one
+	 * input byte it consumes, so this bound cannot be exceeded.
+	 */
+	result = (text *) palloc(VARHDRSZ + 2 + 2 * plen);
+	r = (unsigned char *) VARDATA(result);
+
+	*r++ = '^';
+
+	while (plen > 0)
+	{
+		unsigned char pchar = *p;
+
+		if (afterescape)
+		{
+			if (pchar == '"')	/* for SUBSTRING patterns */
+				*r++ = ((nquotes++ % 2) == 0) ? '(' : ')';
+			else
+			{
+				*r++ = '\\';
+				*r++ = pchar;
+			}
+			afterescape = false;
+		}
+		else if (e && pchar == *e)
+		{
+			/* SQL99 escape character; do not send to output */
+			afterescape = true;
+		}
+		else if (pchar == '%')
+		{
+			*r++ = '.';
+			*r++ = '*';
+		}
+		else if (pchar == '_')
+		{
+			*r++ = '.';
+		}
+		else if (pchar == '\\' || pchar == '.' || pchar == '?' ||
+				 pchar == '{')
+		{
+			*r++ = '\\';
+			*r++ = pchar;
+		}
+		else if (bracket_depth == 0 && (pchar == '^' || pchar == '$'))
+		{
+			/* not metacharacters in SQL99; keep them from acting as anchors */
+			*r++ = '\\';
+			*r++ = pchar;
+		}
+		else
+		{
+			/*
+			 * Track bracket expressions so that ^ and $ inside them are
+			 * passed through untouched.  Nested '[' (as in [[:alpha:]])
+			 * is counted so the inner ']' does not end the tracking early.
+			 * NOTE(review): a ']' written as the first member of a bracket
+			 * expression (the POSIX literal-']' form) is treated as the
+			 * closing bracket here -- confirm that is acceptable.
+			 */
+			if (pchar == '[')
+				bracket_depth++;
+			else if (pchar == ']' && bracket_depth > 0)
+				bracket_depth--;
+			*r++ = pchar;
+		}
+		p++, plen--;
+	}
+
+	*r++ = '$';
+
+	/* total size = header plus however many pattern bytes we emitted */
+	VARATT_SIZEP(result) = r - ((unsigned char *) result);
+
+	PG_RETURN_TEXT_P(result);
+}