about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/regex/regcomp.c | 8
-rw-r--r-- src/backend/regex/regexec.c | 73
-rw-r--r-- src/test/regress/expected/regex.out | 36
-rw-r--r-- src/test/regress/parallel_schedule | 2
-rw-r--r-- src/test/regress/serial_schedule | 1
-rw-r--r-- src/test/regress/sql/regex.sql | 13
6 files changed, 103 insertions, 30 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 4f9da5b0468..6b80140e909 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1088,8 +1088,12 @@ parseqatom(struct vars * v,
NOERR();
}
- /* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
- if (m == 0)
+ /*
+ * It's quantifier time. If the atom is just a BACKREF, we'll let it deal
+ * with quantifiers internally. Otherwise, the first step is to turn
+ * x{0,...} into x{1,...}|empty
+ */
+ if (m == 0 && atomtype != BACKREF)
{
EMPTYARC(s2, atom->end); /* the bypass */
assert(PREF(qprefer) != 0);
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index f8e31f8f4ad..224da5064b6 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -720,7 +720,7 @@ cdissect(struct vars * v,
case '|': /* alternation */
assert(t->left != NULL);
return caltdissect(v, t, begin, end);
- case 'b': /* back ref -- shouldn't be calling us! */
+ case 'b': /* back reference */
assert(t->left == NULL && t->right == NULL);
return cbrdissect(v, t, begin, end);
case '.': /* concatenation */
@@ -962,12 +962,12 @@ cbrdissect(struct vars * v,
chr *begin, /* beginning of relevant substring */
chr *end) /* end of same */
{
- int i;
int n = t->subno;
- size_t len;
- chr *paren;
+ size_t numreps;
+ size_t tlen;
+ size_t brlen;
+ chr *brstring;
chr *p;
- chr *stop;
int min = t->min;
int max = t->max;
@@ -978,46 +978,65 @@ cbrdissect(struct vars * v,
MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
+ /* get the backreferenced string */
if (v->pmatch[n].rm_so == -1)
return REG_NOMATCH;
- paren = v->start + v->pmatch[n].rm_so;
- len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
+ brstring = v->start + v->pmatch[n].rm_so;
+ brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
/* no room to maneuver -- retries are pointless */
if (v->mem[t->retry])
return REG_NOMATCH;
v->mem[t->retry] = 1;
- /* special-case zero-length string */
- if (len == 0)
+ /* special cases for zero-length strings */
+ if (brlen == 0)
+ {
+ /*
+ * matches only if target is zero length, but any number of
+ * repetitions can be considered to be present
+ */
+ if (begin == end && min <= max)
+ {
+ MDEBUG(("cbackref matched trivially\n"));
+ return REG_OKAY;
+ }
+ return REG_NOMATCH;
+ }
+ if (begin == end)
{
- if (begin == end)
+ /* matches only if zero repetitions are okay */
+ if (min == 0)
+ {
+ MDEBUG(("cbackref matched trivially\n"));
return REG_OKAY;
+ }
return REG_NOMATCH;
}
- /* and too-short string */
- assert(end >= begin);
- if ((size_t) (end - begin) < len)
+ /*
+ * check target length to see if it could possibly be an allowed number of
+ * repetitions of brstring
+ */
+ assert(end > begin);
+ tlen = end - begin;
+ if (tlen % brlen != 0)
+ return REG_NOMATCH;
+ numreps = tlen / brlen;
+ if (numreps < min || (numreps > max && max != INFINITY))
return REG_NOMATCH;
- stop = end - len;
- /* count occurrences */
- i = 0;
- for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
+ /* okay, compare the actual string contents */
+ p = begin;
+ while (numreps-- > 0)
{
- if ((*v->g->compare) (paren, p, len) != 0)
- break;
- i++;
+ if ((*v->g->compare) (brstring, p, brlen) != 0)
+ return REG_NOMATCH;
+ p += brlen;
}
- MDEBUG(("cbackref found %d\n", i));
- /* and sort it out */
- if (p != end) /* didn't consume all of it */
- return REG_NOMATCH;
- if (min <= i && (i <= max || max == INFINITY))
- return REG_OKAY;
- return REG_NOMATCH; /* out of range */
+ MDEBUG(("cbackref matched\n"));
+ return REG_OKAY;
}
/*
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
new file mode 100644
index 00000000000..5694908163a
--- /dev/null
+++ b/src/test/regress/expected/regex.out
@@ -0,0 +1,36 @@
+--
+-- Regular expression tests
+--
+-- Don't want to have to double backslashes in regexes
+set standard_conforming_strings = on;
+-- Test simple quantified backrefs
+select 'bbbbb' ~ '^([bc])\1*$' as t;
+ t
+---
+ t
+(1 row)
+
+select 'ccc' ~ '^([bc])\1*$' as t;
+ t
+---
+ t
+(1 row)
+
+select 'xxx' ~ '^([bc])\1*$' as f;
+ f
+---
+ f
+(1 row)
+
+select 'bbc' ~ '^([bc])\1*$' as f;
+ f
+---
+ f
+(1 row)
+
+select 'b' ~ '^([bc])\1*$' as t;
+ t
+---
+ t
+(1 row)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 862f5b20077..8852e0a40fc 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -30,7 +30,7 @@ test: point lseg box path polygon circle date time timetz timestamp timestamptz
# geometry depends on point, lseg, box, path, polygon and circle
# horology depends on interval, timetz, timestamp, timestamptz, reltime and abstime
# ----------
-test: geometry horology oidjoins type_sanity opr_sanity
+test: geometry horology regex oidjoins type_sanity opr_sanity
# ----------
# These four each depend on the previous one
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 142fc9cf0d1..0bc5df7fe73 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -42,6 +42,7 @@ test: tstypes
test: comments
test: geometry
test: horology
+test: regex
test: oidjoins
test: type_sanity
test: opr_sanity
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
new file mode 100644
index 00000000000..242a81ef329
--- /dev/null
+++ b/src/test/regress/sql/regex.sql
@@ -0,0 +1,13 @@
+--
+-- Regular expression tests
+--
+
+-- Don't want to have to double backslashes in regexes
+set standard_conforming_strings = on;
+
+-- Test simple quantified backrefs
+select 'bbbbb' ~ '^([bc])\1*$' as t;
+select 'ccc' ~ '^([bc])\1*$' as t;
+select 'xxx' ~ '^([bc])\1*$' as f;
+select 'bbc' ~ '^([bc])\1*$' as f;
+select 'b' ~ '^([bc])\1*$' as t;