6 files changed, 103 insertions, 30 deletions
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 4f9da5b0468..6b80140e909 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -1088,8 +1088,12 @@ parseqatom(struct vars * v,
 		NOERR();
 	}
 
-	/* it's quantifier time; first, turn x{0,...} into x{1,...}|empty */
-	if (m == 0)
+	/*
+	 * It's quantifier time.  If the atom is just a BACKREF, we'll let it deal
+	 * with quantifiers internally.  Otherwise, the first step is to turn
+	 * x{0,...} into x{1,...}|empty
+	 */
+	if (m == 0 && atomtype != BACKREF)
 	{
 		EMPTYARC(s2, atom->end);	/* the bypass */
 		assert(PREF(qprefer) != 0);
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index f8e31f8f4ad..224da5064b6 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -720,7 +720,7 @@ cdissect(struct vars * v,
 		case '|':				/* alternation */
 			assert(t->left != NULL);
 			return caltdissect(v, t, begin, end);
-		case 'b':				/* back ref -- shouldn't be calling us! */
+		case 'b':				/* back reference */
 			assert(t->left == NULL && t->right == NULL);
 			return cbrdissect(v, t, begin, end);
 		case '.':				/* concatenation */
@@ -962,12 +962,12 @@ cbrdissect(struct vars * v,
 		   chr *begin,			/* beginning of relevant substring */
 		   chr *end)			/* end of same */
 {
-	int			i;
 	int			n = t->subno;
-	size_t		len;
-	chr		   *paren;
+	size_t		numreps;
+	size_t		tlen;
+	size_t		brlen;
+	chr		   *brstring;
 	chr		   *p;
-	chr		   *stop;
 	int			min = t->min;
 	int			max = t->max;
 
@@ -978,46 +978,65 @@ cbrdissect(struct vars * v,
 
 	MDEBUG(("cbackref n%d %d{%d-%d}\n", t->retry, n, min, max));
 
+	/* get the backreferenced string */
 	if (v->pmatch[n].rm_so == -1)
 		return REG_NOMATCH;
-	paren = v->start + v->pmatch[n].rm_so;
-	len = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
+	brstring = v->start + v->pmatch[n].rm_so;
+	brlen = v->pmatch[n].rm_eo - v->pmatch[n].rm_so;
 
 	/* no room to maneuver -- retries are pointless */
 	if (v->mem[t->retry])
 		return REG_NOMATCH;
 	v->mem[t->retry] = 1;
 
-	/* special-case zero-length string */
-	if (len == 0)
+	/* special cases for zero-length strings */
+	if (brlen == 0)
+	{
+		/*
+		 * Matches only if the target is zero length too; any number of
+		 * repetitions can be considered present, so only min <= max matters.
+		 */
+		if (begin == end && min <= max)
+		{
+			MDEBUG(("cbackref matched trivially\n"));
+			return REG_OKAY;
+		}
+		return REG_NOMATCH;
+	}
+	if (begin == end)
 	{
-		if (begin == end)
+		/* matches only if zero repetitions are okay */
+		if (min == 0)
+		{
+			MDEBUG(("cbackref matched trivially\n"));
 			return REG_OKAY;
+		}
 		return REG_NOMATCH;
 	}
 
-	/* and too-short string */
-	assert(end >= begin);
-	if ((size_t) (end - begin) < len)
+	/*
+	 * check target length to see if it could possibly be an allowed number of
+	 * repetitions of brstring
+	 */
+	assert(end > begin);
+	tlen = end - begin;
+	if (tlen % brlen != 0)
+		return REG_NOMATCH;
+	numreps = tlen / brlen;
+	if (numreps < min || (numreps > max && max != INFINITY))
 		return REG_NOMATCH;
-	stop = end - len;
 
-	/* count occurrences */
-	i = 0;
-	for (p = begin; p <= stop && (i < max || max == INFINITY); p += len)
+	/* okay, compare the actual string contents */
+	p = begin;
+	while (numreps-- > 0)
 	{
-		if ((*v->g->compare) (paren, p, len) != 0)
-			break;
-		i++;
+		if ((*v->g->compare) (brstring, p, brlen) != 0)
+			return REG_NOMATCH;
+		p += brlen;
 	}
-	MDEBUG(("cbackref found %d\n", i));
 
-	/* and sort it out */
-	if (p != end)				/* didn't consume all of it */
-		return REG_NOMATCH;
-	if (min <= i && (i <= max || max == INFINITY))
-		return REG_OKAY;
-	return REG_NOMATCH;			/* out of range */
+	MDEBUG(("cbackref matched\n"));
+	return REG_OKAY;
 }
 
 /*
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
new file mode 100644
index 00000000000..5694908163a
--- /dev/null
+++ b/src/test/regress/expected/regex.out
@@ -0,0 +1,36 @@
+--
+-- Regular expression tests
+--
+-- Don't want to have to double backslashes in regexes
+set standard_conforming_strings = on;
+-- Test simple quantified backrefs
+select 'bbbbb' ~ '^([bc])\1*$' as t;
+ t 
+---
+ t
+(1 row)
+
+select 'ccc' ~ '^([bc])\1*$' as t;
+ t 
+---
+ t
+(1 row)
+
+select 'xxx' ~ '^([bc])\1*$' as f;
+ f 
+---
+ f
+(1 row)
+
+select 'bbc' ~ '^([bc])\1*$' as f;
+ f 
+---
+ f
+(1 row)
+
+select 'b' ~ '^([bc])\1*$' as t;
+ t 
+---
+ t
+(1 row)
+
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 862f5b20077..8852e0a40fc 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -30,7 +30,7 @@ test: point lseg box path polygon circle date time timetz timestamp timestamptz
 # geometry depends on point, lseg, box, path, polygon and circle
 # horology depends on interval, timetz, timestamp, timestamptz, reltime and abstime
 # ----------
-test: geometry horology oidjoins type_sanity opr_sanity
+test: geometry horology regex oidjoins type_sanity opr_sanity
 
 # ----------
 # These four each depend on the previous one
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 142fc9cf0d1..0bc5df7fe73 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -42,6 +42,7 @@ test: tstypes
 test: comments
 test: geometry
 test: horology
+test: regex
 test: oidjoins
 test: type_sanity
 test: opr_sanity
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
new file mode 100644
index 00000000000..242a81ef329
--- /dev/null
+++ b/src/test/regress/sql/regex.sql
@@ -0,0 +1,13 @@
+--
+-- Regular expression tests
+--
+
+-- Don't want to have to double backslashes in regexes
+set standard_conforming_strings = on;
+
+-- Test simple quantified backrefs
+select 'bbbbb' ~ '^([bc])\1*$' as t;
+select 'ccc' ~ '^([bc])\1*$' as t;
+select 'xxx' ~ '^([bc])\1*$' as f;
+select 'bbc' ~ '^([bc])\1*$' as f;
+select 'b' ~ '^([bc])\1*$' as t;