diff options
| field | value | date |
|---|---|---|
| author | Tom Lane <tgl@sss.pgh.pa.us> | 2014-09-23 20:25:31 -0400 |
| committer | Tom Lane <tgl@sss.pgh.pa.us> | 2014-09-23 20:26:14 -0400 |
| commit | 3694b4d7e1aa02f917f9d18c550fbb49b96efa83 (patch) | |
| tree | cac10e69dcf1dfe0cca982a28e67f8a1f1d9aeca /src | |
| parent | a564307373089fc81a07bce49236fe2bd66de0fe (diff) | |
| download | postgresql-3694b4d7e1aa02f917f9d18c550fbb49b96efa83.tar.gz postgresql-3694b4d7e1aa02f917f9d18c550fbb49b96efa83.zip | |
Fix incorrect search for "x?" style matches in creviterdissect().

When the number of allowed iterations is limited (either a "?" quantifier
or a bound expression), the last sub-match has to reach to the end of the
target string. The previous coding here first tried the shortest possible
match (one character, usually) and then gave up and back-tracked if that
didn't work, typically leading to failure to match overall, as shown in
bug #11478 from Christoph Berg. The minimum change to fix that would be to
not decrement k before "goto backtrack"; but that would be a pretty stupid
solution, because we'd laboriously try each possible sub-match length
before finally discovering that only ending at the end can work. Instead,
force the sub-match endpoint limit up to the end for even the first
shortest() call if we cannot have any more sub-matches after this one.

Bug introduced in my rewrite that added the iterdissect logic, commit
173e29aa5deefd9e71c183583ba37805c8102a72. The shortest-first search code
was too closely modeled on the longest-first code, which hasn't got this
issue since it tries a match reaching to the end to start with anyway.

Back-patch to all affected branches.
Diffstat (limited to 'src')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/backend/regex/regexec.c | 4 |
| -rw-r--r-- | src/test/regress/expected/regex.out | 8 |
| -rw-r--r-- | src/test/regress/sql/regex.sql | 4 |

3 files changed, 16 insertions, 0 deletions
```diff
diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c
index 7f41437cb58..5e78f8149c8 100644
--- a/src/backend/regex/regexec.c
+++ b/src/backend/regex/regexec.c
@@ -1190,6 +1190,10 @@ creviterdissect(struct vars * v,
 					(k >= min_matches || min_matches - k < end - limit))
 				limit++;
 
+			/* if this is the last allowed sub-match, it must reach to the end */
+			if (k >= max_matches)
+				limit = end;
+
 			/* try to find an endpoint for the k'th sub-match */
 			endpts[k] = shortest(v, d, endpts[k - 1], limit, end,
 								 (chr **) NULL, (int *) NULL);
diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out
index df39ef937dd..497ddcd4677 100644
--- a/src/test/regress/expected/regex.out
+++ b/src/test/regress/expected/regex.out
@@ -188,3 +188,11 @@ select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
  {m,m}
 (2 rows)
 
+-- Test for proper matching of non-greedy iteration (bug #11478)
+select regexp_matches('foo/bar/baz',
+                      '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
+ regexp_matches 
+----------------
+ {foo,bar,baz}
+(1 row)
+
diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql
index e5f690263b9..ceb9d699ce9 100644
--- a/src/test/regress/sql/regex.sql
+++ b/src/test/regress/sql/regex.sql
@@ -46,3 +46,7 @@ select 'a' ~ '((((((a+|)+|)+|)+|)+|)+|)';
 -- https://core.tcl.tk/tcl/tktview/6585b21ca8fa6f3678d442b97241fdd43dba2ec0
 select 'Programmer' ~ '(\w).*?\1' as t;
 select regexp_matches('Programmer', '(\w)(.*?\1)', 'g');
+
+-- Test for proper matching of non-greedy iteration (bug #11478)
+select regexp_matches('foo/bar/baz',
+                      '^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', '');
```