about summary refs log tree commit diff
path: root/src/test/modules/test_regex/sql/test_regex.sql
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2021-08-17 12:00:02 -0400
committer Tom Lane <tgl@sss.pgh.pa.us> 2021-08-17 12:00:02 -0400
commit 78a843f119ca7d4a6eb173a7ee3bed45889d48d8 (patch)
tree f87ef8ebaf0daa5064dd19df9e942bd22d8872b3 /src/test/modules/test_regex/sql/test_regex.sql
parent f576de1db1eeca63180b1ffa4b42b1e360f88577 (diff)
download postgresql-78a843f119ca7d4a6eb173a7ee3bed45889d48d8.tar.gz
postgresql-78a843f119ca7d4a6eb173a7ee3bed45889d48d8.zip
Improve regex compiler's arc moving/copying logic.
The functions moveins(), copyins(), moveouts(), copyouts() are required to preserve the invariant that there are no duplicate arcs in the regex's NFA. Spencer's original implementation of them was O(N^2) since it checked separately for a match to each source arc. In commit 579840ca0 I improved that by adding sort/merge logic to be used if more than a few arcs are to be moved/copied. However, I now realize that that missed a bet. At many call sites, the target state is newly made and cannot have any existing in-arcs (respectively out-arcs) that could be duplicates. So spending any cycles at all on checking for duplicates is wasted effort; in these cases we can just blindly move/copy all the source arcs. Add code paths to do that.

It turns out that for copyins()/copyouts(), *all* the call sites have this property, making all the "improved" logic in them flat out unreachable. Perhaps we'll need the full capability again someday, so I just #ifdef'd those paths out rather than removing them entirely.

In passing, add a few test cases to improve code coverage in this area as well as in regc_locale.c/regc_pg_locale.c.

Discussion: https://postgr.es/m/810272.1629064063@sss.pgh.pa.us
Diffstat (limited to 'src/test/modules/test_regex/sql/test_regex.sql')
-rw-r--r-- src/test/modules/test_regex/sql/test_regex.sql 32
1 files changed, 32 insertions, 0 deletions
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index 3419564203a..389b8b61b3b 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -304,6 +304,12 @@ select * from test_regex('a[[=x=]]', 'ay', '+Lb');
-- expectNomatch 9.9 &+L {a[[=x=]]} az
select * from test_regex('a[[=x=]]', 'az', '+L');
select * from test_regex('a[[=x=]]', 'az', '+Lb');
+-- expectMatch 9.9b &iL {a[[=Y=]]} ay ay
+select * from test_regex('a[[=Y=]]', 'ay', 'iL');
+select * from test_regex('a[[=Y=]]', 'ay', 'iLb');
+-- expectNomatch 9.9c &L {a[[=Y=]]} ay
+select * from test_regex('a[[=Y=]]', 'ay', 'L');
+select * from test_regex('a[[=Y=]]', 'ay', 'Lb');
-- expectError 9.10 & {a[0-[=x=]]} ERANGE
select * from test_regex('a[0-[=x=]]', '', '');
select * from test_regex('a[0-[=x=]]', '', 'b');
@@ -864,6 +870,12 @@ select * from test_regex('a[b-d]', 'aC', 'iMb');
-- expectNomatch 19.5 &iM {a[^b-d]} aC
select * from test_regex('a[^b-d]', 'aC', 'iM');
select * from test_regex('a[^b-d]', 'aC', 'iMb');
+-- expectMatch 19.6 &iM {a[B-Z]} aC aC
+select * from test_regex('a[B-Z]', 'aC', 'iM');
+select * from test_regex('a[B-Z]', 'aC', 'iMb');
+-- expectNomatch 19.7 &iM {a[^B-Z]} aC
+select * from test_regex('a[^B-Z]', 'aC', 'iM');
+select * from test_regex('a[^B-Z]', 'aC', 'iMb');
-- doing 20 "directors and embedded options"
@@ -1171,6 +1183,8 @@ select * from test_regex('z*4', '123zzzz456', '-');
select * from test_regex('z*?4', '123zzzz456', 'PT');
-- expectMatch 24.13 PT {^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$} {foo/bar/baz} {foo/bar/baz} {foo} {bar} {baz}
select * from test_regex('^([^/]+?)(?:/([^/]+?))(?:/([^/]+?))?$', 'foo/bar/baz', 'PT');
+-- expectMatch 24.14 PRT {^(.+?)(?:/(.+?))(?:/(.+?)\3)?$} {foo/bar/baz/quux} {foo/bar/baz/quux} {foo} {bar/baz/quux} {}
+select * from test_regex('^(.+?)(?:/(.+?))(?:/(.+?)\3)?$', 'foo/bar/baz/quux', 'PRT');
-- doing 25 "mixed quantifiers"
-- # this is very incomplete as yet
@@ -1741,3 +1755,21 @@ select * from test_regex(repeat('x*y*z*', 200), 'x', 'N');
-- regexp {(\Y)+} foo
-- } 1
select * from test_regex('(\Y)+', 'foo', 'LNP');
+
+
+-- and now, tests not from either Spencer or the Tcl project
+
+-- These cases exercise additional code paths in pushfwd()/push()/combine()
+select * from test_regex('a\Y(?=45)', 'a45', 'HLP');
+select * from test_regex('a(?=.)c', 'ac', 'HP');
+select * from test_regex('a(?=.).*(?=3)3*', 'azz33', 'HP');
+select * from test_regex('a(?=\w)\w*(?=.).*', 'az3%', 'HLP');
+
+-- These exercise the bulk-arc-movement paths in moveins() and moveouts();
+-- you may need to make them longer if you change BULK_ARC_OP_USE_SORT()
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ(?:\w|a|b|c|d|e|f|0|1|2|3|4|5|6|Q)',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ3', 'LP');
+select * from test_regex('ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(\Y\Y)+',
+ 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789Z', 'LP');
+select * from test_regex('((x|xabcdefghijklmnopqrstuvwxyz0123456789)x*|[^y]z)$',
+ 'az', '');