diff options
-rw-r--r-- | doc/src/sgml/func.sgml | 28 |
-rw-r--r-- | src/backend/regex/re_syntax.n | 7 |
-rw-r--r-- | src/backend/regex/regcomp.c | 6 |
-rw-r--r-- | src/test/modules/test_regex/expected/test_regex.out | 12 |
4 files changed, 36 insertions, 17 deletions
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 860ae118264..c5048a19988 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -6323,32 +6323,38 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo; <tbody> <row> <entry> <literal>\d</literal> </entry> - <entry> <literal>[[:digit:]]</literal> </entry> + <entry> matches any digit, like + <literal>[[:digit:]]</literal> </entry> </row> <row> <entry> <literal>\s</literal> </entry> - <entry> <literal>[[:space:]]</literal> </entry> + <entry> matches any whitespace character, like + <literal>[[:space:]]</literal> </entry> </row> <row> <entry> <literal>\w</literal> </entry> - <entry> <literal>[[:word:]]</literal> </entry> + <entry> matches any word character, like + <literal>[[:word:]]</literal> </entry> </row> <row> <entry> <literal>\D</literal> </entry> - <entry> <literal>[^[:digit:]]</literal> </entry> + <entry> matches any non-digit, like + <literal>[^[:digit:]]</literal> </entry> </row> <row> <entry> <literal>\S</literal> </entry> - <entry> <literal>[^[:space:]]</literal> </entry> + <entry> matches any non-whitespace character, like + <literal>[^[:space:]]</literal> </entry> </row> <row> <entry> <literal>\W</literal> </entry> - <entry> <literal>[^[:word:]]</literal> </entry> + <entry> matches any non-word character, like + <literal>[^[:word:]]</literal> </entry> </row> </tbody> </tgroup> @@ -6813,14 +6819,20 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); If newline-sensitive matching is specified, <literal>.</literal> and bracket expressions using <literal>^</literal> will never match the newline character - (so that matches will never cross newlines unless the RE - explicitly arranges it) + (so that matches will not cross lines unless the RE + explicitly includes a newline) and <literal>^</literal> and <literal>$</literal> will match the empty string after and before a newline respectively, in addition to matching at beginning and end of 
string respectively. But the ARE escapes <literal>\A</literal> and <literal>\Z</literal> continue to match beginning or end of string <emphasis>only</emphasis>. + Also, the character class shorthands <literal>\D</literal> + and <literal>\W</literal> will match a newline regardless of this mode. + (Before <productname>PostgreSQL</productname> 14, they did not match + newlines when in newline-sensitive mode. + Write <literal>[^[:digit:]]</literal> + or <literal>[^[:word:]]</literal> to get the old behavior.) </para> <para> diff --git a/src/backend/regex/re_syntax.n b/src/backend/regex/re_syntax.n index 1afaa7cce7c..93830fd1000 100644 --- a/src/backend/regex/re_syntax.n +++ b/src/backend/regex/re_syntax.n @@ -804,7 +804,7 @@ and bracket expressions using \fB^\fR will never match the newline character (so that matches will never cross newlines unless the RE -explicitly arranges it) +explicitly includes a newline) and \fB^\fR and @@ -817,6 +817,11 @@ ARE and \fB\eZ\fR continue to match beginning or end of string \fIonly\fR. +Also, the character class shorthands +\fB\eD\fR +and +\fB\eW\fR +will match a newline regardless of this mode. 
.PP If partial newline-sensitive matching is specified, this affects \fB.\fR diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 7b77a29136c..d3540fdd0f3 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1407,10 +1407,6 @@ charclasscomplement(struct vars *v, /* build arcs for char class; this may cause color splitting */ subcolorcvec(v, cv, cstate, cstate); - - /* in NLSTOP mode, ensure newline is not part of the result set */ - if (v->cflags & REG_NLSTOP) - newarc(v->nfa, PLAIN, v->nlcolor, cstate, cstate); NOERR(); /* clean up any subcolors in the arc set */ @@ -1612,6 +1608,8 @@ cbracket(struct vars *v, NOERR(); bracket(v, left, right); + + /* in NLSTOP mode, ensure newline is not part of the result set */ if (v->cflags & REG_NLSTOP) newarc(v->nfa, PLAIN, v->nlcolor, left, right); NOERR(); diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 92154b6d28a..5d993f40c25 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -2144,7 +2144,8 @@ select * from test_regex('\D+', E'abc\ndef345', 'nLP'); test_regex ------------------------------- {0,REG_UNONPOSIX,REG_ULOCALE} - {abc} + {"abc + + def"} (2 rows) select * from test_regex('[\D]+', E'abc\ndef345', 'LPE'); @@ -2159,7 +2160,8 @@ select * from test_regex('[\D]+', E'abc\ndef345', 'nLPE'); test_regex ---------------------------------------- {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} - {abc} + {"abc + + def"} (2 rows) select * from test_regex('\w+', E'abc_012\ndef', 'LP'); @@ -2202,7 +2204,8 @@ select * from test_regex('\W+', E'***\n@@@___', 'nLP'); test_regex ------------------------------- {0,REG_UNONPOSIX,REG_ULOCALE} - {***} + {"*** + + @@@"} (2 rows) select * from test_regex('[\W]+', E'***\n@@@___', 'LPE'); @@ -2217,7 +2220,8 @@ select * from test_regex('[\W]+', E'***\n@@@___', 'nLPE'); test_regex 
---------------------------------------- {0,REG_UBBS,REG_UNONPOSIX,REG_ULOCALE} - {***} + {"*** + + @@@"} (2 rows) -- doing 13 "escapes" |