aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPeter Eisentraut <peter@eisentraut.org>2020-06-29 11:04:42 +0200
committerPeter Eisentraut <peter@eisentraut.org>2020-06-29 11:05:00 +0200
commit78c887679d7632c1211f85eb95723f3226bf1b46 (patch)
tree7c333d0f5f4c6238ea74bc44038564a179882107
parentaafefb4dcbf79e8cb1439e888a9cdb3dfefa7657 (diff)
downloadpostgresql-78c887679d7632c1211f85eb95723f3226bf1b46.tar.gz
postgresql-78c887679d7632c1211f85eb95723f3226bf1b46.zip
Add current substring regular expression syntax
SQL:1999 had syntax SUBSTRING(text FROM pattern FOR escapechar) but this was replaced in SQL:2003 by the more clear SUBSTRING(text SIMILAR pattern ESCAPE escapechar) but this was never implemented in PostgreSQL. This patch adds that new syntax as an alternative in the parser, and updates documentation and tests to indicate that this is the preferred alternative now. Reviewed-by: Pavel Stehule <pavel.stehule@gmail.com> Reviewed-by: Vik Fearing <vik@postgresfriends.org> Reviewed-by: Fabien COELHO <coelho@cri.ensmp.fr> Discussion: https://www.postgresql.org/message-id/flat/a15db31c-d0f8-8ce0-9039-578a31758adb%402ndquadrant.com
-rw-r--r--contrib/citext/expected/citext.out2
-rw-r--r--contrib/citext/expected/citext_1.out2
-rw-r--r--contrib/citext/sql/citext.sql2
-rw-r--r--doc/src/sgml/func.sgml20
-rw-r--r--src/backend/catalog/information_schema.sql2
-rw-r--r--src/backend/parser/gram.y26
-rw-r--r--src/test/regress/expected/strings.out31
-rw-r--r--src/test/regress/sql/strings.sql26
8 files changed, 77 insertions, 34 deletions
diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out
index 96800be9c03..ec99aaed5dc 100644
--- a/contrib/citext/expected/citext.out
+++ b/contrib/citext/expected/citext.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out
index 33e3676d3c4..75fd08b7cc4 100644
--- a/contrib/citext/expected/citext_1.out
+++ b/contrib/citext/expected/citext_1.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql
index 261b73cfa6c..10232f5a9f4 100644
--- a/contrib/citext/sql/citext.sql
+++ b/contrib/citext/sql/citext.sql
@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
SELECT trim(' trim '::citext) = 'trim' AS t;
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7119f0b2ca5..f0658565350 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<row>
<entry role="func_table_entry"><para role="func_signature">
+ <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para role="func_signature">
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> )
<returnvalue>text</returnvalue>
</para>
<para>
Extracts substring matching <acronym>SQL</acronym> regular expression;
- see <xref linkend="functions-similarto-regexp"/>.
+ see <xref linkend="functions-similarto-regexp"/>. The first form has
+ been specified since SQL:2003; the second form was only in SQL:1999
+ and should be considered obsolete.
</para>
<para>
- <literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal>
+ <literal>substring('Thomas' similar '%#"o_a#"_' escape '#')</literal>
<returnvalue>oma</returnvalue>
</para></entry>
</row>
@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
The <function>substring</function> function with three parameters
provides extraction of a substring that matches an SQL
regular expression pattern. The function can be written according
- to SQL99 syntax:
+ to standard SQL syntax:
+<synopsis>
+substring(<replaceable>string</replaceable> similar <replaceable>pattern</replaceable> escape <replaceable>escape-character</replaceable>)
+</synopsis>
+ or using the now obsolete SQL:1999 syntax:
<synopsis>
substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>)
</synopsis>
@@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>,
<para>
Some examples, with <literal>#&quot;</literal> delimiting the return string:
<programlisting>
-substring('foobar' from '%#"o_b#"%' for '#') <lineannotation>oob</lineannotation>
-substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotation>
+substring('foobar' similar '%#"o_b#"%' escape '#') <lineannotation>oob</lineannotation>
+substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</lineannotation>
</programlisting>
</para>
</sect2>
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 3e07fb107eb..5ab47e77431 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
AS
$$SELECT
CASE WHEN $1 IN (1186) /* interval */
- THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#'))
+ THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
ELSE null
END$$;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 1a843049f05..4ff35095b85 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -14451,7 +14451,27 @@ position_list:
| /*EMPTY*/ { $$ = NIL; }
;
-/* SUBSTRING() arguments */
+/*
+ * SUBSTRING() arguments
+ *
+ * Note that SQL:1999 has both
+ *
+ * text FROM int FOR int
+ *
+ * and
+ *
+ * text FROM pattern FOR escape
+ *
+ * In the parser we map them both to a call to the substring() function and
+ * rely on type resolution to pick the right one.
+ *
+ * In SQL:2003, the second variant was changed to
+ *
+ * text SIMILAR pattern ESCAPE escape
+ *
+ * We could in theory map that to a different function internally, but
+ * since we still support the SQL:1999 version, we don't.
+ */
substr_list:
a_expr FROM a_expr FOR a_expr
{
@@ -14483,6 +14503,10 @@ substr_list:
makeTypeCast($3,
SystemTypeName("int4"), -1));
}
+ | a_expr SIMILAR a_expr ESCAPE a_expr
+ {
+ $$ = list_make3($1, $3, $5);
+ }
/*
* We also want to support generic substring functions that
* accept the usual generic list of arguments.
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 6e98d183f61..8c034c9599f 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
(1 row)
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+ bcd
+-----
+ bcd
+(1 row)
+
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
bcd
-----
@@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
(1 row)
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
(1 row)
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
ERROR: SQL regular expression may not contain more than two escape-double-quote separators
CONTEXT: SQL function "substring" statement 1
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
bcdefg
--------
bcdefg
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3e89159a4fd..14901a26923 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
-- substring() with just two arguments is not allowed by SQL spec;
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL