aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--contrib/citext/expected/citext.out2
-rw-r--r--contrib/citext/expected/citext_1.out2
-rw-r--r--contrib/citext/sql/citext.sql2
-rw-r--r--doc/src/sgml/func.sgml20
-rw-r--r--src/backend/catalog/information_schema.sql2
-rw-r--r--src/backend/parser/gram.y26
-rw-r--r--src/test/regress/expected/strings.out31
-rw-r--r--src/test/regress/sql/strings.sql26
8 files changed, 77 insertions, 34 deletions
diff --git a/contrib/citext/expected/citext.out b/contrib/citext/expected/citext.out
index 96800be9c03..ec99aaed5dc 100644
--- a/contrib/citext/expected/citext.out
+++ b/contrib/citext/expected/citext.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/expected/citext_1.out b/contrib/citext/expected/citext_1.out
index 33e3676d3c4..75fd08b7cc4 100644
--- a/contrib/citext/expected/citext_1.out
+++ b/contrib/citext/expected/citext_1.out
@@ -1602,7 +1602,7 @@ SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
t
(1 row)
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
t
---
t
diff --git a/contrib/citext/sql/citext.sql b/contrib/citext/sql/citext.sql
index 261b73cfa6c..10232f5a9f4 100644
--- a/contrib/citext/sql/citext.sql
+++ b/contrib/citext/sql/citext.sql
@@ -564,7 +564,7 @@ SELECT substring('alphabet'::citext, 3, 2) = 'ph' AS t;
SELECT substring('Thomas'::citext from 2 for 3) = 'hom' AS t;
SELECT substring('Thomas'::citext from 2) = 'homas' AS t;
SELECT substring('Thomas'::citext from '...$') = 'mas' AS t;
-SELECT substring('Thomas'::citext from '%#"o_a#"_' for '#') = 'oma' AS t;
+SELECT substring('Thomas'::citext similar '%#"o_a#"_' escape '#') = 'oma' AS t;
SELECT trim(' trim '::citext) = 'trim' AS t;
SELECT trim('xxxxxtrimxxxx'::citext, 'x'::citext) = 'trim' AS t;
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 7119f0b2ca5..f0658565350 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -2669,15 +2669,21 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<row>
<entry role="func_table_entry"><para role="func_signature">
+ <function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>SIMILAR</literal> <parameter>pattern</parameter> <type>text</type> <literal>ESCAPE</literal> <parameter>escape</parameter> <type>text</type> )
+ <returnvalue>text</returnvalue>
+ </para>
+ <para role="func_signature">
<function>substring</function> ( <parameter>string</parameter> <type>text</type> <literal>FROM</literal> <parameter>pattern</parameter> <type>text</type> <literal>FOR</literal> <parameter>escape</parameter> <type>text</type> )
<returnvalue>text</returnvalue>
</para>
<para>
Extracts substring matching <acronym>SQL</acronym> regular expression;
- see <xref linkend="functions-similarto-regexp"/>.
+ see <xref linkend="functions-similarto-regexp"/>. The first form has
+ been specified since SQL:2003; the second form was only in SQL:1999
+ and should be considered obsolete.
</para>
<para>
- <literal>substring('Thomas' from '%#"o_a#"_' for '#')</literal>
+ <literal>substring('Thomas' similar '%#"o_a#"_' escape '#')</literal>
<returnvalue>oma</returnvalue>
</para></entry>
</row>
@@ -5160,7 +5166,11 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
The <function>substring</function> function with three parameters
provides extraction of a substring that matches an SQL
regular expression pattern. The function can be written according
- to SQL99 syntax:
+ to standard SQL syntax:
+<synopsis>
+substring(<replaceable>string</replaceable> similar <replaceable>pattern</replaceable> escape <replaceable>escape-character</replaceable>)
+</synopsis>
+ or using the now obsolete SQL:1999 syntax:
<synopsis>
substring(<replaceable>string</replaceable> from <replaceable>pattern</replaceable> for <replaceable>escape-character</replaceable>)
</synopsis>
@@ -5201,8 +5211,8 @@ substring(<replaceable>string</replaceable>, <replaceable>pattern</replaceable>,
<para>
Some examples, with <literal>#&quot;</literal> delimiting the return string:
<programlisting>
-substring('foobar' from '%#"o_b#"%' for '#') <lineannotation>oob</lineannotation>
-substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotation>
+substring('foobar' similar '%#"o_b#"%' escape '#') <lineannotation>oob</lineannotation>
+substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</lineannotation>
</programlisting>
</para>
</sect2>
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 3e07fb107eb..5ab47e77431 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -182,7 +182,7 @@ CREATE FUNCTION _pg_interval_type(typid oid, mod int4) RETURNS text
AS
$$SELECT
CASE WHEN $1 IN (1186) /* interval */
- THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) from 'interval[()0-9]* #"%#"' for '#'))
+ THEN pg_catalog.upper(substring(pg_catalog.format_type($1, $2) similar 'interval[()0-9]* #"%#"' escape '#'))
ELSE null
END$$;
diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index 1a843049f05..4ff35095b85 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -14451,7 +14451,27 @@ position_list:
| /*EMPTY*/ { $$ = NIL; }
;
-/* SUBSTRING() arguments */
+/*
+ * SUBSTRING() arguments
+ *
+ * Note that SQL:1999 has both
+ *
+ * text FROM int FOR int
+ *
+ * and
+ *
+ * text FROM pattern FOR escape
+ *
+ * In the parser we map them both to a call to the substring() function and
+ * rely on type resolution to pick the right one.
+ *
+ * In SQL:2003, the second variant was changed to
+ *
+ * text SIMILAR pattern ESCAPE escape
+ *
+ * We could in theory map that to a different function internally, but
+ * since we still support the SQL:1999 version, we don't.
+ */
substr_list:
a_expr FROM a_expr FOR a_expr
{
@@ -14483,6 +14503,10 @@ substr_list:
makeTypeCast($3,
SystemTypeName("int4"), -1));
}
+ | a_expr SIMILAR a_expr ESCAPE a_expr
+ {
+ $$ = list_make3($1, $3, $5);
+ }
/*
* We also want to support generic substring functions that
* accept the usual generic list of arguments.
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 6e98d183f61..8c034c9599f 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -397,6 +397,13 @@ SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
(1 row)
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+ bcd
+-----
+ bcd
+(1 row)
+
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
bcd
-----
@@ -404,75 +411,75 @@ SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
(1 row)
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
True
------
t
(1 row)
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
(1 row)
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
bcdef
-------
bcdef
(1 row)
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
ERROR: SQL regular expression may not contain more than two escape-double-quote separators
CONTEXT: SQL function "substring" statement 1
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
bcdefg
--------
bcdefg
(1 row)
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
abcdefg
---------
abcdefg
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 3e89159a4fd..14901a26923 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -132,31 +132,33 @@ SELECT SUBSTRING('1234567890' FROM 3) = '34567890' AS "34567890";
SELECT SUBSTRING('1234567890' FROM 4 FOR 3) = '456' AS "456";
-- T581 regular expression substring (with SQL's bizarre regexp syntax)
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"(b_d)#"%' ESCAPE '#') AS "bcd";
+-- obsolete SQL99 syntax
SELECT SUBSTRING('abcdefg' FROM 'a#"(b_d)#"%' FOR '#') AS "bcd";
-- No match should return NULL
-SELECT SUBSTRING('abcdefg' FROM '#"(b_d)#"%' FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '#"(b_d)#"%' ESCAPE '#') IS NULL AS "True";
-- Null inputs should return NULL
-SELECT SUBSTRING('abcdefg' FROM '%' FOR NULL) IS NULL AS "True";
-SELECT SUBSTRING(NULL FROM '%' FOR '#') IS NULL AS "True";
-SELECT SUBSTRING('abcdefg' FROM NULL FOR '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR '%' ESCAPE NULL) IS NULL AS "True";
+SELECT SUBSTRING(NULL SIMILAR '%' ESCAPE '#') IS NULL AS "True";
+SELECT SUBSTRING('abcdefg' SIMILAR NULL ESCAPE '#') IS NULL AS "True";
-- The first and last parts should act non-greedy
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*' ESCAPE '#') AS "abcdefg";
-- Vertical bar in any part affects only that part
-SELECT SUBSTRING('abcdefg' FROM 'a|b#"%#"g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%#"x|g' FOR '#') AS "bcdef";
-SELECT SUBSTRING('abcdefg' FROM 'a#"%|ab#"g' FOR '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a|b#"%#"g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%#"x|g' ESCAPE '#') AS "bcdef";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%|ab#"g' ESCAPE '#') AS "bcdef";
-- Can't have more than two part separators
-SELECT SUBSTRING('abcdefg' FROM 'a*#"%#"g*#"x' FOR '#') AS "error";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a*#"%#"g*#"x' ESCAPE '#') AS "error";
-- Postgres extension: with 0 or 1 separator, assume parts 1 and 3 are empty
-SELECT SUBSTRING('abcdefg' FROM 'a#"%g' FOR '#') AS "bcdefg";
-SELECT SUBSTRING('abcdefg' FROM 'a%g' FOR '#') AS "abcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a#"%g' ESCAPE '#') AS "bcdefg";
+SELECT SUBSTRING('abcdefg' SIMILAR 'a%g' ESCAPE '#') AS "abcdefg";
-- substring() with just two arguments is not allowed by SQL spec;
-- we accept it, but we interpret the pattern as a POSIX regexp not SQL