aboutsummaryrefslogtreecommitdiff
path: root/doc/src
diff options
context:
space:
mode:
Diffstat (limited to 'doc/src')
-rw-r--r-- doc/src/sgml/charset.sgml 10
-rw-r--r-- doc/src/sgml/func.sgml 48
2 files changed, 58 insertions, 0 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 20cdfabd7bf..b6023fa459e 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -934,6 +934,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
such as pattern matching operations. Therefore, they should be used
only in cases where they are specifically wanted.
</para>
+
+ <tip>
+ <para>
+ To deal with text in different Unicode normalization forms, you can
+ also use the functions/expressions
+ <function>normalize</function> and <literal>is normalized</literal> to
+ preprocess or check the strings, instead of using nondeterministic
+ collations. There are different trade-offs for each approach.
+ </para>
+ </tip>
</sect3>
</sect2>
</sect1>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index cbfd2a762e4..a329f61f339 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1563,6 +1563,30 @@
<row>
<entry>
<indexterm>
+ <primary>normalized</primary>
+ </indexterm>
+ <indexterm>
+ <primary>Unicode normalization</primary>
+ </indexterm>
+ <literal><parameter>string</parameter> is <optional>not</optional> <optional><parameter>form</parameter></optional> normalized</literal>
+ </entry>
+ <entry><type>boolean</type></entry>
+ <entry>
+ Checks whether the string is in the specified Unicode normalization
+ form. The optional <parameter>form</parameter> key word specifies the
+ form: <literal>NFC</literal> (the default), <literal>NFD</literal>,
+ <literal>NFKC</literal>, or <literal>NFKD</literal>. This expression can
+ only be used if the server encoding is <literal>UTF8</literal>. Note
+ that checking for normalization using this expression is often faster
+ than normalizing possibly already normalized strings.
+ </entry>
+ <entry><literal>U&amp;'\0061\0308bc' IS NFD NORMALIZED</literal></entry>
+ <entry><literal>true</literal></entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
<primary>bit_length</primary>
</indexterm>
<literal><function>bit_length(<parameter>string</parameter>)</function></literal>
@@ -1613,6 +1637,30 @@
<row>
<entry>
<indexterm>
+ <primary>normalize</primary>
+ </indexterm>
+ <indexterm>
+ <primary>Unicode normalization</primary>
+ </indexterm>
+ <literal><function>normalize(<parameter>string</parameter> <type>text</type>
+ <optional>, <parameter>form</parameter> </optional>)</function></literal>
+ </entry>
+ <entry><type>text</type></entry>
+ <entry>
+ Converts the string in the first argument to the specified Unicode
+ normalization form. The optional second argument specifies the form
+ as an identifier: <literal>NFC</literal> (the default),
+ <literal>NFD</literal>, <literal>NFKC</literal>, or
+ <literal>NFKD</literal>. This function can only be used if the server
+ encoding is <literal>UTF8</literal>.
+ </entry>
+ <entry><literal>normalize(U&amp;'\0061\0308bc', NFC)</literal></entry>
+ <entry><literal>U&amp;'\00E4bc'</literal></entry>
+ </row>
+
+ <row>
+ <entry>
+ <indexterm>
<primary>octet_length</primary>
</indexterm>
<literal><function>octet_length(<parameter>string</parameter>)</function></literal>