2 files changed, 58 insertions, 0 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 20cdfabd7bf..b6023fa459e 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -934,6 +934,16 @@ CREATE COLLATION ignore_accents (provider = icu, locale = 'und-u-ks-level1-kc-tr
      such as pattern matching operations.  Therefore, they should be used
      only in cases where they are specifically wanted.
     </para>
+
+    <tip>
+     <para>
+      To deal with text in different Unicode normalization forms, you can
+      also use the function <function>normalize</function> and the
+      expression <literal>is normalized</literal> to preprocess or check
+      the strings, instead of using nondeterministic collations.  Each
+      approach has different trade-offs.
+     </para>
+    </tip>
    </sect3>
   </sect2>
  </sect1>
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index cbfd2a762e4..a329f61f339 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -1563,6 +1563,30 @@
       <row>
        <entry>
         <indexterm>
+         <primary>normalized</primary>
+        </indexterm>
+        <indexterm>
+         <primary>Unicode normalization</primary>
+        </indexterm>
+        <literal><parameter>string</parameter> is <optional>not</optional> <optional><parameter>form</parameter></optional> normalized</literal>
+       </entry>
+       <entry><type>boolean</type></entry>
+       <entry>
+        Checks whether the string is in the specified Unicode normalization
+        form.  The optional <parameter>form</parameter> key word specifies
+        the form: <literal>NFC</literal> (the default), <literal>NFD</literal>,
+        <literal>NFKC</literal>, or <literal>NFKD</literal>.  This expression
+        can only be used if the server encoding is <literal>UTF8</literal>.
+        Note that checking for normalization using this expression is often
+        faster than normalizing strings that may already be normalized.
+       </entry>
+       <entry><literal>U&amp;'\0061\0308bc' IS NFD NORMALIZED</literal></entry>
+       <entry><literal>true</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <indexterm>
          <primary>bit_length</primary>
         </indexterm>
         <literal><function>bit_length(<parameter>string</parameter>)</function></literal>
@@ -1613,6 +1637,30 @@
       <row>
        <entry>
         <indexterm>
+         <primary>normalize</primary>
+        </indexterm>
+        <indexterm>
+         <primary>Unicode normalization</primary>
+        </indexterm>
+        <literal><function>normalize(<parameter>string</parameter> <type>text</type>
+        <optional>, <parameter>form</parameter> </optional>)</function></literal>
+       </entry>
+       <entry><type>text</type></entry>
+       <entry>
+        Converts the string in the first argument to the specified Unicode
+        normalization form.  The optional second argument specifies the form
+        as an identifier: <literal>NFC</literal> (the default),
+        <literal>NFD</literal>, <literal>NFKC</literal>, or
+        <literal>NFKD</literal>.  This function can only be used if the server
+        encoding is <literal>UTF8</literal>.
+       </entry>
+       <entry><literal>normalize(U&amp;'\0061\0308bc', NFC)</literal></entry>
+       <entry><literal>U&amp;'\00E4bc'</literal></entry>
+      </row>
+
+      <row>
+       <entry>
+        <indexterm>
          <primary>octet_length</primary>
         </indexterm>
         <literal><function>octet_length(<parameter>string</parameter>)</function></literal>