Assorted editing for collation documentation.

I made a pass over this to familiarize myself with the feature, and found some things that could be improved.
author: Tom Lane <tgl@sss.pgh.pa.us> 2011-03-08 17:10:34 -0500
committer: Tom Lane <tgl@sss.pgh.pa.us> 2011-03-08 17:10:59 -0500
commit: a612b17120fc011cefcdec6948b1cc8543529d06 (patch)
tree: 4d44863e18e5de4b3d9fee9bcc1020f24bc1074b
parent: 4502c8e1c06164adb7be526096e91e04d1844d36 (diff)
download: postgresql-a612b17120fc011cefcdec6948b1cc8543529d06.tar.gz
postgresql-a612b17120fc011cefcdec6948b1cc8543529d06.zip
4 files changed, 118 insertions, 88 deletions
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index cc0cbe134c4..297ad532080 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1128,8 +1128,8 @@
       <entry><type>oid</type></entry>
       <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
       <entry>
-       The defined collation of the column, zero if the column does
-       not have a collatable type.
+       The defined collation of the column, or zero if the column is
+       not of a collatable data type.
       </entry>
      </row>
 
@@ -2088,7 +2088,7 @@
    The catalog <structname>pg_collation</structname> describes the
    available collations, which are essentially mappings from an SQL
    name to operating system locale categories.
-   See <xref linkend="locale"> for more information.
+   See <xref linkend="collation"> for more information.
   </para>
 
   <table>
@@ -2132,38 +2132,48 @@
       <entry><structfield>collencoding</structfield></entry>
       <entry><type>int4</type></entry>
       <entry></entry>
-      <entry>
-       Encoding to which the collation is applicable.  SQL-level
-       commands such as <command>ALTER COLLATION</command> only
-       operate on the collation belonging to the current database
-       encoding.  But this field is necessary because when this
-       catalog is initialized, the encoding of future databases is not
-       yet known.  For practical purposes, collations that do not
-       match the current database encoding should be considered
-       invalid or invisible.  It could be useful, however, to create
-       collations whose encoding does not match the database encoding
-       in template databases.  This would currently have to be done
-       manually.
-      </entry>
+      <entry>Encoding to which the collation is applicable</entry>
      </row>
 
      <row>
       <entry><structfield>collcollate</structfield></entry>
       <entry><type>name</type></entry>
       <entry></entry>
-      <entry>LC_COLLATE for this collation object</entry>
+      <entry><symbol>LC_COLLATE</> for this collation object</entry>
      </row>
 
      <row>
       <entry><structfield>collctype</structfield></entry>
       <entry><type>name</type></entry>
       <entry></entry>
-      <entry>LC_CTYPE for this collation object</entry>
+      <entry><symbol>LC_CTYPE</> for this collation object</entry>
      </row>
     </tbody>
    </tgroup>
   </table>
 
+  <para>
+   Note that the unique key on this catalog is (<structfield>collname</>,
+   <structfield>collencoding</>, <structfield>collnamespace</>), not just
+   (<structfield>collname</>, <structfield>collnamespace</>).
+   <productname>PostgreSQL</productname> generally ignores all
+   collations not belonging to the current database's encoding; therefore
+   it is sufficient to use a qualified SQL name
+   (<replaceable>schema</>.<replaceable>name</>) to identify a collation,
+   even though this is not unique according to the catalog definition.
+   The current database's encoding is automatically used as an additional
+   lookup key.  The reason for defining the catalog this way is that
+   <application>initdb</> fills it in at cluster initialization time with
+   entries for all locales available on the system, so it must be able to
+   hold entries for all encodings that might ever be used in the cluster.
+  </para>
+
+  <para>
+   In the <literal>template0</> database, it could be useful to create
+   collations whose encoding does not match the database encoding,
+   since they could match the encodings of databases later cloned from
+   <literal>template0</>.  This would currently have to be done manually.
+  </para>
  </sect1>
 
  <sect1 id="catalog-pg-conversion">
@@ -6123,12 +6133,11 @@
       <entry><literal><link linkend="catalog-pg-collation"><structname>pg_collation</structname></link>.oid</literal></entry>
       <entry><para>
        <structfield>typcollation</structfield> specifies the collation
-       of the type.  If a type does not support collations, this will
-       be zero, collation analysis at parse time is skipped, and
-       the use of <literal>COLLATE</literal> clauses with the type is
-       invalid.  A base type that supports collations will have
-       <symbol>DEFAULT_COLLATION_OID</symbol> here.  A domain can have
-       another collation OID, if one was defined for the domain.
+       of the type.  If the type does not support collations, this will
+       be zero.  A base type that supports collations will have
+       <symbol>DEFAULT_COLLATION_OID</symbol> here.  A domain over a
+       collatable type can have some other collation OID, if one was defined
+       for the domain.
       </para></entry>
      </row>
 
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index 046c3d14168..dd96d009506 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -15,6 +15,8 @@
       Using the locale features of the operating system to provide
       locale-specific collation order, number formatting, translated
       messages, and other aspects.
+      This is covered in <xref linkend="locale"> and
+      <xref linkend="collation">.
      </para>
     </listitem>
 
@@ -23,6 +25,7 @@
       Providing a number of different character sets to support storing text
       in all kinds of languages, and providing character set translation
       between client and server.
+      This is covered in <xref linkend="multibyte">.
      </para>
     </listitem>
    </itemizedlist>
@@ -138,9 +141,12 @@ initdb --locale=sv_SE
     fixed when the database is created.  You can use different settings
     for different databases, but once a database is created, you cannot
     change them for that database anymore. <literal>LC_COLLATE</literal>
-    and <literal>LC_CTYPE</literal> are these type of categories.  They affect
+    and <literal>LC_CTYPE</literal> are these categories.  They affect
     the sort order of indexes, so they must be kept fixed, or indexes on
-    text columns would become corrupt.  The default values for these
+    text columns would become corrupt.
+    (But you can alleviate this restriction using collations, as discussed
+    in <xref linkend="collation">.)
+    The default values for these
     categories are determined when <command>initdb</command> is run, and
     those values are used when new databases are created, unless
     specified otherwise in the <command>CREATE DATABASE</command> command.
@@ -153,7 +159,7 @@ initdb --locale=sv_SE
     linkend="runtime-config-client-format"> for details).  The values
     that are chosen by <command>initdb</command> are actually only written
     into the configuration file <filename>postgresql.conf</filename> to
-    serve as defaults when the server is started.  If you disable these
+    serve as defaults when the server is started.  If you remove these
     assignments from <filename>postgresql.conf</filename> then the
     server will inherit the settings from its execution environment.
    </para>
@@ -308,17 +314,17 @@ initdb --locale=sv_SE
   <title>Collation Support</title>
 
   <para>
-   The collation support allows specifying the sort order and certain
-   other locale aspects of data per column or per operation at run
-   time.  This alleviates the problem that the
+   The collation feature allows specifying the sort order and certain
+   other locale aspects of data per-column, or even per-operation.
+   This alleviates the restriction that the
    <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol> settings
    of a database cannot be changed after its creation.
   </para>
 
   <note>
    <para>
-    The collation support feature is currently only known to work on
-    Linux/glibc and Mac OS X platforms.
+    Collation support is currently only known to work on
+    Linux (glibc) and Mac OS X platforms.
    </para>
   </note>
 
@@ -326,48 +332,51 @@ initdb --locale=sv_SE
    <title>Concepts</title>
 
    <para>
-    Conceptually, every datum of a collatable data type has a
-    collation.  (Collatable data types in the base system are
+    Conceptually, every expression of a collatable data type has a
+    collation.  (The built-in collatable data types are
     <type>text</type>, <type>varchar</type>, and <type>char</type>.
     User-defined base types can also be marked collatable.)  If the
-    datum is a column reference, the collation of the datum is the
-    defined collation of the column.  If the datum is a constant, the
+    expression is a column reference, the collation of the expression is the
+    defined collation of the column.  If the expression is a constant, the
     collation is the default collation of the data type of the
-    constant.  The collation of more complex expressions is derived
-    from the input collations as described below.
+    constant.  The collation of a more complex expression is derived
+    from the collations of its inputs, as described below.
    </para>
 
    <para>
-    The collation of a datum can also be the <quote>default</quote>
-    collation, which reverts to the locale settings defined for the
-    database.  In some cases, a datum can also have no known
+    The collation of an expression can be the <quote>default</quote>
+    collation, which means the locale settings defined for the
+    database.  In some cases, an expression can also have no known
     collation.  In such cases, ordering operations and other
     operations that need to know the collation will fail.
    </para>
 
    <para>
     When the database system has to perform an ordering or a
-    comparison, it considers the collation of the input data.  This
-    happens in two situations: an <literal>ORDER BY</literal> clause
-    and a function or operator call such as <literal>&lt;</literal>.
-    The collation to apply for the performance of the <literal>ORDER
-    BY</literal> clause is simply the collation of the sort key.  The
-    collation to apply for a function or operator call is derived from
-    the arguments, as described below.  Additionally, collations are
-    taken into account by functions that convert between lower and
-    upper case letters, that is, <function>lower</function>,
-    <function>upper</function>, and <function>initcap</function>.
+    comparison, it uses the collation of the input expression.  This
+    happens, for example, with <literal>ORDER BY</literal> clauses
+    and function or operator calls such as <literal>&lt;</literal>.
+    The collation to apply for an <literal>ORDER BY</literal> clause
+    is simply the collation of the sort key.  The collation to apply for a
+    function or operator call is derived from the arguments, as described
+    below.  In addition to comparison operators, collations are taken into
+    account by functions that convert between lower and upper case
+    letters, such as <function>lower</>, <function>upper</>, and
+    <function>initcap</>.
    </para>
 
    <para>
-    For a function call, the collation that is derived from combining
-    the argument collations is both used for performing any
-    comparisons or ordering and for the collation of the function
-    result, if the result type is collatable.
+    For a function or operator call, the collation that is derived by
+    examining the argument collations is used at run time for performing
+    the specified operation.  If the result of the function or operator
+    call is of a collatable data type, the collation is also used at parse
+    time as the defined collation of the function or operator expression,
+    in case there is a surrounding expression that requires knowledge of
+    its collation.
    </para>
 
    <para>
-    The <firstterm>collation derivation</firstterm> of a datum can be
+    The <firstterm>collation derivation</firstterm> of an expression can be
     implicit or explicit.  This distinction affects how collations are
     combined when multiple different collations appear in an
     expression.  An explicit collation derivation arises when a
@@ -379,9 +388,9 @@ initdb --locale=sv_SE
     <orderedlist>
      <listitem>
       <para>
-       If any input item has an explicit collation derivation, then
-       all explicitly derived collations among the input items must be
-       the same, otherwise an error is raised.  If an explicitly
+       If any input expression has an explicit collation derivation, then
+       all explicitly derived collations among the input expressions must be
+       the same, otherwise an error is raised.  If any explicitly
        derived collation is present, that is the result of the
        collation combination.
       </para>
@@ -389,8 +398,8 @@ initdb --locale=sv_SE
 
      <listitem>
       <para>
-       Otherwise, all input items must have the same implicit
-       collation derivation or the default collation.  If an
+       Otherwise, all input expressions must have the same implicit
+       collation derivation or the default collation.  If any
        implicitly derived collation is present, that is the result of
        the collation combination.  Otherwise, the result is the
        default collation.
@@ -428,19 +437,19 @@ SELECT a || ('foo' COLLATE "y") FROM test1;
     A collation is an SQL schema object that maps an SQL name to
     operating system locales.  In particular, it maps to a combination
     of <symbol>LC_COLLATE</symbol> and <symbol>LC_CTYPE</symbol>.  (As
-    the name would indicate, the main purpose of a collation is to set
+    the name would suggest, the main purpose of a collation is to set
     <symbol>LC_COLLATE</symbol>, which controls the sort order.  But
     it is rarely necessary in practice to have an
     <symbol>LC_CTYPE</symbol> setting that is different from
     <symbol>LC_COLLATE</symbol>, so it is more convenient to collect
     these under one concept than to create another infrastructure for
-    setting <symbol>LC_CTYPE</symbol> per datum.)  Also, a collation
-    is tied to a character encoding.  The same collation name may
-    exist for different encodings.
+    setting <symbol>LC_CTYPE</symbol> per expression.)  Also, a collation
+    is tied to a character set encoding (see <xref linkend="multibyte">).
+    The same collation name may exist for different encodings.
    </para>
 
    <para>
-    When a database system is initialized, <command>initdb</command>
+    When a database cluster is initialized, <command>initdb</command>
     populates the system catalog <literal>pg_collation</literal> with
     collations based on all the locales it finds on the operating
     system at the time.  For example, the operating system might
@@ -463,8 +472,19 @@ SELECT a || ('foo' COLLATE "y") FROM test1;
     collation may be created using
     the <xref linkend="sql-createcollation"> command.  That command
     can also be used to create a new collation from an existing
-    collation, which can be useful to be able to use operating-system
-    independent collation names in applications.
+    collation, which can be useful if you want to use
+    operating-system-independent collation names in applications.
+   </para>
+
+   <para>
+    Within any particular database, only collations that use that
+    database's encoding are of interest.  Other entries in
+    <literal>pg_collation</literal> are ignored.  Thus, a stripped collation
+    name such as <literal>de_DE</literal> can be considered unique
+    within a given database even though it would not be unique globally.
+    Use of the stripped collation names is recommended, since it will
+    make one less thing you need to change if you decide to change to
+    another database encoding.
    </para>
   </sect2>
  </sect1>
diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml
index 9d03ca5a4eb..fc792250011 100644
--- a/doc/src/sgml/ref/create_collation.sgml
+++ b/doc/src/sgml/ref/create_collation.sgml
@@ -21,7 +21,7 @@
 CREATE COLLATION <replaceable>name</replaceable> (
     [ LOCALE = <replaceable>locale</replaceable>, ]
     [ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
-    [ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
+    [ LC_CTYPE = <replaceable>lc_ctype</replaceable> ]
 )
 CREATE COLLATION <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
 </synopsis>
@@ -32,7 +32,8 @@ CREATE COLLATION <replaceable>name</replaceable> FROM <replaceable>existing_coll
 
   <para>
    <command>CREATE COLLATION</command> defines a new collation using
-   the specified operating system locales or from an existing collation.
+   the specified operating system locale settings,
+   or by copying an existing collation.
  </para>
 
   <para>
@@ -53,34 +54,22 @@ CREATE COLLATION <replaceable>name</replaceable> FROM <replaceable>existing_coll
       <para>
        The name of the collation. The collation name can be
        schema-qualified. If it is not, the collation is defined in the
-       current schema. The collation name must be unique within a
+       current schema. The collation name must be unique within that
        schema.  (The system catalogs can contain collations with the
-       same name for other encodings, but these are not usable if the
+       same name for other encodings, but these are ignored if the
        database encoding does not match.)
       </para>
      </listitem>
     </varlistentry>
 
     <varlistentry>
-     <term><replaceable>existing_collation</replaceable></term>
-
-     <listitem>
-      <para>
-       The name of an existing collation to copy.  The new collation
-       will have the same properties as the existing one, but they
-       will become independent objects.
-      </para>
-     </listitem>
-    </varlistentry>
-
-    <varlistentry>
      <term><replaceable>locale</replaceable></term>
 
      <listitem>
       <para>
        This is a shortcut for setting <symbol>LC_COLLATE</symbol>
        and <symbol>LC_CTYPE</symbol> at once.  If you specify this,
-       you cannot specify either of the other parameters.
+       you cannot specify either of those parameters.
       </para>
      </listitem>
     </varlistentry>
@@ -112,6 +101,18 @@ CREATE COLLATION <replaceable>name</replaceable> FROM <replaceable>existing_coll
       </para>
      </listitem>
     </varlistentry>
+
+    <varlistentry>
+     <term><replaceable>existing_collation</replaceable></term>
+
+     <listitem>
+      <para>
+       The name of an existing collation to copy.  The new collation
+       will have the same properties as the existing one, but they
+       will become independent objects.
+      </para>
+     </listitem>
+    </varlistentry>
    </variablelist>
  </refsect1>
 
@@ -145,8 +146,8 @@ CREATE COLLATION french (LOCALE = 'fr_FR.utf8');
 <programlisting>
 CREATE COLLATION german FROM "de_DE";
 </programlisting>
-   This can be convenient to be able to use operating-system
-   independent collation names in applications.
+   This can be convenient if you want to use operating-system-independent
+   collation names in applications.
   </para>
  </refsect1>
 
diff --git a/doc/src/sgml/ref/drop_collation.sgml b/doc/src/sgml/ref/drop_collation.sgml
index 7be9317932c..0afcaaf2dee 100644
--- a/doc/src/sgml/ref/drop_collation.sgml
+++ b/doc/src/sgml/ref/drop_collation.sgml
@@ -94,7 +94,7 @@ DROP COLLATION german;
   <para>
    The <command>DROP COLLATION</command> command conforms to the
    <acronym>SQL</acronym> standard, apart from the <literal>IF
-   EXISTS</> option, which is a <productname>PostgreSQL</> extension..
+   EXISTS</> option, which is a <productname>PostgreSQL</> extension.
   </para>
  </refsect1>
author	Tom Lane <tgl@sss.pgh.pa.us>	2011-03-08 17:10:34 -0500
committer	Tom Lane <tgl@sss.pgh.pa.us>	2011-03-08 17:10:59 -0500
commit	a612b17120fc011cefcdec6948b1cc8543529d06 (patch)
tree	4d44863e18e5de4b3d9fee9bcc1020f24bc1074b
parent	4502c8e1c06164adb7be526096e91e04d1844d36 (diff)
download	postgresql-a612b17120fc011cefcdec6948b1cc8543529d06.tar.gz postgresql-a612b17120fc011cefcdec6948b1cc8543529d06.zip