3 files changed, 294 insertions, 2 deletions
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 86844147517..822ef4b60c9 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2018,6 +2018,92 @@ SET ENABLE_SEQSCAN TO OFF;
      </variablelist>
     </sect2>
 
+    <sect2 id="runtime-config-sync-rep">
+     <title>Synchronous Replication</title>
+
+     <para>
+      These settings control the behavior of the built-in
+      <firstterm>synchronous replication</> feature.
+      These parameters would be set on the primary server that is
+      to send replication data to one or more standby servers.
+     </para>
+
+     <variablelist>
+     <varlistentry id="guc-synchronous-replication" xreflabel="synchronous_replication">
+      <term><varname>synchronous_replication</varname> (<type>boolean</type>)</term>
+      <indexterm>
+       <primary><varname>synchronous_replication</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Specifies whether transaction commit will wait for WAL records
+        to be replicated before the command returns a <quote>success</>
+        indication to the client.  The default setting is <literal>off</>.
+        When <literal>on</>, there will be a delay while the client waits
+        for confirmation of successful replication. That delay will
+        increase depending upon the physical distance and network activity
+        between primary and standby. The commit wait will last until a
+        reply from the current synchronous standby indicates it has received
+        the commit record of the transaction. Synchronous standbys must
+        already have been defined (see <xref linkend="guc-sync-standby-names">).
+       </para>
+       <para>
+        This parameter can be changed at any time; the
+        behavior for any one transaction is determined by the setting in
+        effect when it commits.  It is therefore possible, and useful, to have
+        some transactions replicate synchronously and others asynchronously.
+        For example, to make a single multistatement transaction commit
+        asynchronously when the default is synchronous replication, issue
+        <command>SET LOCAL synchronous_replication TO OFF</> within the
+        transaction.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry id="guc-sync-standby-names" xreflabel="synchronous_standby_names">
+      <term><varname>synchronous_standby_names</varname> (<type>string</type>)</term>
+      <indexterm>
+       <primary><varname>synchronous_standby_names</> configuration parameter</primary>
+      </indexterm>
+      <listitem>
+       <para>
+        Specifies a priority ordered list of standby names that can offer
+        synchronous replication.  At any one time there will be just one
+        synchronous standby that will wake sleeping users following commit.
+        The synchronous standby will be the first named standby that is
+        both currently connected and streaming data in real-time
+        (as shown by a state of "STREAMING").  Other standby servers
+        listed later will become potential synchronous standbys.
+        If the current synchronous standby disconnects for whatever reason
+        it will be replaced immediately with the next highest priority standby.
+        Specifying more than one standby name can allow very high availability.
+       </para>
+       <para>
+        The standby name is currently taken as the application_name of the
+        standby, as set in the primary_conninfo on the standby. Names are
+        not enforced for uniqueness. In case of duplicates one of the standbys
+        will be chosen to be the synchronous standby, though exactly which
+        one is indeterminate.
+       </para>
+       <para>
+        No value is set by default.
+        The special entry <literal>*</> matches any application_name, including
+        the default application name of <literal>walreceiver</>.
+       </para>
+       <para>
+        If a standby is removed from the list of servers then it will stop
+        being the synchronous standby, allowing another to take its place.
+        If the list is empty, synchronous replication will not be
+        possible, whatever the setting of <varname>synchronous_replication</>;
+        however, commits that are already waiting will continue to wait.
+        Standbys may also be added to the list without restarting the server.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     </variablelist>
+    </sect2>
+
     <sect2 id="runtime-config-standby">
     <title>Standby Servers</title>
 
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 37ba43b5fd7..e30552f09fa 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -875,6 +875,209 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass'
    </sect3>
 
   </sect2>
+  <sect2 id="synchronous-replication">
+   <title>Synchronous Replication</title>
+
+   <indexterm zone="high-availability">
+    <primary>Synchronous Replication</primary>
+   </indexterm>
+
+   <para>
+    <productname>PostgreSQL</> streaming replication is asynchronous by
+    default. If the primary server
+    crashes then some transactions that were committed may not have been
+    replicated to the standby server, causing data loss. The amount
+    of data loss is proportional to the replication delay at the time of
+    failover.
+   </para>
+
+   <para>
+    Synchronous replication offers the ability to confirm that all changes
+    made by a transaction have been transferred to one synchronous standby
+    server. This extends the standard level of durability
+    offered by a transaction commit. This level of protection is referred
+    to as 2-safe replication in computer science theory.
+   </para>
+
+   <para>
+    When requesting synchronous replication, each commit of a
+    write transaction will wait until confirmation is
+    received that the commit has been written to the transaction log on disk
+    of both the primary and standby server. The only possibility that data
+    can be lost is if both the primary and the standby suffer crashes at the
+    same time. This can provide a much higher level of durability, though only
+    if the sysadmin is cautious about the placement and management of the two
+    servers.  Waiting for confirmation increases the user's confidence that the
+    changes will not be lost in the event of server crashes but it also
+    necessarily increases the response time for the requesting transaction.
+    The minimum wait time is the roundtrip time between primary and standby.
+   </para>
+
+   <para>
+    Read only transactions and transaction rollbacks need not wait for
+    replies from standby servers. Subtransaction commits do not wait for
+    responses from standby servers, only top-level commits. Long
+    running actions such as data loading or index building do not wait
+    until the very final commit message. All two-phase commit actions
+    require commit waits, including both prepare and commit.
+   </para>
+
+   <sect3 id="synchronous-replication-config">
+    <title>Basic Configuration</title>
+
+   <para>
+    All parameters have useful default values, so we can enable
+    synchronous replication easily just by setting this on the primary:
+
+<programlisting>
+synchronous_replication = on
+</programlisting>
+
+    When <varname>synchronous_replication</> is set, a commit will wait
+    for confirmation that the standby has received the commit record,
+    even if that takes a very long time.
+    <varname>synchronous_replication</> can be set by individual
+    users, so can be configured in the configuration file, for particular
+    users or databases, or dynamically by application programs.
+   </para>
+
+   <para>
+    After a commit record has been written to disk on the primary the
+    WAL record is then sent to the standby. The standby sends reply
+    messages each time a new batch of WAL data is received, unless
+    <varname>wal_receiver_status_interval</> is set to zero on the standby.
+    If the standby is the first matching standby, as specified in
+    <varname>synchronous_standby_names</> on the primary, the reply
+    messages from that standby will be used to wake users waiting for
+    confirmation that the commit record has been received. These parameters
+    allow the administrator to specify which standby servers should be
+    synchronous standbys. Note that the configuration of synchronous
+    replication is mainly on the master.
+   </para>
+
+   <para>
+    Users will stop waiting if a fast shutdown is requested, though the
+    server does not fully shut down until all outstanding WAL records are
+    transferred to standby servers.
+   </para>
+
+   <para>
+    Note also that <varname>synchronous_commit</> is used when the user
+    specifies <varname>synchronous_replication</>, overriding even an
+    explicit setting of <varname>synchronous_commit</> to <literal>off</>.
+    This is because we must write WAL to disk on primary before we replicate
+    to ensure the standby never gets ahead of the primary.
+   </para>
+
+   </sect3>
+
+   <sect3 id="synchronous-replication-performance">
+    <title>Planning for Performance</title>
+
+   <para>
+    Synchronous replication usually requires carefully planned and placed
+    standby servers to ensure applications perform acceptably. Waiting
+    doesn't utilise system resources, but transaction locks continue to be
+    held until the transfer is confirmed. As a result, incautious use of
+    synchronous replication will reduce performance for database
+    applications because of increased response times and higher contention.
+   </para>
+
+   <para>
+    <productname>PostgreSQL</> allows the application developer
+    to specify the durability level required via replication. This can be
+    specified for the system overall, though it can also be specified for
+    specific users or connections, or even individual transactions.
+   </para>
+
+   <para>
+    For example, an application workload might consist of:
+    10% of changes are important customer details, while
+    90% of changes are less important data that the business can more
+    easily survive if it is lost, such as chat messages between users.
+   </para>
+
+   <para>
+    With synchronous replication options specified at the application level
+    (on the primary) we can offer sync rep for the most important changes,
+    without slowing down the bulk of the total workload. Application level
+    options are an important and practical tool for allowing the benefits of
+    synchronous replication for high performance applications.
+   </para>
+
+   <para>
+    You should consider that the network bandwidth must be higher than
+    the rate of generation of WAL data.  If it is not, the standby
+    cannot keep up with the primary, and commits that wait for
+    confirmation from the standby will be delayed for progressively
+    longer periods.
+   </para>
+
+   </sect3>
+
+   <sect3 id="synchronous-replication-ha">
+    <title>Planning for High Availability</title>
+
+   <para>
+    Commits made when synchronous_replication is set will wait until
+    the sync standby responds. The response may never occur if the last,
+    or only, standby should crash.
+   </para>
+
+   <para>
+    The best solution for avoiding data loss is to ensure you don't lose
+    your last remaining sync standby. This can be achieved by naming multiple
+    potential synchronous standbys using <varname>synchronous_standby_names</>.
+    The first named standby will be used as the synchronous standby. Standbys
+    listed after this will take over the role of synchronous standby if the
+    first one should fail.
+   </para>
+
+   <para>
+    When a standby first attaches to the primary, it will not yet be properly
+    synchronized. This is described as <literal>CATCHUP</> mode. Once
+    the lag between standby and primary reaches zero for the first time
+    we move to real-time <literal>STREAMING</> state.
+    The catch-up duration may be long immediately after the standby has
+    been created. If the standby is shut down, then the catch-up period
+    will increase according to the length of time the standby has been down.
+    The standby is only able to become a synchronous standby
+    once it has reached <literal>STREAMING</> state.
+   </para>
+
+   <para>
+    If the primary restarts while commits are waiting for acknowledgement, those
+    waiting transactions will be marked fully committed once the primary
+    database recovers.
+    There is no way to be certain that all standbys have received all
+    outstanding WAL data at time of the crash of the primary. Some
+    transactions may not show as committed on the standby, even though
+    they show as committed on the primary. The guarantee we offer is that
+    the application will not receive explicit acknowledgement of the
+    successful commit of a transaction until the WAL data is known to be
+    safely received by the standby.
+   </para>
+
+   <para>
+    If you really do lose your last standby server then you should disable
+    <varname>synchronous_standby_names</> and restart the primary server.
+   </para>
+
+   <para>
+    If the primary is isolated from remaining standby servers you should
+    failover to the best candidate of those other remaining standby servers.
+   </para>
+
+   <para>
+    If you need to re-create a standby server while transactions are
+    waiting, make sure that the commands to run pg_start_backup() and
+    pg_stop_backup() are run in a session with
+    synchronous_replication = off, otherwise those requests will wait
+    forever for the standby to appear.
+   </para>
+
+   </sect3>
+  </sect2>
   </sect1>
 
   <sect1 id="warm-standby-failover">
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index aaa613e988a..319a57c6e23 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -306,8 +306,11 @@ postgres: <replaceable>user</> <replaceable>database</> <replaceable>host</> <re
       location.  In addition, the standby reports the last transaction log
       position it received and wrote, the last position it flushed to disk,
       and the last position it replayed, and this information is also
-      displayed here.  The columns detailing what exactly the connection is
-      doing are only visible if the user examining the view is a superuser.
+      displayed here. If the standby's application name matches one of the
+      settings in <varname>synchronous_standby_names</> then the sync_priority
+      is shown here also; that is, the order in which standbys will become
+      the synchronous standby. The columns detailing what exactly the connection
+      is doing are only visible if the user examining the view is a superuser.
       The client's hostname will be available only if
       <xref linkend="guc-log-hostname"> is set or if the user's hostname
       needed to be looked up during <filename>pg_hba.conf</filename>