From 858ec11858a914d4c380971985709b6d6b7dd6fc Mon Sep 17 00:00:00 2001 From: Robert Haas Date: Fri, 31 Jan 2014 22:45:17 -0500 Subject: Introduce replication slots. Replication slots are a crash-safe data structure which can be created on either a master or a standby to prevent premature removal of write-ahead log segments needed by a standby, as well as (with hot_standby_feedback=on) pruning of tuples whose removal would cause replication conflicts. Slots have some advantages over existing techniques, as explained in the documentation. In a few places, we refer to the type of replication slots introduced by this patch as "physical" slots, because forthcoming patches for logical decoding will also have slots, but with somewhat different properties. Andres Freund and Robert Haas --- doc/src/sgml/catalogs.sgml | 99 ++++++++++++++++++++++++++++++++++++ doc/src/sgml/config.sgml | 19 +++++++ doc/src/sgml/func.sgml | 70 +++++++++++++++++++++++++ doc/src/sgml/high-availability.sgml | 94 +++++++++++++++++++++++++++++++--- doc/src/sgml/protocol.sgml | 64 ++++++++++++++++++++--- doc/src/sgml/recovery-config.sgml | 16 ++++++ doc/src/sgml/ref/pg_receivexlog.sgml | 18 +++++++ 7 files changed, 366 insertions(+), 14 deletions(-) (limited to 'doc/src') diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index 3f8d9bfafbb..dca24fc0705 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -233,6 +233,11 @@ query rewrite rules + + pg_replication_slots + replication slot information + + pg_seclabel security labels on database objects @@ -5157,6 +5162,100 @@ + + <structname>pg_replication_slots</structname> + + + pg_replication_slots + + + + The pg_replication_slots view provides a listing + of all replication slots that currently exist on the database cluster, + along with their current state. + + + + For more on replication slots, + see . + + + + + <structname>pg_replication_slots</structname> Columns + + + + + Name + Type + References + Description + + + + + + slot_name + text + + A unique, cluster-wide identifier for the replication slot + + + + slot_type + text + + The slot type - physical or logical + + + + datoid + oid + pg_database.oid + The oid of the database this slot is associated with, or + null. Only logical slots have an associated database. + + + + database + text + pg_database.datname + The name of the database this slot is associated with, or + null. Only logical slots have an associated database. + + + + active + boolean + + True if this slot is currently actively being used + + + + xmin + xid + + The oldest transaction that this slot needs the database to + retain. VACUUM cannot remove tuples deleted + by any later transaction. + + + + + restart_lsn + text + + The address (LSN) of oldest WAL which still + might be required by the consumer of this slot and thus won't be + automatically removed during checkpoints. + + + + +
+
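As a minimal illustrative sketch of how this view might be consulted (any slot names returned are site-specific; the query below is not part of the original patch), a query like the following spots slots whose consumers have gone away but which are still reserving resources:

-- Hypothetical monitoring query: list slots whose consumer is not
-- currently connected.  Such a slot still pins WAL removal at its
-- restart_lsn and, when xmin is set, also holds back VACUUM, so
-- forgotten slots are worth noticing.
SELECT slot_name, slot_type, active, xmin, restart_lsn
  FROM pg_replication_slots
 WHERE NOT active;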
+ <structname>pg_seclabel</structname> diff --git a/doc/src/sgml/config.sgml index 1b5f831d655..000a46fabb0 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -2348,6 +2348,25 @@ include 'filename' + + max_replication_slots (integer) + + max_replication_slots configuration parameter + + + + Specifies the maximum number of replication slots + (see ) that the server + can support. The default is zero. This parameter can only be set at + server start. + wal_level must be set + to archive or higher to allow replication slots to + be used. Setting it to a lower value than the number of currently + existing replication slots will prevent the server from starting. + + + + wal_keep_segments (integer) diff --git a/doc/src/sgml/func.sgml index 252539f93be..8cc65b94d10 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -16290,6 +16290,76 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup()); + + Replication Functions + + + PostgreSQL exposes a number of functions for controlling and interacting + with replication features. See + and . + + + + Many of these functions have equivalent commands in the replication + protocol; see . + + + + The sections , and are also relevant for replication. + + + + Replication <acronym>SQL</acronym> Functions + + + + Function + Return Type + Description + + + + + + + pg_create_physical_replication_slot + + pg_create_physical_replication_slot(slotname text) + + + (slotname text, xlog_position text) + + + Creates a new physical replication slot named + slotname. Streaming changes from a physical slot + is only possible with the walsender protocol - see . Corresponds to the walsender protocol + command CREATE_REPLICATION_SLOT ... PHYSICAL. + + + + + + pg_drop_replication_slot + + pg_drop_replication_slot(slotname text) + + + void + + + Drops the physical or logical replication slot + named slotname. Same as walsender protocol + command DROP_REPLICATION_SLOT. + + + + +
+
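To illustrate the two functions above together (the slot name standby1_slot is invented, and this is only a sketch of the intended life cycle, not part of the original patch), a physical slot can be created before its consumer first connects and dropped once that consumer is permanently retired:

-- Create a physical slot for a consumer (a standby or pg_receivexlog)
-- that has not connected yet.  The returned xlog_position is empty
-- because a physical slot only begins retaining WAL once a consumer
-- attaches to it.
SELECT * FROM pg_create_physical_replication_slot('standby1_slot');

-- Drop the slot once that consumer is permanently retired, releasing
-- any WAL it was holding.  This fails while a connection is actively
-- using the slot.
SELECT pg_drop_replication_slot('standby1_slot');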
+ Database Object Management Functions diff --git a/doc/src/sgml/high-availability.sgml index e2e5ac93ab9..9d43586fe2f 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -643,7 +643,9 @@ protocol to make nodes agree on a serializable transactional order. entries in pg_hba.conf with the database field set to replication. Also ensure max_wal_senders is set to a sufficiently large value in the configuration file of the primary - server. + server. If replication slots will be used, + ensure that max_replication_slots is set sufficiently + high as well. @@ -750,13 +752,14 @@ archive_cleanup_command = 'pg_archivecleanup /path/to/archive %r' If you use streaming replication without file-based continuous - archiving, you have to set wal_keep_segments in the master - to a value high enough to ensure that old WAL segments are not recycled - too early, while the standby might still need them to catch up. If the - standby falls behind too much, it needs to be reinitialized from a new - base backup. If you set up a WAL archive that's accessible from the - standby, wal_keep_segments is not required as the standby can always - use the archive to catch up. + archiving, the server might recycle old WAL segments before the standby + has received them. If this occurs, the standby will need to be + reinitialized from a new base backup. You can avoid this by setting + wal_keep_segments to a value large enough to ensure that + WAL segments are not recycled too early, or by configuring a replication + slot for the standby. If you set up a WAL archive that's accessible from + the standby, these solutions are not required, since the standby can + always use the archive to catch up provided it retains enough segments. @@ -871,6 +874,81 @@ primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' + + Replication Slots + + Replication Slots + + + Replication slots provide an automated way to ensure that the master does + not remove WAL segments until they have been received by all standbys, + and that the master does not remove rows which could cause a + recovery conflict even when the + standby is disconnected. + + + In lieu of using replication slots, it is possible to prevent the removal + of old WAL segments using , or by + storing the segments in an archive using . + However, these methods often result in retaining more WAL segments than + required, whereas replication slots retain only the number of segments + known to be needed. An advantage of these methods is that they bound + the space requirement for pg_xlog; there is currently no way + to do this using replication slots. + + + Similarly, hot_standby_feedback + and vacuum_defer_cleanup_age provide protection against + relevant rows being removed by vacuum, but the former provides no + protection during any time period when the standby is not connected, + and the latter often needs to be set to a high value to provide adequate + protection. Replication slots overcome these disadvantages. + + + Querying and manipulating replication slots + + Each replication slot has a name, which can contain lower-case letters, + numbers, and the underscore character. + + + Existing replication slots and their state can be seen in the + pg_replication_slots + view. + + + Slots can be created and dropped either via the streaming replication + protocol (see ) or via SQL + functions (see ). 
+ + + + Configuration Example + + You can create a replication slot like this: + +postgres=# SELECT * FROM pg_create_physical_replication_slot('node_a_slot'); + slotname | xlog_position +-------------+--------------- + node_a_slot | + +postgres=# SELECT * FROM pg_replication_slots; + slot_name | slot_type | datoid | database | active | xmin | restart_lsn +-------------+-----------+--------+----------+--------+------+------------- + node_a_slot | physical | 0 | | f | | +(1 row) + + To configure the standby to use this slot, primary_slotname + should be configured in the standby's recovery.conf. + Here is a simple example: + +standby_mode = 'on' +primary_conninfo = 'host=192.168.1.50 port=5432 user=foo password=foopass' +primary_slotname = 'node_a_slot' + + + + + Cascading Replication diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index 7d99976a49c..832524e95e4 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1401,15 +1401,39 @@ The commands accepted in walsender mode are:
- START_REPLICATION XXX/XXX TIMELINE tli + CREATE_REPLICATION_SLOT slotname PHYSICAL + CREATE_REPLICATION_SLOT + + + Create a physical replication + slot. See for more about + replication slots. + + + + slotname + + + The name of the slot to create. Must be a valid replication slot + name (see ). + + + + + + + + + START_REPLICATION [SLOT slotname] [PHYSICAL] XXX/XXX TIMELINE tli Instructs server to start streaming WAL, starting at - WAL position XXX/XXX on timeline - tli. - The server can reply with an error, e.g. if the requested section of WAL - has already been recycled. On success, server responds with a - CopyBothResponse message, and then starts to stream WAL to the frontend. + WAL position XXX/XXX. If specified, + streaming starts on timeline tli; + otherwise, the server's current timeline is selected. The server can + reply with an error, e.g. if the requested section of WAL has already + been recycled. On success, server responds with a CopyBothResponse + message, and then starts to stream WAL to the frontend. @@ -1443,6 +1467,14 @@ The commands accepted in walsender mode are: client contains a message of one of the following formats: + + If a slot's name is provided + via slotname, it will be updated + as replication progresses so that the server knows which WAL segments - + and, if hot_standby_feedback is on, which transactions - + are still needed by the standby. + + @@ -1719,6 +1751,26 @@ The commands accepted in walsender mode are: + + DROP_REPLICATION_SLOT slotname + + + Drops a replication slot, freeing any reserved server-side resources. If + the slot is currently in use by an active connection, this command fails. + + + + slotname + + + The name of the slot to drop. + + + + + + + BASE_BACKUP [LABEL 'label'] [PROGRESS] [FAST] [WAL] [NOWAIT] diff --git a/doc/src/sgml/recovery-config.sgml index 4a97bb7a9c4..b69ce287c8c 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -418,6 +418,22 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows + + primary_slotname (string) + + primary_slotname recovery parameter + + + + Optionally specifies an existing replication slot to be used when + connecting to the primary via streaming replication to control + resource removal on the upstream node + (see ). + This setting has no effect if primary_conninfo is not + set. + + + trigger_file (string) diff --git a/doc/src/sgml/ref/pg_receivexlog.sgml index 19bebb62f7a..2a44af46c52 100644 --- a/doc/src/sgml/ref/pg_receivexlog.sgml +++ b/doc/src/sgml/ref/pg_receivexlog.sgml @@ -225,6 +225,24 @@ PostgreSQL documentation + + + + + + Require pg_receivexlog to use an existing + replication slot (see ). + When this option is used, pg_receivexlog will report + a flush position to the server, indicating when each segment has been + synchronized to disk so that the server can remove that segment if it + is not otherwise needed. When using this parameter, it is important + to make sure that pg_receivexlog cannot become the + synchronous standby through an incautious setting of + ; it does not flush + data frequently enough for this to work correctly. + +
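Putting these pieces together: once the standby's recovery.conf names the slot via primary_slotname, or pg_receivexlog is started with the slot option above, the primary can confirm from SQL that the slot is in use and how much WAL it still pins. This is only an illustrative sketch that is not part of the patch itself; it reuses the invented node_a_slot name from the configuration example earlier, and assumes the queries are run on the primary:

-- Run after the standby (or pg_receivexlog) has connected using the
-- slot: active should now be true, and restart_lsn should advance as
-- the consumer reports the WAL it has safely received.
SELECT active, restart_lsn,
       pg_current_xlog_location() AS current_write_location
  FROM pg_replication_slots
 WHERE slot_name = 'node_a_slot';

-- The total number of slots must stay within max_replication_slots;
-- further pg_create_physical_replication_slot() calls fail once the
-- limit is reached.
SELECT count(*) AS slots_in_use,
       current_setting('max_replication_slots') AS max_slots
  FROM pg_replication_slots;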