aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorFujii Masao <fujii@postgresql.org>2025-04-02 15:13:01 +0900
committerFujii Masao <fujii@postgresql.org>2025-04-02 15:13:01 +0900
commitb53b88109f94bd81ed0ac580035a936000bc2865 (patch)
treed6ca80d80d232207621553a395afd64f4ff81ce7 /src
parent121d774caea4c93c8b36fb20a17ef774e60894d6 (diff)
downloadpostgresql-b53b88109f94bd81ed0ac580035a936000bc2865.tar.gz
postgresql-b53b88109f94bd81ed0ac580035a936000bc2865.zip
Improve error message when standby does accept connections.
Even after reaching the minimum recovery point, if there are long-lived write transactions with 64 subtransactions on the primary, the recovery snapshot may not yet be ready for hot standby, delaying read-only connections on the standby. Previously, when read-only connections were not accepted due to this condition, the following error message was logged: FATAL: the database system is not yet accepting connections DETAIL: Consistent recovery state has not been yet reached. This DETAIL message was misleading because the following message was already logged in this case: LOG: consistent recovery state reached This contradiction, i.e., indicating that the recovery state was consistent while also stating it wasn’t, caused confusion. This commit improves the error message to better reflect the actual state: FATAL: the database system is not yet accepting connections DETAIL: Recovery snapshot is not yet ready for hot standby. HINT: To enable hot standby, close write transactions with more than 64 subtransactions on the primary server. To implement this, the commit introduces a new postmaster signal, PMSIGNAL_RECOVERY_CONSISTENT. When the startup process reaches a consistent recovery state, it sends this signal to the postmaster, allowing it to correctly recognize that state. Since this is not a clear bug, the change is applied only to the master branch and is not back-patched. Author: Atsushi Torikoshi <torikoshia@oss.nttdata.com> Co-authored-by: Fujii Masao <masao.fujii@gmail.com> Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp> Discussion: https://postgr.es/m/02db8cd8e1f527a8b999b94a4bee3165@oss.nttdata.com
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/transam/xlogrecovery.c6
-rw-r--r--src/backend/postmaster/postmaster.c12
-rw-r--r--src/backend/tcop/backend_startup.c18
-rw-r--r--src/include/storage/pmsignal.h1
-rw-r--r--src/include/tcop/backend_startup.h2
5 files changed, 30 insertions, 9 deletions
diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 0aa3ab59085..6ce979f2d8b 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -291,6 +291,11 @@ static bool backupEndRequired = false;
* Consistent state means that the system is internally consistent, all
* the WAL has been replayed up to a certain point, and importantly, there
* is no trace of later actions on disk.
+ *
+ * This flag is used only by the startup process and postmaster. When
+ * minRecoveryPoint is reached, the startup process sets it to true and
+ * sends a PMSIGNAL_RECOVERY_CONSISTENT signal to the postmaster,
+ * which then sets it to true upon receiving the signal.
*/
bool reachedConsistency = false;
@@ -2248,6 +2253,7 @@ CheckRecoveryConsistency(void)
CheckTablespaceDirectory();
reachedConsistency = true;
+ SendPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT);
ereport(LOG,
(errmsg("consistent recovery state reached at %X/%X",
LSN_FORMAT_ARGS(lastReplayedEndRecPtr))));
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index c966c2e83af..3fe45de5da0 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -1825,8 +1825,7 @@ canAcceptConnections(BackendType backend_type)
else if (!FatalError && pmState == PM_STARTUP)
return CAC_STARTUP; /* normal startup */
else if (!FatalError && pmState == PM_RECOVERY)
- return CAC_NOTCONSISTENT; /* not yet at consistent recovery
- * state */
+ return CAC_NOTHOTSTANDBY; /* not yet ready for hot standby */
else
return CAC_RECOVERY; /* else must be crash recovery */
}
@@ -3699,6 +3698,7 @@ process_pm_pmsignal(void)
/* WAL redo has started. We're out of reinitialization. */
FatalError = false;
AbortStartTime = 0;
+ reachedConsistency = false;
/*
* Start the archiver if we're responsible for (re-)archiving received
@@ -3724,9 +3724,15 @@ process_pm_pmsignal(void)
UpdatePMState(PM_RECOVERY);
}
- if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
+ if (CheckPostmasterSignal(PMSIGNAL_RECOVERY_CONSISTENT) &&
pmState == PM_RECOVERY && Shutdown == NoShutdown)
{
+ reachedConsistency = true;
+ }
+
+ if (CheckPostmasterSignal(PMSIGNAL_BEGIN_HOT_STANDBY) &&
+ (pmState == PM_RECOVERY && Shutdown == NoShutdown))
+ {
ereport(LOG,
(errmsg("database system is ready to accept read-only connections")));
diff --git a/src/backend/tcop/backend_startup.c b/src/backend/tcop/backend_startup.c
index a07c59ece01..84e1c6f2831 100644
--- a/src/backend/tcop/backend_startup.c
+++ b/src/backend/tcop/backend_startup.c
@@ -18,6 +18,7 @@
#include <unistd.h>
#include "access/xlog.h"
+#include "access/xlogrecovery.h"
#include "common/ip.h"
#include "common/string.h"
#include "libpq/libpq.h"
@@ -306,17 +307,24 @@ BackendInitialize(ClientSocket *client_sock, CAC_state cac)
(errcode(ERRCODE_CANNOT_CONNECT_NOW),
errmsg("the database system is starting up")));
break;
- case CAC_NOTCONSISTENT:
- if (EnableHotStandby)
+ case CAC_NOTHOTSTANDBY:
+ if (!EnableHotStandby)
+ ereport(FATAL,
+ (errcode(ERRCODE_CANNOT_CONNECT_NOW),
+ errmsg("the database system is not accepting connections"),
+ errdetail("Hot standby mode is disabled.")));
+ else if (reachedConsistency)
ereport(FATAL,
(errcode(ERRCODE_CANNOT_CONNECT_NOW),
errmsg("the database system is not yet accepting connections"),
- errdetail("Consistent recovery state has not been yet reached.")));
+ errdetail("Recovery snapshot is not yet ready for hot standby."),
+ errhint("To enable hot standby, close write transactions with more than %d subtransactions on the primary server.",
+ PGPROC_MAX_CACHED_SUBXIDS)));
else
ereport(FATAL,
(errcode(ERRCODE_CANNOT_CONNECT_NOW),
- errmsg("the database system is not accepting connections"),
- errdetail("Hot standby mode is disabled.")));
+ errmsg("the database system is not yet accepting connections"),
+ errdetail("Consistent recovery state has not been yet reached.")));
break;
case CAC_SHUTDOWN:
ereport(FATAL,
diff --git a/src/include/storage/pmsignal.h b/src/include/storage/pmsignal.h
index d84a383047e..67fa9ac06e1 100644
--- a/src/include/storage/pmsignal.h
+++ b/src/include/storage/pmsignal.h
@@ -33,6 +33,7 @@
typedef enum
{
PMSIGNAL_RECOVERY_STARTED, /* recovery has started */
+ PMSIGNAL_RECOVERY_CONSISTENT, /* recovery has reached consistent state */
PMSIGNAL_BEGIN_HOT_STANDBY, /* begin Hot Standby */
PMSIGNAL_ROTATE_LOGFILE, /* send SIGUSR1 to syslogger to rotate logfile */
PMSIGNAL_START_AUTOVAC_LAUNCHER, /* start an autovacuum launcher */
diff --git a/src/include/tcop/backend_startup.h b/src/include/tcop/backend_startup.h
index 578828c1caf..dcb9d056643 100644
--- a/src/include/tcop/backend_startup.h
+++ b/src/include/tcop/backend_startup.h
@@ -36,7 +36,7 @@ typedef enum CAC_state
CAC_STARTUP,
CAC_SHUTDOWN,
CAC_RECOVERY,
- CAC_NOTCONSISTENT,
+ CAC_NOTHOTSTANDBY,
CAC_TOOMANY,
} CAC_state;