aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/backend/access/heap/rewriteheap.c | 6
-rw-r--r-- src/backend/access/transam/slru.c | 2
-rw-r--r-- src/backend/access/transam/timeline.c | 4
-rw-r--r-- src/backend/access/transam/xlog.c | 2
-rw-r--r-- src/backend/replication/logical/snapbuild.c | 3
-rw-r--r-- src/backend/storage/file/fd.c | 48
-rw-r--r-- src/backend/storage/smgr/md.c | 6
-rw-r--r-- src/backend/utils/cache/relmapper.c | 2
-rw-r--r-- src/backend/utils/misc/guc.c | 9
-rw-r--r-- src/backend/utils/misc/postgresql.conf.sample | 1
-rw-r--r-- src/include/storage/fd.h | 2
11 files changed, 67 insertions, 18 deletions
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
index c5db75afa1f..d5bd282f8c7 100644
--- a/src/backend/access/heap/rewriteheap.c
+++ b/src/backend/access/heap/rewriteheap.c
@@ -978,7 +978,7 @@ logical_end_heap_rewrite(RewriteState state)
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
{
if (FileSync(src->vfd, WAIT_EVENT_LOGICAL_REWRITE_SYNC) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", src->path)));
FileClose(src->vfd);
@@ -1199,7 +1199,7 @@ heap_xlog_logical_rewrite(XLogReaderState *r)
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
@@ -1298,7 +1298,7 @@ CheckPointLogicalRewriteHeap(void)
*/
pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", path)));
pgstat_report_wait_end();
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 1132eef0384..fad5d363e32 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -928,7 +928,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid)
path, offset)));
break;
case SLRU_FSYNC_FAILED:
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not access status of transaction %u", xid),
errdetail("Could not fsync file \"%s\": %m.",
diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c
index 61d36050c34..70eec5676eb 100644
--- a/src/backend/access/transam/timeline.c
+++ b/src/backend/access/transam/timeline.c
@@ -406,7 +406,7 @@ writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
@@ -485,7 +485,7 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size)
pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 2875fe023af..80616c5f1e7 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -3455,7 +3455,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
pgstat_report_wait_end();
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index a6cd6c67d16..363ddf4505e 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -1629,6 +1629,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn)
* fsync the file before renaming so that even if we crash after this we
* have either a fully valid file or nothing.
*
+ * It's safe to just ERROR on fsync() here because we'll retry the whole
+ * operation including the writes.
+ *
* TODO: Do the fsync() via checkpoints/restartpoints, doing it here has
* some noticeable overhead since it's performed synchronously during
* decoding?
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 2d75773ef02..827a1e2620b 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -145,6 +145,8 @@ int max_files_per_process = 1000;
*/
int max_safe_fds = 32; /* default if not changed */
+/* Whether it is safe to continue running after fsync() fails. */
+bool data_sync_retry = false;
/* Debugging.... */
@@ -430,11 +432,9 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
*/
rc = sync_file_range(fd, offset, nbytes,
SYNC_FILE_RANGE_WRITE);
-
- /* don't error out, this is just a performance optimization */
if (rc != 0)
{
- ereport(WARNING,
+ ereport(data_sync_elevel(WARNING),
(errcode_for_file_access(),
errmsg("could not flush dirty data: %m")));
}
@@ -506,7 +506,7 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
rc = msync(p, (size_t) nbytes, MS_ASYNC);
if (rc != 0)
{
- ereport(WARNING,
+ ereport(data_sync_elevel(WARNING),
(errcode_for_file_access(),
errmsg("could not flush dirty data: %m")));
/* NB: need to fall through to munmap()! */
@@ -562,7 +562,7 @@ pg_flush_data(int fd, off_t offset, off_t nbytes)
void
fsync_fname(const char *fname, bool isdir)
{
- fsync_fname_ext(fname, isdir, false, ERROR);
+ fsync_fname_ext(fname, isdir, false, data_sync_elevel(ERROR));
}
/*
@@ -1022,7 +1022,8 @@ LruDelete(File file)
* to leak the FD than to mess up our internal state.
*/
if (close(vfdP->fd))
- elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
+ elog(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? LOG : data_sync_elevel(LOG),
+ "could not close file \"%s\": %m", vfdP->fileName);
vfdP->fd = VFD_CLOSED;
--nfile;
@@ -1698,7 +1699,14 @@ FileClose(File file)
{
/* close the file */
if (close(vfdP->fd))
- elog(LOG, "could not close file \"%s\": %m", vfdP->fileName);
+ {
+ /*
+ * We may need to panic on failure to close non-temporary files;
+ * see LruDelete.
+ */
+ elog(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? LOG : data_sync_elevel(LOG),
+ "could not close file \"%s\": %m", vfdP->fileName);
+ }
--nfile;
vfdP->fd = VFD_CLOSED;
@@ -3091,6 +3099,9 @@ looks_like_temp_rel_name(const char *name)
* harmless cases such as read-only files in the data directory, and that's
* not good either.
*
+ * Note that if we previously crashed due to a PANIC on fsync(), we'll be
+ * rewriting all changes again during recovery.
+ *
* Note we assume we're chdir'd into PGDATA to begin with.
*/
void
@@ -3413,3 +3424,26 @@ MakePGDirectory(const char *directoryName)
{
return mkdir(directoryName, pg_dir_create_mode);
}
+
+/*
+ * Return the passed-in error level, or PANIC if data_sync_retry is off.
+ *
+ * Failure to fsync any data file is cause for immediate panic, unless
+ * data_sync_retry is enabled. Data may have been written to the operating
+ * system and removed from our buffer pool already, and if we are running on
+ * an operating system that forgets dirty data on write-back failure, there
+ * may be only one copy of the data remaining: in the WAL. A later attempt to
+ * fsync again might falsely report success. Therefore we must not allow any
+ * further checkpoints to be attempted. data_sync_retry can in theory be
+ * enabled on systems known not to drop dirty buffered data on write-back
+ * failure (with the likely outcome that checkpoints will continue to fail
+ * until the underlying problem is fixed).
+ *
+ * Any code that reports a failure from fsync() or related functions should
+ * filter the error level with this function.
+ *
+ * elevel is the level the caller would report at if the failure were
+ * survivable; it is returned unchanged when data_sync_retry is true.
+ * When data_sync_retry is false the result is always PANIC regardless of
+ * elevel, so callers that pass WARNING or LOG are escalated as well.
+ */
+int
+data_sync_elevel(int elevel)
+{
+ return data_sync_retry ? elevel : PANIC;
+}
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 04c1069a60b..4c6a50509f8 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1012,7 +1012,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum)
MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1];
if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(v->mdfd_vfd))));
@@ -1257,7 +1257,7 @@ mdsync(void)
bms_join(new_requests, requests);
errno = save_errno;
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
path)));
@@ -1431,7 +1431,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
(errmsg("could not forward fsync request because request queue is full")));
if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
FilePathName(seg->mdfd_vfd))));
diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c
index 905867dc767..328d4aae7b7 100644
--- a/src/backend/utils/cache/relmapper.c
+++ b/src/backend/utils/cache/relmapper.c
@@ -876,7 +876,7 @@ write_relmap_file(bool shared, RelMapFile *newmap,
*/
pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC);
if (pg_fsync(fd) != 0)
- ereport(ERROR,
+ ereport(data_sync_elevel(ERROR),
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m",
mapfilename)));
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index f9074215a2d..514595699be 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1830,6 +1830,15 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL
},
+ {
+ {"data_sync_retry", PGC_POSTMASTER, ERROR_HANDLING_OPTIONS,
+ gettext_noop("Whether to continue running after a failure to sync data files."),
+ },
+ &data_sync_retry,
+ false,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 3fe257c53f1..ab063dae419 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -666,6 +666,7 @@
#exit_on_error = off # terminate session on any error?
#restart_after_crash = on # reinitialize after backend crash?
+#data_sync_retry = off # retry or panic on failure to fsync data?
#------------------------------------------------------------------------------
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index 1289589a46b..cb882fb74e5 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -47,6 +47,7 @@ typedef int File;
/* GUC parameter */
extern PGDLLIMPORT int max_files_per_process;
+extern PGDLLIMPORT bool data_sync_retry;
/*
* This is private to fd.c, but exported for save/restore_backend_variables()
@@ -134,6 +135,7 @@ extern int durable_rename(const char *oldfile, const char *newfile, int loglevel
extern int durable_unlink(const char *fname, int loglevel);
extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel);
extern void SyncDataDirectory(void);
+extern int data_sync_elevel(int elevel);
/* Filename components */
#define PG_TEMP_FILES_DIR "pgsql_tmp"