diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/backend/access/heap/rewriteheap.c | 6 |
-rw-r--r-- | src/backend/access/transam/slru.c | 2 |
-rw-r--r-- | src/backend/access/transam/timeline.c | 4 |
-rw-r--r-- | src/backend/access/transam/xlog.c | 2 |
-rw-r--r-- | src/backend/replication/logical/snapbuild.c | 3 |
-rw-r--r-- | src/backend/storage/file/fd.c | 48 |
-rw-r--r-- | src/backend/storage/smgr/md.c | 6 |
-rw-r--r-- | src/backend/utils/cache/relmapper.c | 2 |
-rw-r--r-- | src/backend/utils/misc/guc.c | 9 |
-rw-r--r-- | src/backend/utils/misc/postgresql.conf.sample | 1 |
-rw-r--r-- | src/include/storage/fd.h | 2 |
11 files changed, 67 insertions, 18 deletions
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index c5db75afa1f..d5bd282f8c7 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -978,7 +978,7 @@ logical_end_heap_rewrite(RewriteState state) while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL) { if (FileSync(src->vfd, WAIT_EVENT_LOGICAL_REWRITE_SYNC) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", src->path))); FileClose(src->vfd); @@ -1199,7 +1199,7 @@ heap_xlog_logical_rewrite(XLogReaderState *r) */ pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", path))); pgstat_report_wait_end(); @@ -1298,7 +1298,7 @@ CheckPointLogicalRewriteHeap(void) */ pgstat_report_wait_start(WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", path))); pgstat_report_wait_end(); diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 1132eef0384..fad5d363e32 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -928,7 +928,7 @@ SlruReportIOError(SlruCtl ctl, int pageno, TransactionId xid) path, offset))); break; case SLRU_FSYNC_FAILED: - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not access status of transaction %u", xid), errdetail("Could not fsync file \"%s\": %m.", diff --git a/src/backend/access/transam/timeline.c b/src/backend/access/transam/timeline.c index 61d36050c34..70eec5676eb 100644 --- a/src/backend/access/transam/timeline.c +++ b/src/backend/access/transam/timeline.c @@ -406,7 +406,7 @@ writeTimeLineHistory(TimeLineID newTLI, 
TimeLineID parentTLI, pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", tmppath))); pgstat_report_wait_end(); @@ -485,7 +485,7 @@ writeTimeLineHistoryFile(TimeLineID tli, char *content, int size) pgstat_report_wait_start(WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", tmppath))); pgstat_report_wait_end(); diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 2875fe023af..80616c5f1e7 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -3455,7 +3455,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno, pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", tmppath))); pgstat_report_wait_end(); diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index a6cd6c67d16..363ddf4505e 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1629,6 +1629,9 @@ SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn) * fsync the file before renaming so that even if we crash after this we * have either a fully valid file or nothing. * + * It's safe to just ERROR on fsync() here because we'll retry the whole + * operation including the writes. + * * TODO: Do the fsync() via checkpoints/restartpoints, doing it here has * some noticeable overhead since it's performed synchronously during * decoding? 
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index 2d75773ef02..827a1e2620b 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -145,6 +145,8 @@ int max_files_per_process = 1000; */ int max_safe_fds = 32; /* default if not changed */ +/* Whether it is safe to continue running after fsync() fails. */ +bool data_sync_retry = false; /* Debugging.... */ @@ -430,11 +432,9 @@ pg_flush_data(int fd, off_t offset, off_t nbytes) */ rc = sync_file_range(fd, offset, nbytes, SYNC_FILE_RANGE_WRITE); - - /* don't error out, this is just a performance optimization */ if (rc != 0) { - ereport(WARNING, + ereport(data_sync_elevel(WARNING), (errcode_for_file_access(), errmsg("could not flush dirty data: %m"))); } @@ -506,7 +506,7 @@ pg_flush_data(int fd, off_t offset, off_t nbytes) rc = msync(p, (size_t) nbytes, MS_ASYNC); if (rc != 0) { - ereport(WARNING, + ereport(data_sync_elevel(WARNING), (errcode_for_file_access(), errmsg("could not flush dirty data: %m"))); /* NB: need to fall through to munmap()! */ @@ -562,7 +562,7 @@ pg_flush_data(int fd, off_t offset, off_t nbytes) void fsync_fname(const char *fname, bool isdir) { - fsync_fname_ext(fname, isdir, false, ERROR); + fsync_fname_ext(fname, isdir, false, data_sync_elevel(ERROR)); } /* @@ -1022,7 +1022,8 @@ LruDelete(File file) * to leak the FD than to mess up our internal state. */ if (close(vfdP->fd)) - elog(LOG, "could not close file \"%s\": %m", vfdP->fileName); + elog(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? LOG : data_sync_elevel(LOG), + "could not close file \"%s\": %m", vfdP->fileName); vfdP->fd = VFD_CLOSED; --nfile; @@ -1698,7 +1699,14 @@ FileClose(File file) { /* close the file */ if (close(vfdP->fd)) - elog(LOG, "could not close file \"%s\": %m", vfdP->fileName); + { + /* + * We may need to panic on failure to close non-temporary files; + * see LruDelete. + */ + elog(vfdP->fdstate & FD_TEMP_FILE_LIMIT ? 
LOG : data_sync_elevel(LOG), + "could not close file \"%s\": %m", vfdP->fileName); + } --nfile; vfdP->fd = VFD_CLOSED; @@ -3091,6 +3099,9 @@ looks_like_temp_rel_name(const char *name) * harmless cases such as read-only files in the data directory, and that's * not good either. * + * Note that if we previously crashed due to a PANIC on fsync(), we'll be + * rewriting all changes again during recovery. + * * Note we assume we're chdir'd into PGDATA to begin with. */ void @@ -3413,3 +3424,26 @@ MakePGDirectory(const char *directoryName) { return mkdir(directoryName, pg_dir_create_mode); } + +/* + * Return the passed-in error level, or PANIC if data_sync_retry is off. + * + * Failure to fsync any data file is cause for immediate panic, unless + * data_sync_retry is enabled. Data may have been written to the operating + * system and removed from our buffer pool already, and if we are running on + * an operating system that forgets dirty data on write-back failure, there + * may be only one copy of the data remaining: in the WAL. A later attempt to + * fsync again might falsely report success. Therefore we must not allow any + * further checkpoints to be attempted. data_sync_retry can in theory be + * enabled on systems known not to drop dirty buffered data on write-back + * failure (with the likely outcome that checkpoints will continue to fail + * until the underlying problem is fixed). + * + * Any code that reports a failure from fsync() or related functions should + * filter the error level with this function. + */ +int +data_sync_elevel(int elevel) +{ + return data_sync_retry ? 
elevel : PANIC; +} diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 04c1069a60b..4c6a50509f8 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -1012,7 +1012,7 @@ mdimmedsync(SMgrRelation reln, ForkNumber forknum) MdfdVec *v = &reln->md_seg_fds[forknum][segno - 1]; if (FileSync(v->mdfd_vfd, WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC) < 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", FilePathName(v->mdfd_vfd)))); @@ -1257,7 +1257,7 @@ mdsync(void) bms_join(new_requests, requests); errno = save_errno; - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", path))); @@ -1431,7 +1431,7 @@ register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg) (errmsg("could not forward fsync request because request queue is full"))); if (FileSync(seg->mdfd_vfd, WAIT_EVENT_DATA_FILE_SYNC) < 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", FilePathName(seg->mdfd_vfd)))); diff --git a/src/backend/utils/cache/relmapper.c b/src/backend/utils/cache/relmapper.c index 905867dc767..328d4aae7b7 100644 --- a/src/backend/utils/cache/relmapper.c +++ b/src/backend/utils/cache/relmapper.c @@ -876,7 +876,7 @@ write_relmap_file(bool shared, RelMapFile *newmap, */ pgstat_report_wait_start(WAIT_EVENT_RELATION_MAP_SYNC); if (pg_fsync(fd) != 0) - ereport(ERROR, + ereport(data_sync_elevel(ERROR), (errcode_for_file_access(), errmsg("could not fsync file \"%s\": %m", mapfilename))); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f9074215a2d..514595699be 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1830,6 +1830,15 @@ static struct config_bool ConfigureNamesBool[] = NULL, NULL, NULL }, + { + {"data_sync_retry", PGC_POSTMASTER, ERROR_HANDLING_OPTIONS, + 
gettext_noop("Whether to continue running after a failure to sync data files."), + }, + &data_sync_retry, + false, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, false, NULL, NULL, NULL diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 3fe257c53f1..ab063dae419 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -666,6 +666,7 @@ #exit_on_error = off # terminate session on any error? #restart_after_crash = on # reinitialize after backend crash? +#data_sync_retry = off # retry or panic on failure to fsync data? #------------------------------------------------------------------------------ diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index 1289589a46b..cb882fb74e5 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -47,6 +47,7 @@ typedef int File; /* GUC parameter */ extern PGDLLIMPORT int max_files_per_process; +extern PGDLLIMPORT bool data_sync_retry; /* * This is private to fd.c, but exported for save/restore_backend_variables() @@ -134,6 +135,7 @@ extern int durable_rename(const char *oldfile, const char *newfile, int loglevel extern int durable_unlink(const char *fname, int loglevel); extern int durable_link_or_rename(const char *oldfile, const char *newfile, int loglevel); extern void SyncDataDirectory(void); +extern int data_sync_elevel(int elevel); /* Filename components */ #define PG_TEMP_FILES_DIR "pgsql_tmp" |