diff options
Diffstat (limited to 'src/backend/access/transam/xlog.c')
-rw-r--r-- | src/backend/access/transam/xlog.c | 231 |
1 file changed, 205 insertions, 26 deletions
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 6c2ce3f1bd7..d2d75d652c9 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.58 2001/03/14 20:23:04 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.59 2001/03/16 05:44:33 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -42,6 +42,47 @@ #include "miscadmin.h" +/* + * This chunk of hackery attempts to determine which file sync methods + * are available on the current platform, and to choose an appropriate + * default method. We assume that fsync() is always available, and that + * configure determined whether fdatasync() is. + */ +#define SYNC_METHOD_FSYNC 0 +#define SYNC_METHOD_FDATASYNC 1 +#define SYNC_METHOD_OPEN 2 /* used for both O_SYNC and O_DSYNC */ + +#if defined(O_SYNC) +# define OPEN_SYNC_FLAG O_SYNC +#else +# if defined(O_FSYNC) +# define OPEN_SYNC_FLAG O_FSYNC +# endif +#endif + +#if defined(OPEN_SYNC_FLAG) +# if defined(O_DSYNC) && (O_DSYNC != OPEN_SYNC_FLAG) +# define OPEN_DATASYNC_FLAG O_DSYNC +# endif +#endif + +#if defined(OPEN_DATASYNC_FLAG) +# define DEFAULT_SYNC_METHOD_STR "open_datasync" +# define DEFAULT_SYNC_METHOD SYNC_METHOD_OPEN +# define DEFAULT_SYNC_FLAGBIT OPEN_DATASYNC_FLAG +#else +# if defined(HAVE_FDATASYNC) +# define DEFAULT_SYNC_METHOD_STR "fdatasync" +# define DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC +# define DEFAULT_SYNC_FLAGBIT 0 +# else +# define DEFAULT_SYNC_METHOD_STR "fsync" +# define DEFAULT_SYNC_METHOD SYNC_METHOD_FSYNC +# define DEFAULT_SYNC_FLAGBIT 0 +# endif +#endif + + /* Max time to wait to acquire XLog activity locks */ #define XLOG_LOCK_TIMEOUT (5*60*1000000) /* 5 minutes */ /* Max time to wait to 
acquire checkpoint lock */ @@ -52,10 +93,18 @@ int CheckPointSegments = 3; int XLOGbuffers = 8; int XLOGfiles = 0; /* how many files to pre-allocate during ckpt */ int XLOG_DEBUG = 0; +char *XLOG_sync_method = NULL; +const char XLOG_sync_method_default[] = DEFAULT_SYNC_METHOD_STR; char XLOG_archive_dir[MAXPGPATH]; /* null string means delete 'em */ +/* these are derived from XLOG_sync_method by assign_xlog_sync_method */ +static int sync_method = DEFAULT_SYNC_METHOD; +static int open_sync_bit = DEFAULT_SYNC_FLAGBIT; + #define MinXLOGbuffers 4 +#define XLOG_SYNC_BIT (enableFsync ? open_sync_bit : 0) + /* * ThisStartUpID will be same in all backends --- it identifies current @@ -365,6 +414,7 @@ static void WriteControlFile(void); static void ReadControlFile(void); static char *str_time(time_t tnow); static void xlog_outrec(char *buf, XLogRecord *record); +static void issue_xlog_fsync(void); /* @@ -917,6 +967,15 @@ XLogWrite(XLogwrtRqst WriteRqst) while (XLByteLT(LogwrtResult.Write, WriteRqst.Write)) { + /* + * Make sure we're not ahead of the insert process. This could + * happen if we're passed a bogus WriteRqst.Write that is past the + * end of the last page that's been initialized by + * AdvanceXLInsertBuffer. + */ + if (!XLByteLT(LogwrtResult.Write, XLogCtl->xlblocks[Write->curridx])) + elog(STOP, "XLogWrite: write request is past end of log"); + /* Advance LogwrtResult.Write to end of current buffer page */ LogwrtResult.Write = XLogCtl->xlblocks[Write->curridx]; ispartialpage = XLByteLT(WriteRqst.Write, LogwrtResult.Write); @@ -1004,9 +1063,7 @@ XLogWrite(XLogwrtRqst WriteRqst) */ if (openLogOff >= XLogSegSize && !ispartialpage) { - if (pg_fdatasync(openLogFile) != 0) - elog(STOP, "fsync(logfile %u seg %u) failed: %m", - openLogId, openLogSeg); + issue_xlog_fsync(); LogwrtResult.Flush = LogwrtResult.Write; /* end of current page */ } @@ -1030,24 +1087,24 @@ XLogWrite(XLogwrtRqst WriteRqst) * we might have no open file or the wrong one. 
However, we do * not need to fsync more than one file. */ - if (openLogFile >= 0 && - !XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg)) + if (sync_method != SYNC_METHOD_OPEN) { - if (close(openLogFile) != 0) - elog(STOP, "close(logfile %u seg %u) failed: %m", - openLogId, openLogSeg); - openLogFile = -1; - } - if (openLogFile < 0) - { - XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); - openLogFile = XLogFileOpen(openLogId, openLogSeg, false); - openLogOff = 0; + if (openLogFile >= 0 && + !XLByteInPrevSeg(LogwrtResult.Write, openLogId, openLogSeg)) + { + if (close(openLogFile) != 0) + elog(STOP, "close(logfile %u seg %u) failed: %m", + openLogId, openLogSeg); + openLogFile = -1; + } + if (openLogFile < 0) + { + XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); + openLogFile = XLogFileOpen(openLogId, openLogSeg, false); + openLogOff = 0; + } + issue_xlog_fsync(); } - - if (pg_fdatasync(openLogFile) != 0) - elog(STOP, "fsync(logfile %u seg %u) failed: %m", - openLogId, openLogSeg); LogwrtResult.Flush = LogwrtResult.Write; } @@ -1191,7 +1248,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent) */ if (*usexistent) { - fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, + S_IRUSR | S_IWUSR); if (fd < 0) { if (errno != ENOENT) @@ -1208,6 +1266,7 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent) unlink(tpath); unlink(path); + /* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */ fd = BasicOpenFile(tpath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); if (fd < 0) @@ -1220,8 +1279,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent) * allow "holes" in files, just seeking to the end doesn't allocate * intermediate space. This way, we know that we have all the space * and (after the fsync below) that all the indirect blocks are down - * on disk. 
Therefore, fdatasync(2) will be sufficient to sync future - * writes to the log file. + * on disk. Therefore, fdatasync(2) or O_DSYNC will be sufficient to + * sync future writes to the log file. */ MemSet(zbuffer, 0, sizeof(zbuffer)); for (nbytes = 0; nbytes < XLogSegSize; nbytes += sizeof(zbuffer)) @@ -1261,7 +1320,8 @@ XLogFileInit(uint32 log, uint32 seg, bool *usexistent) log, seg); #endif - fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, + S_IRUSR | S_IWUSR); if (fd < 0) elog(STOP, "InitReopen(logfile %u seg %u) failed: %m", log, seg); @@ -1280,7 +1340,8 @@ XLogFileOpen(uint32 log, uint32 seg, bool econt) XLogFileName(path, log, seg); - fd = BasicOpenFile(path, O_RDWR | PG_BINARY, S_IRUSR | S_IWUSR); + fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, + S_IRUSR | S_IWUSR); if (fd < 0) { if (econt && errno == ENOENT) @@ -1845,7 +1906,8 @@ WriteControlFile(void) memset(buffer, 0, BLCKSZ); memcpy(buffer, ControlFile, sizeof(ControlFileData)); - fd = BasicOpenFile(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, S_IRUSR | S_IWUSR); + fd = BasicOpenFile(ControlFilePath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, + S_IRUSR | S_IWUSR); if (fd < 0) elog(STOP, "WriteControlFile failed to create control file (%s): %m", ControlFilePath); @@ -2852,3 +2914,120 @@ xlog_outrec(char *buf, XLogRecord *record) sprintf(buf + strlen(buf), ": %s", RmgrTable[record->xl_rmid].rm_name); } + + +/* + * GUC support routines + */ + +bool +check_xlog_sync_method(const char *method) +{ + if (strcasecmp(method, "fsync") == 0) return true; +#ifdef HAVE_FDATASYNC + if (strcasecmp(method, "fdatasync") == 0) return true; +#endif +#ifdef OPEN_SYNC_FLAG + if (strcasecmp(method, "open_sync") == 0) return true; +#endif +#ifdef OPEN_DATASYNC_FLAG + if (strcasecmp(method, "open_datasync") == 0) return true; +#endif + return false; +} + +void +assign_xlog_sync_method(const char *method) +{ + int 
new_sync_method; + int new_sync_bit; + + if (strcasecmp(method, "fsync") == 0) + { + new_sync_method = SYNC_METHOD_FSYNC; + new_sync_bit = 0; + } +#ifdef HAVE_FDATASYNC + else if (strcasecmp(method, "fdatasync") == 0) + { + new_sync_method = SYNC_METHOD_FDATASYNC; + new_sync_bit = 0; + } +#endif +#ifdef OPEN_SYNC_FLAG + else if (strcasecmp(method, "open_sync") == 0) + { + new_sync_method = SYNC_METHOD_OPEN; + new_sync_bit = OPEN_SYNC_FLAG; + } +#endif +#ifdef OPEN_DATASYNC_FLAG + else if (strcasecmp(method, "open_datasync") == 0) + { + new_sync_method = SYNC_METHOD_OPEN; + new_sync_bit = OPEN_DATASYNC_FLAG; + } +#endif + else + { + /* Can't get here unless guc.c screwed up */ + elog(ERROR, "Bogus xlog sync method %s", method); + new_sync_method = 0; /* keep compiler quiet */ + new_sync_bit = 0; + } + + if (sync_method != new_sync_method || open_sync_bit != new_sync_bit) + { + /* + * To ensure that no blocks escape unsynced, force an fsync on + * the currently open log segment (if any). Also, if the open + * flag is changing, close the log file so it will be reopened + * (with new flag bit) at next use. 
+ */ + if (openLogFile >= 0) + { + if (pg_fsync(openLogFile) != 0) + elog(STOP, "fsync(logfile %u seg %u) failed: %m", + openLogId, openLogSeg); + if (open_sync_bit != new_sync_bit) + { + if (close(openLogFile) != 0) + elog(STOP, "close(logfile %u seg %u) failed: %m", + openLogId, openLogSeg); + openLogFile = -1; + } + } + sync_method = new_sync_method; + open_sync_bit = new_sync_bit; + } +} + + +/* + * Issue appropriate kind of fsync (if any) on the current XLOG output file + */ +static void +issue_xlog_fsync(void) +{ + switch (sync_method) + { + case SYNC_METHOD_FSYNC: + if (pg_fsync(openLogFile) != 0) + elog(STOP, "fsync(logfile %u seg %u) failed: %m", + openLogId, openLogSeg); + break; +#ifdef HAVE_FDATASYNC + case SYNC_METHOD_FDATASYNC: + if (pg_fdatasync(openLogFile) != 0) + elog(STOP, "fdatasync(logfile %u seg %u) failed: %m", + openLogId, openLogSeg); + break; +#endif + case SYNC_METHOD_OPEN: + /* write synced it already */ + break; + default: + elog(STOP, "bogus sync_method %d", sync_method); + break; + } +} |