diff options
Diffstat (limited to 'src/backend/storage/file/fd.c')
-rw-r--r-- | src/backend/storage/file/fd.c | 395 |
1 files changed, 336 insertions, 59 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index aa2fe2c6c04..2e93e4ad632 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -39,6 +39,14 @@ * for a long time, like relation files. It is the caller's responsibility * to close them, there is no automatic mechanism in fd.c for that. * + * PathName(Create|Open|Delete)Temporary(File|Dir) are used to manage + * temporary files that have names so that they can be shared between + * backends. Such files are automatically closed and count against the + * temporary file limit of the backend that creates them, but unlike anonymous + * files they are not automatically deleted. See sharedfileset.c for a shared + * ownership mechanism that provides automatic cleanup for shared files when + * the last of a group of backends detaches. + * * AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are * wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively. * They behave like the corresponding native functions, except that the handle @@ -175,8 +183,9 @@ int max_safe_fds = 32; /* default if not changed */ #define FilePosIsUnknown(pos) ((pos) < 0) /* these are the assigned bits in fdstate below: */ -#define FD_TEMPORARY (1 << 0) /* T = delete when closed */ -#define FD_XACT_TEMPORARY (1 << 1) /* T = delete at eoXact */ +#define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */ +#define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */ +#define FD_TEMP_FILE_LIMIT (1 << 2) /* T = respect temp_file_limit */ typedef struct vfd { @@ -313,7 +322,7 @@ static struct dirent *ReadDirExtended(DIR *dir, const char *dirname, int elevel) static void AtProcExit_Files(int code, Datum arg); static void CleanupTempFiles(bool isProcExit); -static void RemovePgTempFilesInDir(const char *tmpdirname); +static void RemovePgTempFilesInDir(const char *tmpdirname, bool unlink_all); static void RemovePgTempRelationFiles(const char *tsdirname); static void 
RemovePgTempRelationFilesInDbspace(const char *dbspacedirname); static bool looks_like_temp_rel_name(const char *name); @@ -326,6 +335,7 @@ static void walkdir(const char *path, static void pre_sync_fname(const char *fname, bool isdir, int elevel); #endif static void datadir_fsync_fname(const char *fname, bool isdir, int elevel); +static void unlink_if_exists_fname(const char *fname, bool isdir, int elevel); static int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel); static int fsync_parent_path(const char *fname, int elevel); @@ -1295,6 +1305,39 @@ FileAccess(File file) } /* + * Called whenever a temporary file is deleted to report its size. + */ +static void +ReportTemporaryFileUsage(const char *path, off_t size) +{ + pgstat_report_tempfile(size); + + if (log_temp_files >= 0) + { + if ((size / 1024) >= log_temp_files) + ereport(LOG, + (errmsg("temporary file: path \"%s\", size %lu", + path, (unsigned long) size))); + } +} + +/* + * Called to register a temporary file for automatic close. + * ResourceOwnerEnlargeFiles(CurrentResourceOwner) must have been called + * before the file was opened. + */ +static void +RegisterTemporaryFile(File file) +{ + ResourceOwnerRememberFile(CurrentResourceOwner, file); + VfdCache[file].resowner = CurrentResourceOwner; + + /* Backup mechanism for closing at end of xact. */ + VfdCache[file].fdstate |= FD_CLOSE_AT_EOXACT; + have_xact_temporary_files = true; +} + +/* * Called when we get a shared invalidation message on some relation. */ #ifdef NOT_USED @@ -1379,6 +1422,67 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode) } /* + * Create directory 'directory'. If necessary, create 'basedir', which must + * be the directory above it. This is designed for creating the top-level + * temporary directory on demand before creating a directory underneath it. + * Do nothing if the directory already exists. 
+ * + * Directories created within the top-level temporary directory should begin + * with PG_TEMP_FILE_PREFIX, so that they can be identified as temporary and + * deleted at startup by RemovePgTempFiles(). Further subdirectories below + * that do not need any particular prefix. +*/ +void +PathNameCreateTemporaryDir(const char *basedir, const char *directory) +{ + if (mkdir(directory, S_IRWXU) < 0) + { + if (errno == EEXIST) + return; + + /* + * Failed. Try to create basedir first in case it's missing. Tolerate + * EEXIST to close a race against another process following the same + * algorithm. + */ + if (mkdir(basedir, S_IRWXU) < 0 && errno != EEXIST) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("cannot create temporary directory \"%s\": %m", + basedir))); + + /* Try again. */ + if (mkdir(directory, S_IRWXU) < 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("cannot create temporary subdirectory \"%s\": %m", + directory))); + } +} + +/* + * Delete a directory and everything in it, if it exists. + */ +void +PathNameDeleteTemporaryDir(const char *dirname) +{ + struct stat statbuf; + + /* Silently ignore missing directory. */ + if (stat(dirname, &statbuf) != 0 && errno == ENOENT) + return; + + /* + * Currently, walkdir doesn't offer a way for our passed in function to + * maintain state. Perhaps it should, so that we could tell the caller + * whether this operation succeeded or failed. Since this operation is + * used in a cleanup path, we wouldn't actually behave differently: we'll + * just log failures. + */ + walkdir(dirname, unlink_if_exists_fname, false, LOG); +} + +/* * Open a temporary file that will disappear when we close it. * * This routine takes care of generating an appropriate tempfile name. 
@@ -1432,53 +1536,52 @@ OpenTemporaryFile(bool interXact) DEFAULTTABLESPACE_OID, true); - /* Mark it for deletion at close */ - VfdCache[file].fdstate |= FD_TEMPORARY; + /* Mark it for deletion at close and temporary file size limit */ + VfdCache[file].fdstate |= FD_DELETE_AT_CLOSE | FD_TEMP_FILE_LIMIT; /* Register it with the current resource owner */ if (!interXact) - { - VfdCache[file].fdstate |= FD_XACT_TEMPORARY; - - VfdCache[file].resowner = CurrentResourceOwner; - ResourceOwnerRememberFile(CurrentResourceOwner, file); - - /* ensure cleanup happens at eoxact */ - have_xact_temporary_files = true; - } + RegisterTemporaryFile(file); return file; } /* - * Open a temporary file in a specific tablespace. - * Subroutine for OpenTemporaryFile, which see for details. + * Return the path of the temp directory in a given tablespace. */ -static File -OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) +void +TempTablespacePath(char *path, Oid tablespace) { - char tempdirpath[MAXPGPATH]; - char tempfilepath[MAXPGPATH]; - File file; - /* * Identify the tempfile directory for this tablespace. * * If someone tries to specify pg_global, use pg_default instead. */ - if (tblspcOid == DEFAULTTABLESPACE_OID || - tblspcOid == GLOBALTABLESPACE_OID) - { - /* The default tablespace is {datadir}/base */ - snprintf(tempdirpath, sizeof(tempdirpath), "base/%s", - PG_TEMP_FILES_DIR); - } + if (tablespace == InvalidOid || + tablespace == DEFAULTTABLESPACE_OID || + tablespace == GLOBALTABLESPACE_OID) + snprintf(path, MAXPGPATH, "base/%s", PG_TEMP_FILES_DIR); else { /* All other tablespaces are accessed via symlinks */ - snprintf(tempdirpath, sizeof(tempdirpath), "pg_tblspc/%u/%s/%s", - tblspcOid, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); + snprintf(path, MAXPGPATH, "pg_tblspc/%u/%s/%s", + tablespace, TABLESPACE_VERSION_DIRECTORY, + PG_TEMP_FILES_DIR); } +} + +/* + * Open a temporary file in a specific tablespace. 
+ * Subroutine for OpenTemporaryFile, which see for details. + */ +static File +OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) +{ + char tempdirpath[MAXPGPATH]; + char tempfilepath[MAXPGPATH]; + File file; + + TempTablespacePath(tempdirpath, tblspcOid); /* * Generate a tempfile name that should be unique within the current @@ -1515,6 +1618,130 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError) return file; } + +/* + * Create a new file. The directory containing it must already exist. Files + * created this way are subject to temp_file_limit and are automatically + * closed at end of transaction, but are not automatically deleted on close + * because they are intended to be shared between cooperating backends. + * + * If the file is inside the top-level temporary directory, its name should + * begin with PG_TEMP_FILE_PREFIX so that it can be identified as temporary + * and deleted at startup by RemovePgTempFiles(). Alternatively, it can be + * inside a directory created with PathNameCreateTemporaryDir(), in which case + * the prefix isn't needed. + */ +File +PathNameCreateTemporaryFile(const char *path, bool error_on_failure) +{ + File file; + + ResourceOwnerEnlargeFiles(CurrentResourceOwner); + + /* + * Open the file. Note: we don't use O_EXCL, in case there is an orphaned + * temp file that can be reused. + */ + file = PathNameOpenFile(path, O_RDWR | O_CREAT | O_TRUNC | PG_BINARY); + if (file <= 0) + { + if (error_on_failure) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not create temporary file \"%s\": %m", + path))); + else + return file; + } + + /* Mark it for temp_file_limit accounting. */ + VfdCache[file].fdstate |= FD_TEMP_FILE_LIMIT; + + /* Register it for automatic close. */ + RegisterTemporaryFile(file); + + return file; +} + +/* + * Open a file that was created with PathNameCreateTemporaryFile, possibly in + * another backend. 
Files opened this way don't count against the + * temp_file_limit of the caller, are read-only and are automatically closed + * at the end of the transaction but are not deleted on close. + */ +File +PathNameOpenTemporaryFile(const char *path) +{ + File file; + + ResourceOwnerEnlargeFiles(CurrentResourceOwner); + + /* We open the file read-only. */ + file = PathNameOpenFile(path, O_RDONLY | PG_BINARY); + + /* If no such file, then we don't raise an error. */ + if (file <= 0 && errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not open temporary file \"%s\": %m", + path))); + + if (file > 0) + { + /* Register it for automatic close. */ + RegisterTemporaryFile(file); + } + + return file; +} + +/* + * Delete a file by pathname. Return true if the file existed, false if + * it didn't. + */ +bool +PathNameDeleteTemporaryFile(const char *path, bool error_on_failure) +{ + struct stat filestats; + int stat_errno; + + /* Get the final size for pgstat reporting. */ + if (stat(path, &filestats) != 0) + stat_errno = errno; + else + stat_errno = 0; + + /* + * Unlike FileClose's automatic file deletion code, we tolerate + * non-existence to support BufFileDeleteShared which doesn't know how + * many segments it has to delete until it runs out. + */ + if (stat_errno == ENOENT) + return false; + + if (unlink(path) < 0) + { + if (errno != ENOENT) + ereport(error_on_failure ? 
ERROR : LOG, + (errcode_for_file_access(), + errmsg("cannot unlink temporary file \"%s\": %m", + path))); + return false; + } + + if (stat_errno == 0) + ReportTemporaryFileUsage(path, filestats.st_size); + else + { + errno = stat_errno; + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", path))); + } + + return true; +} + /* * close a file when done with it */ @@ -1543,10 +1770,17 @@ FileClose(File file) Delete(file); } + if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) + { + /* Subtract its size from current usage (do first in case of error) */ + temporary_files_size -= vfdP->fileSize; + vfdP->fileSize = 0; + } + /* * Delete the file if it was temporary, and make a log entry if wanted */ - if (vfdP->fdstate & FD_TEMPORARY) + if (vfdP->fdstate & FD_DELETE_AT_CLOSE) { struct stat filestats; int stat_errno; @@ -1558,11 +1792,8 @@ FileClose(File file) * is arranged to ensure that the worst-case consequence is failing to * emit log message(s), not failing to attempt the unlink. */ - vfdP->fdstate &= ~FD_TEMPORARY; + vfdP->fdstate &= ~FD_DELETE_AT_CLOSE; - /* Subtract its size from current usage (do first in case of error) */ - temporary_files_size -= vfdP->fileSize; - vfdP->fileSize = 0; /* first try the stat() */ if (stat(vfdP->fileName, &filestats)) @@ -1576,18 +1807,7 @@ FileClose(File file) /* and last report the stat results */ if (stat_errno == 0) - { - pgstat_report_tempfile(filestats.st_size); - - if (log_temp_files >= 0) - { - if ((filestats.st_size / 1024) >= log_temp_files) - ereport(LOG, - (errmsg("temporary file: path \"%s\", size %lu", - vfdP->fileName, - (unsigned long) filestats.st_size))); - } - } + ReportTemporaryFileUsage(vfdP->fileName, filestats.st_size); else { errno = stat_errno; @@ -1761,7 +1981,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info) * message if we do that. All current callers would just throw error * immediately anyway, so this is safe at present. 
*/ - if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMPORARY)) + if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT)) { off_t newPos; @@ -1814,7 +2034,7 @@ retry: * get here in that state if we're not enforcing temporary_files_size, * so we don't care. */ - if (vfdP->fdstate & FD_TEMPORARY) + if (vfdP->fdstate & FD_TEMP_FILE_LIMIT) { off_t newPos = vfdP->seekPos; @@ -1985,7 +2205,7 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info) if (returnCode == 0 && VfdCache[file].fileSize > offset) { /* adjust our state for truncation of a temp file */ - Assert(VfdCache[file].fdstate & FD_TEMPORARY); + Assert(VfdCache[file].fdstate & FD_TEMP_FILE_LIMIT); temporary_files_size -= VfdCache[file].fileSize - offset; VfdCache[file].fileSize = offset; } @@ -2594,6 +2814,24 @@ TempTablespacesAreSet(void) } /* + * GetTempTablespaces + * + * Populate an array with the OIDs of the tablespaces that should be used for + * temporary files. Return the number that were copied into the output array. + */ +int +GetTempTablespaces(Oid *tableSpaces, int numSpaces) +{ + int i; + + Assert(TempTablespacesAreSet()); + for (i = 0; i < numTempTableSpaces && i < numSpaces; ++i) + tableSpaces[i] = tempTableSpaces[i]; + + return i; +} + +/* * GetNextTempTableSpace * * Select the next temp tablespace to use. 
A result of InvalidOid means @@ -2696,7 +2934,8 @@ CleanupTempFiles(bool isProcExit) { unsigned short fdstate = VfdCache[i].fdstate; - if ((fdstate & FD_TEMPORARY) && VfdCache[i].fileName != NULL) + if (((fdstate & FD_DELETE_AT_CLOSE) || (fdstate & FD_CLOSE_AT_EOXACT)) && + VfdCache[i].fileName != NULL) { /* * If we're in the process of exiting a backend process, close @@ -2707,7 +2946,7 @@ CleanupTempFiles(bool isProcExit) */ if (isProcExit) FileClose(i); - else if (fdstate & FD_XACT_TEMPORARY) + else if (fdstate & FD_CLOSE_AT_EOXACT) { elog(WARNING, "temporary file %s not closed at end-of-transaction", @@ -2751,7 +2990,7 @@ RemovePgTempFiles(void) * First process temp files in pg_default ($PGDATA/base) */ snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR); - RemovePgTempFilesInDir(temp_path); + RemovePgTempFilesInDir(temp_path, false); RemovePgTempRelationFiles("base"); /* @@ -2767,7 +3006,7 @@ RemovePgTempFiles(void) snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s", spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR); - RemovePgTempFilesInDir(temp_path); + RemovePgTempFilesInDir(temp_path, false); snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s", spc_de->d_name, TABLESPACE_VERSION_DIRECTORY); @@ -2785,9 +3024,15 @@ RemovePgTempFiles(void) #endif } -/* Process one pgsql_tmp directory for RemovePgTempFiles */ +/* + * Process one pgsql_tmp directory for RemovePgTempFiles. At the top level in + * each tablespace, this should be called with unlink_all = false, so that + * only files matching the temporary name prefix will be unlinked. When + * recursing it will be called with unlink_all = true to unlink everything + * under a top-level temporary directory. 
+ */ static void -RemovePgTempFilesInDir(const char *tmpdirname) +RemovePgTempFilesInDir(const char *tmpdirname, bool unlink_all) { DIR *temp_dir; struct dirent *temp_de; @@ -2813,10 +3058,25 @@ RemovePgTempFilesInDir(const char *tmpdirname) snprintf(rm_path, sizeof(rm_path), "%s/%s", tmpdirname, temp_de->d_name); - if (strncmp(temp_de->d_name, + if (unlink_all || + strncmp(temp_de->d_name, PG_TEMP_FILE_PREFIX, strlen(PG_TEMP_FILE_PREFIX)) == 0) - unlink(rm_path); /* note we ignore any error */ + { + struct stat statbuf; + + /* note that we ignore any error here and below */ + if (lstat(rm_path, &statbuf) < 0) + continue; + + if (S_ISDIR(statbuf.st_mode)) + { + RemovePgTempFilesInDir(rm_path, true); + rmdir(rm_path); + } + else + unlink(rm_path); + } else elog(LOG, "unexpected file found in temporary-files directory: \"%s\"", @@ -3152,6 +3412,23 @@ datadir_fsync_fname(const char *fname, bool isdir, int elevel) fsync_fname_ext(fname, isdir, true, elevel); } +static void +unlink_if_exists_fname(const char *fname, bool isdir, int elevel) +{ + if (isdir) + { + if (rmdir(fname) != 0 && errno != ENOENT) + ereport(elevel, + (errcode_for_file_access(), + errmsg("could not rmdir directory \"%s\": %m", fname))); + } + else + { + /* Use PathNameDeleteTemporaryFile to report filesize */ + PathNameDeleteTemporaryFile(fname, false); + } +} + /* * fsync_fname_ext -- Try to fsync a file or directory * |