aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/file/fd.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/file/fd.c')
-rw-r--r--src/backend/storage/file/fd.c395
1 files changed, 336 insertions, 59 deletions
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index aa2fe2c6c04..2e93e4ad632 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -39,6 +39,14 @@
* for a long time, like relation files. It is the caller's responsibility
* to close them, there is no automatic mechanism in fd.c for that.
*
+ * PathName(Create|Open|Delete)Temporary(File|Dir) are used to manage
+ * temporary files that have names so that they can be shared between
+ * backends. Such files are automatically closed and count against the
+ * temporary file limit of the backend that creates them, but unlike anonymous
+ * files they are not automatically deleted. See sharedfileset.c for a shared
+ * ownership mechanism that provides automatic cleanup for shared files when
+ * the last of a group of backends detaches.
+ *
* AllocateFile, AllocateDir, OpenPipeStream and OpenTransientFile are
* wrappers around fopen(3), opendir(3), popen(3) and open(2), respectively.
* They behave like the corresponding native functions, except that the handle
@@ -175,8 +183,9 @@ int max_safe_fds = 32; /* default if not changed */
#define FilePosIsUnknown(pos) ((pos) < 0)
/* these are the assigned bits in fdstate below: */
-#define FD_TEMPORARY (1 << 0) /* T = delete when closed */
-#define FD_XACT_TEMPORARY (1 << 1) /* T = delete at eoXact */
+#define FD_DELETE_AT_CLOSE (1 << 0) /* T = delete when closed */
+#define FD_CLOSE_AT_EOXACT (1 << 1) /* T = close at eoXact */
+#define FD_TEMP_FILE_LIMIT (1 << 2) /* T = respect temp_file_limit */
typedef struct vfd
{
@@ -313,7 +322,7 @@ static struct dirent *ReadDirExtended(DIR *dir, const char *dirname, int elevel)
static void AtProcExit_Files(int code, Datum arg);
static void CleanupTempFiles(bool isProcExit);
-static void RemovePgTempFilesInDir(const char *tmpdirname);
+static void RemovePgTempFilesInDir(const char *tmpdirname, bool unlink_all);
static void RemovePgTempRelationFiles(const char *tsdirname);
static void RemovePgTempRelationFilesInDbspace(const char *dbspacedirname);
static bool looks_like_temp_rel_name(const char *name);
@@ -326,6 +335,7 @@ static void walkdir(const char *path,
static void pre_sync_fname(const char *fname, bool isdir, int elevel);
#endif
static void datadir_fsync_fname(const char *fname, bool isdir, int elevel);
+static void unlink_if_exists_fname(const char *fname, bool isdir, int elevel);
static int fsync_fname_ext(const char *fname, bool isdir, bool ignore_perm, int elevel);
static int fsync_parent_path(const char *fname, int elevel);
@@ -1295,6 +1305,39 @@ FileAccess(File file)
}
/*
+ * Called whenever a temporary file is deleted to report its size.
+ */
+static void
+ReportTemporaryFileUsage(const char *path, off_t size)
+{
+ pgstat_report_tempfile(size);
+
+ if (log_temp_files >= 0)
+ {
+ if ((size / 1024) >= log_temp_files)
+ ereport(LOG,
+ (errmsg("temporary file: path \"%s\", size %lu",
+ path, (unsigned long) size)));
+ }
+}
+
+/*
+ * Called to register a temporary file for automatic close.
+ * ResourceOwnerEnlargeFiles(CurrentResourceOwner) must have been called
+ * before the file was opened.
+ */
+static void
+RegisterTemporaryFile(File file)
+{
+ ResourceOwnerRememberFile(CurrentResourceOwner, file);
+ VfdCache[file].resowner = CurrentResourceOwner;
+
+ /* Backup mechanism for closing at end of xact. */
+ VfdCache[file].fdstate |= FD_CLOSE_AT_EOXACT;
+ have_xact_temporary_files = true;
+}
+
+/*
* Called when we get a shared invalidation message on some relation.
*/
#ifdef NOT_USED
@@ -1379,6 +1422,67 @@ PathNameOpenFilePerm(const char *fileName, int fileFlags, mode_t fileMode)
}
/*
+ * Create directory 'directory'. If necessary, create 'basedir', which must
+ * be the directory above it. This is designed for creating the top-level
+ * temporary directory on demand before creating a directory underneath it.
+ * Do nothing if the directory already exists.
+ *
+ * Directories created within the top-level temporary directory should begin
+ * with PG_TEMP_FILE_PREFIX, so that they can be identified as temporary and
+ * deleted at startup by RemovePgTempFiles(). Further subdirectories below
+ * that do not need any particular prefix.
+*/
+void
+PathNameCreateTemporaryDir(const char *basedir, const char *directory)
+{
+ if (mkdir(directory, S_IRWXU) < 0)
+ {
+ if (errno == EEXIST)
+ return;
+
+ /*
+ * Failed. Try to create basedir first in case it's missing. Tolerate
+ * EEXIST to close a race against another process following the same
+ * algorithm.
+ */
+ if (mkdir(basedir, S_IRWXU) < 0 && errno != EEXIST)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("cannot create temporary directory \"%s\": %m",
+ basedir)));
+
+ /* Try again. */
+ if (mkdir(directory, S_IRWXU) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("cannot create temporary subdirectory \"%s\": %m",
+ directory)));
+ }
+}
+
+/*
+ * Delete a directory and everything in it, if it exists.
+ */
+void
+PathNameDeleteTemporaryDir(const char *dirname)
+{
+ struct stat statbuf;
+
+ /* Silently ignore missing directory. */
+ if (stat(dirname, &statbuf) != 0 && errno == ENOENT)
+ return;
+
+ /*
+ * Currently, walkdir doesn't offer a way for our passed in function to
+ * maintain state. Perhaps it should, so that we could tell the caller
+ * whether this operation succeeded or failed. Since this operation is
+ * used in a cleanup path, we wouldn't actually behave differently: we'll
+ * just log failures.
+ */
+ walkdir(dirname, unlink_if_exists_fname, false, LOG);
+}
+
+/*
* Open a temporary file that will disappear when we close it.
*
* This routine takes care of generating an appropriate tempfile name.
@@ -1432,53 +1536,52 @@ OpenTemporaryFile(bool interXact)
DEFAULTTABLESPACE_OID,
true);
- /* Mark it for deletion at close */
- VfdCache[file].fdstate |= FD_TEMPORARY;
+ /* Mark it for deletion at close and temporary file size limit */
+ VfdCache[file].fdstate |= FD_DELETE_AT_CLOSE | FD_TEMP_FILE_LIMIT;
/* Register it with the current resource owner */
if (!interXact)
- {
- VfdCache[file].fdstate |= FD_XACT_TEMPORARY;
-
- VfdCache[file].resowner = CurrentResourceOwner;
- ResourceOwnerRememberFile(CurrentResourceOwner, file);
-
- /* ensure cleanup happens at eoxact */
- have_xact_temporary_files = true;
- }
+ RegisterTemporaryFile(file);
return file;
}
/*
- * Open a temporary file in a specific tablespace.
- * Subroutine for OpenTemporaryFile, which see for details.
+ * Return the path of the temp directory in a given tablespace.
*/
-static File
-OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
+void
+TempTablespacePath(char *path, Oid tablespace)
{
- char tempdirpath[MAXPGPATH];
- char tempfilepath[MAXPGPATH];
- File file;
-
/*
* Identify the tempfile directory for this tablespace.
*
* If someone tries to specify pg_global, use pg_default instead.
*/
- if (tblspcOid == DEFAULTTABLESPACE_OID ||
- tblspcOid == GLOBALTABLESPACE_OID)
- {
- /* The default tablespace is {datadir}/base */
- snprintf(tempdirpath, sizeof(tempdirpath), "base/%s",
- PG_TEMP_FILES_DIR);
- }
+ if (tablespace == InvalidOid ||
+ tablespace == DEFAULTTABLESPACE_OID ||
+ tablespace == GLOBALTABLESPACE_OID)
+ snprintf(path, MAXPGPATH, "base/%s", PG_TEMP_FILES_DIR);
else
{
/* All other tablespaces are accessed via symlinks */
- snprintf(tempdirpath, sizeof(tempdirpath), "pg_tblspc/%u/%s/%s",
- tblspcOid, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR);
+ snprintf(path, MAXPGPATH, "pg_tblspc/%u/%s/%s",
+ tablespace, TABLESPACE_VERSION_DIRECTORY,
+ PG_TEMP_FILES_DIR);
}
+}
+
+/*
+ * Open a temporary file in a specific tablespace.
+ * Subroutine for OpenTemporaryFile, which see for details.
+ */
+static File
+OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
+{
+ char tempdirpath[MAXPGPATH];
+ char tempfilepath[MAXPGPATH];
+ File file;
+
+ TempTablespacePath(tempdirpath, tblspcOid);
/*
* Generate a tempfile name that should be unique within the current
@@ -1515,6 +1618,130 @@ OpenTemporaryFileInTablespace(Oid tblspcOid, bool rejectError)
return file;
}
+
+/*
+ * Create a new file. The directory containing it must already exist. Files
+ * created this way are subject to temp_file_limit and are automatically
+ * closed at end of transaction, but are not automatically deleted on close
+ * because they are intended to be shared between cooperating backends.
+ *
+ * If the file is inside the top-level temporary directory, its name should
+ * begin with PG_TEMP_FILE_PREFIX so that it can be identified as temporary
+ * and deleted at startup by RemovePgTempFiles(). Alternatively, it can be
+ * inside a directory created with PathnameCreateTemporaryDir(), in which case
+ * the prefix isn't needed.
+ */
+File
+PathNameCreateTemporaryFile(const char *path, bool error_on_failure)
+{
+ File file;
+
+ ResourceOwnerEnlargeFiles(CurrentResourceOwner);
+
+ /*
+ * Open the file. Note: we don't use O_EXCL, in case there is an orphaned
+ * temp file that can be reused.
+ */
+ file = PathNameOpenFile(path, O_RDWR | O_CREAT | O_TRUNC | PG_BINARY);
+ if (file <= 0)
+ {
+ if (error_on_failure)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create temporary file \"%s\": %m",
+ path)));
+ else
+ return file;
+ }
+
+ /* Mark it for temp_file_limit accounting. */
+ VfdCache[file].fdstate |= FD_TEMP_FILE_LIMIT;
+
+ /* Register it for automatic close. */
+ RegisterTemporaryFile(file);
+
+ return file;
+}
+
+/*
+ * Open a file that was created with PathNameCreateTemporaryFile, possibly in
+ * another backend. Files opened this way don't count against the
+ * temp_file_limit of the caller, are read-only and are automatically closed
+ * at the end of the transaction but are not deleted on close.
+ */
+File
+PathNameOpenTemporaryFile(const char *path)
+{
+ File file;
+
+ ResourceOwnerEnlargeFiles(CurrentResourceOwner);
+
+ /* We open the file read-only. */
+ file = PathNameOpenFile(path, O_RDONLY | PG_BINARY);
+
+ /* If no such file, then we don't raise an error. */
+ if (file <= 0 && errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open temporary file \"%s\": %m",
+ path)));
+
+ if (file > 0)
+ {
+ /* Register it for automatic close. */
+ RegisterTemporaryFile(file);
+ }
+
+ return file;
+}
+
+/*
+ * Delete a file by pathname. Return true if the file existed, false if
+ * didn't.
+ */
+bool
+PathNameDeleteTemporaryFile(const char *path, bool error_on_failure)
+{
+ struct stat filestats;
+ int stat_errno;
+
+ /* Get the final size for pgstat reporting. */
+ if (stat(path, &filestats) != 0)
+ stat_errno = errno;
+ else
+ stat_errno = 0;
+
+ /*
+ * Unlike FileClose's automatic file deletion code, we tolerate
+ * non-existence to support BufFileDeleteShared which doesn't know how
+ * many segments it has to delete until it runs out.
+ */
+ if (stat_errno == ENOENT)
+ return false;
+
+ if (unlink(path) < 0)
+ {
+ if (errno != ENOENT)
+ ereport(error_on_failure ? ERROR : LOG,
+ (errcode_for_file_access(),
+ errmsg("cannot unlink temporary file \"%s\": %m",
+ path)));
+ return false;
+ }
+
+ if (stat_errno == 0)
+ ReportTemporaryFileUsage(path, filestats.st_size);
+ else
+ {
+ errno = stat_errno;
+ ereport(LOG,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", path)));
+ }
+
+ return true;
+}
+
/*
* close a file when done with it
*/
@@ -1543,10 +1770,17 @@ FileClose(File file)
Delete(file);
}
+ if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
+ {
+ /* Subtract its size from current usage (do first in case of error) */
+ temporary_files_size -= vfdP->fileSize;
+ vfdP->fileSize = 0;
+ }
+
/*
* Delete the file if it was temporary, and make a log entry if wanted
*/
- if (vfdP->fdstate & FD_TEMPORARY)
+ if (vfdP->fdstate & FD_DELETE_AT_CLOSE)
{
struct stat filestats;
int stat_errno;
@@ -1558,11 +1792,8 @@ FileClose(File file)
* is arranged to ensure that the worst-case consequence is failing to
* emit log message(s), not failing to attempt the unlink.
*/
- vfdP->fdstate &= ~FD_TEMPORARY;
+ vfdP->fdstate &= ~FD_DELETE_AT_CLOSE;
- /* Subtract its size from current usage (do first in case of error) */
- temporary_files_size -= vfdP->fileSize;
- vfdP->fileSize = 0;
/* first try the stat() */
if (stat(vfdP->fileName, &filestats))
@@ -1576,18 +1807,7 @@ FileClose(File file)
/* and last report the stat results */
if (stat_errno == 0)
- {
- pgstat_report_tempfile(filestats.st_size);
-
- if (log_temp_files >= 0)
- {
- if ((filestats.st_size / 1024) >= log_temp_files)
- ereport(LOG,
- (errmsg("temporary file: path \"%s\", size %lu",
- vfdP->fileName,
- (unsigned long) filestats.st_size)));
- }
- }
+ ReportTemporaryFileUsage(vfdP->fileName, filestats.st_size);
else
{
errno = stat_errno;
@@ -1761,7 +1981,7 @@ FileWrite(File file, char *buffer, int amount, uint32 wait_event_info)
* message if we do that. All current callers would just throw error
* immediately anyway, so this is safe at present.
*/
- if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMPORARY))
+ if (temp_file_limit >= 0 && (vfdP->fdstate & FD_TEMP_FILE_LIMIT))
{
off_t newPos;
@@ -1814,7 +2034,7 @@ retry:
* get here in that state if we're not enforcing temporary_files_size,
* so we don't care.
*/
- if (vfdP->fdstate & FD_TEMPORARY)
+ if (vfdP->fdstate & FD_TEMP_FILE_LIMIT)
{
off_t newPos = vfdP->seekPos;
@@ -1985,7 +2205,7 @@ FileTruncate(File file, off_t offset, uint32 wait_event_info)
if (returnCode == 0 && VfdCache[file].fileSize > offset)
{
/* adjust our state for truncation of a temp file */
- Assert(VfdCache[file].fdstate & FD_TEMPORARY);
+ Assert(VfdCache[file].fdstate & FD_TEMP_FILE_LIMIT);
temporary_files_size -= VfdCache[file].fileSize - offset;
VfdCache[file].fileSize = offset;
}
@@ -2594,6 +2814,24 @@ TempTablespacesAreSet(void)
}
/*
+ * GetTempTablespaces
+ *
+ * Populate an array with the OIDs of the tablespaces that should be used for
+ * temporary files. Return the number that were copied into the output array.
+ */
+int
+GetTempTablespaces(Oid *tableSpaces, int numSpaces)
+{
+ int i;
+
+ Assert(TempTablespacesAreSet());
+ for (i = 0; i < numTempTableSpaces && i < numSpaces; ++i)
+ tableSpaces[i] = tempTableSpaces[i];
+
+ return i;
+}
+
+/*
* GetNextTempTableSpace
*
* Select the next temp tablespace to use. A result of InvalidOid means
@@ -2696,7 +2934,8 @@ CleanupTempFiles(bool isProcExit)
{
unsigned short fdstate = VfdCache[i].fdstate;
- if ((fdstate & FD_TEMPORARY) && VfdCache[i].fileName != NULL)
+ if (((fdstate & FD_DELETE_AT_CLOSE) || (fdstate & FD_CLOSE_AT_EOXACT)) &&
+ VfdCache[i].fileName != NULL)
{
/*
* If we're in the process of exiting a backend process, close
@@ -2707,7 +2946,7 @@ CleanupTempFiles(bool isProcExit)
*/
if (isProcExit)
FileClose(i);
- else if (fdstate & FD_XACT_TEMPORARY)
+ else if (fdstate & FD_CLOSE_AT_EOXACT)
{
elog(WARNING,
"temporary file %s not closed at end-of-transaction",
@@ -2751,7 +2990,7 @@ RemovePgTempFiles(void)
* First process temp files in pg_default ($PGDATA/base)
*/
snprintf(temp_path, sizeof(temp_path), "base/%s", PG_TEMP_FILES_DIR);
- RemovePgTempFilesInDir(temp_path);
+ RemovePgTempFilesInDir(temp_path, false);
RemovePgTempRelationFiles("base");
/*
@@ -2767,7 +3006,7 @@ RemovePgTempFiles(void)
snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s/%s",
spc_de->d_name, TABLESPACE_VERSION_DIRECTORY, PG_TEMP_FILES_DIR);
- RemovePgTempFilesInDir(temp_path);
+ RemovePgTempFilesInDir(temp_path, false);
snprintf(temp_path, sizeof(temp_path), "pg_tblspc/%s/%s",
spc_de->d_name, TABLESPACE_VERSION_DIRECTORY);
@@ -2785,9 +3024,15 @@ RemovePgTempFiles(void)
#endif
}
-/* Process one pgsql_tmp directory for RemovePgTempFiles */
+/*
+ * Process one pgsql_tmp directory for RemovePgTempFiles. At the top level in
+ * each tablespace, this should be called with unlink_all = false, so that
+ * only files matching the temporary name prefix will be unlinked. When
+ * recursing it will be called with unlink_all = true to unlink everything
+ * under a top-level temporary directory.
+ */
static void
-RemovePgTempFilesInDir(const char *tmpdirname)
+RemovePgTempFilesInDir(const char *tmpdirname, bool unlink_all)
{
DIR *temp_dir;
struct dirent *temp_de;
@@ -2813,10 +3058,25 @@ RemovePgTempFilesInDir(const char *tmpdirname)
snprintf(rm_path, sizeof(rm_path), "%s/%s",
tmpdirname, temp_de->d_name);
- if (strncmp(temp_de->d_name,
+ if (unlink_all ||
+ strncmp(temp_de->d_name,
PG_TEMP_FILE_PREFIX,
strlen(PG_TEMP_FILE_PREFIX)) == 0)
- unlink(rm_path); /* note we ignore any error */
+ {
+ struct stat statbuf;
+
+ /* note that we ignore any error here and below */
+ if (lstat(rm_path, &statbuf) < 0)
+ continue;
+
+ if (S_ISDIR(statbuf.st_mode))
+ {
+ RemovePgTempFilesInDir(rm_path, true);
+ rmdir(rm_path);
+ }
+ else
+ unlink(rm_path);
+ }
else
elog(LOG,
"unexpected file found in temporary-files directory: \"%s\"",
@@ -3152,6 +3412,23 @@ datadir_fsync_fname(const char *fname, bool isdir, int elevel)
fsync_fname_ext(fname, isdir, true, elevel);
}
+static void
+unlink_if_exists_fname(const char *fname, bool isdir, int elevel)
+{
+ if (isdir)
+ {
+ if (rmdir(fname) != 0 && errno != ENOENT)
+ ereport(elevel,
+ (errcode_for_file_access(),
+ errmsg("could not rmdir directory \"%s\": %m", fname)));
+ }
+ else
+ {
+ /* Use PathNameDeleteTemporaryFile to report filesize */
+ PathNameDeleteTemporaryFile(fname, false);
+ }
+}
+
/*
* fsync_fname_ext -- Try to fsync a file or directory
*