aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/commands/dbcommands.c9
-rw-r--r--src/backend/postmaster/bgwriter.c14
-rw-r--r--src/backend/storage/smgr/md.c152
-rw-r--r--src/include/storage/smgr.h4
4 files changed, 124 insertions, 55 deletions
diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c
index 18340481d23..38db4ae3b92 100644
--- a/src/backend/commands/dbcommands.c
+++ b/src/backend/commands/dbcommands.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.189 2007/01/16 13:28:56 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/dbcommands.c,v 1.190 2007/01/17 16:25:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -40,6 +40,7 @@
#include "postmaster/bgwriter.h"
#include "storage/freespace.h"
#include "storage/procarray.h"
+#include "storage/smgr.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/flatfiles.h"
@@ -644,6 +645,12 @@ dropdb(const char *dbname, bool missing_ok)
FreeSpaceMapForgetDatabase(db_id);
/*
+ * Tell bgwriter to forget any pending fsync requests for files in the
+ * database; else it'll fail at next checkpoint.
+ */
+ ForgetDatabaseFsyncRequests(db_id);
+
+ /*
* On Windows, force a checkpoint so that the bgwriter doesn't hold any
* open files, which would cause rmdir() to fail.
*/
diff --git a/src/backend/postmaster/bgwriter.c b/src/backend/postmaster/bgwriter.c
index 26650253361..1224f556e83 100644
--- a/src/backend/postmaster/bgwriter.c
+++ b/src/backend/postmaster/bgwriter.c
@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.35 2007/01/17 00:17:20 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.36 2007/01/17 16:25:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -103,7 +103,7 @@
typedef struct
{
RelFileNode rnode;
- BlockNumber segno; /* InvalidBlockNumber means "revoke" */
+ BlockNumber segno; /* see md.c for special values */
/* might add a real request-type field later; not needed yet */
} BgWriterRequest;
@@ -695,16 +695,16 @@ RequestCheckpoint(bool waitforit, bool warnontime)
* ForwardFsyncRequest
* Forward a file-fsync request from a backend to the bgwriter
*
- * segno specifies which segment (not block!) of the relation needs to be
- * fsync'd. If segno == InvalidBlockNumber, the meaning is to revoke any
- * pending fsync requests for the entire relation (this message is sent
- * when the relation is about to be deleted).
- *
* Whenever a backend is compelled to write directly to a relation
* (which should be seldom, if the bgwriter is getting its job done),
* the backend calls this routine to pass over knowledge that the relation
* is dirty and must be fsync'd before next checkpoint.
*
+ * segno specifies which segment (not block!) of the relation needs to be
+ * fsync'd. (Since the valid range is much less than BlockNumber, we can
+ * use high values for special flags; that's all internal to md.c, which
+ * see for details.)
+ *
* If we are unable to pass over the request (at present, this can happen
* if the shared memory queue is full), we return false. That forces
* the backend to do its own fsync. We hope that will be even more seldom.
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index e3cc00ca1a9..e8396b698d4 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.126 2007/01/17 00:17:21 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.127 2007/01/17 16:25:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,6 +31,10 @@
/* interval for calling AbsorbFsyncRequests in mdsync */
#define FSYNCS_PER_ABSORB 10
+/* special values for the segno arg to RememberFsyncRequest */
+#define FORGET_RELATION_FSYNC (InvalidBlockNumber)
+#define FORGET_DATABASE_FSYNC (InvalidBlockNumber-1)
+
/*
* On Windows, we have to interpret EACCES as possibly meaning the same as
* ENOENT, because if a file is unlinked-but-not-yet-gone on that platform,
@@ -258,30 +262,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
* We have to clean out any pending fsync requests for the doomed relation,
* else the next mdsync() will fail.
*/
- if (pendingOpsTable)
- {
- /* standalone backend or startup process: fsync state is local */
- RememberFsyncRequest(rnode, InvalidBlockNumber);
- }
- else if (IsUnderPostmaster)
- {
- /*
- * Notify the bgwriter about it. If we fail to queue the revoke
- * message, we have to sleep and try again ... ugly, but hopefully
- * won't happen often.
- *
- * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
- * an error would leave the no-longer-used file still present on
- * disk, which would be bad, so I'm inclined to assume that the
- * bgwriter will always empty the queue soon.
- */
- while (!ForwardFsyncRequest(rnode, InvalidBlockNumber))
- pg_usleep(10000L); /* 10 msec seems a good number */
- /*
- * Note we don't wait for the bgwriter to actually absorb the
- * revoke message; see mdsync() for the implications.
- */
- }
+ ForgetRelationFsyncRequests(rnode);
path = relpath(rnode);
@@ -894,7 +875,8 @@ mdsync(void)
* what we will do is retry the whole process after absorbing fsync
* request messages again. Since mdunlink() queues a "revoke" message
* before actually unlinking, the fsync request is guaranteed to be gone
- * the second time if it really was this case.
+ * the second time if it really was this case. DROP DATABASE likewise
+ * has to tell us to forget fsync requests before it starts deletions.
*/
do {
HASH_SEQ_STATUS hstat;
@@ -1043,17 +1025,58 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
* We stuff the fsync request into the local hash table for execution
* during the bgwriter's next checkpoint.
*
- * segno == InvalidBlockNumber is a "revoke" request: remove any pending
- * fsync requests for the whole relation.
+ * The range of possible segment numbers is way less than the range of
+ * BlockNumber, so we can reserve high values of segno for special purposes.
+ * We define two: FORGET_RELATION_FSYNC means to drop pending fsyncs for
+ * a relation, and FORGET_DATABASE_FSYNC means to drop pending fsyncs for
+ * a whole database. (These are a tad slow because the hash table has to be
+ * searched linearly, but it doesn't seem worth rethinking the table structure
+ * for them.)
*/
void
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
{
Assert(pendingOpsTable);
- if (segno != InvalidBlockNumber)
+ if (segno == FORGET_RELATION_FSYNC)
+ {
+ /* Remove any pending requests for the entire relation */
+ HASH_SEQ_STATUS hstat;
+ PendingOperationEntry *entry;
+
+ hash_seq_init(&hstat, pendingOpsTable);
+ while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
+ {
+ if (RelFileNodeEquals(entry->tag.rnode, rnode))
+ {
+ /* Okay, delete this entry */
+ if (hash_search(pendingOpsTable, &entry->tag,
+ HASH_REMOVE, NULL) == NULL)
+ elog(ERROR, "pendingOpsTable corrupted");
+ }
+ }
+ }
+ else if (segno == FORGET_DATABASE_FSYNC)
+ {
+ /* Remove any pending requests for the entire database */
+ HASH_SEQ_STATUS hstat;
+ PendingOperationEntry *entry;
+
+ hash_seq_init(&hstat, pendingOpsTable);
+ while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
+ {
+ if (entry->tag.rnode.dbNode == rnode.dbNode)
+ {
+ /* Okay, delete this entry */
+ if (hash_search(pendingOpsTable, &entry->tag,
+ HASH_REMOVE, NULL) == NULL)
+ elog(ERROR, "pendingOpsTable corrupted");
+ }
+ }
+ }
+ else
{
- /* Enter a request to fsync this segment */
+ /* Normal case: enter a request to fsync this segment */
PendingOperationTag key;
PendingOperationEntry *entry;
bool found;
@@ -1070,29 +1093,66 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
if (!found) /* new entry, so initialize it */
entry->failures = 0;
}
- else
+}
+
+/*
+ * ForgetRelationFsyncRequests -- ensure any fsyncs for a rel are forgotten
+ */
+void
+ForgetRelationFsyncRequests(RelFileNode rnode)
+{
+ if (pendingOpsTable)
+ {
+ /* standalone backend or startup process: fsync state is local */
+ RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC);
+ }
+ else if (IsUnderPostmaster)
{
/*
- * Remove any pending requests for the entire relation. (This is a
- * tad slow but it doesn't seem worth rethinking the table structure.)
+ * Notify the bgwriter about it. If we fail to queue the revoke
+ * message, we have to sleep and try again ... ugly, but hopefully
+ * won't happen often.
+ *
+ * XXX should we CHECK_FOR_INTERRUPTS in this loop? Escaping with
+ * an error would leave the no-longer-used file still present on
+ * disk, which would be bad, so I'm inclined to assume that the
+ * bgwriter will always empty the queue soon.
*/
- HASH_SEQ_STATUS hstat;
- PendingOperationEntry *entry;
+ while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC))
+ pg_usleep(10000L); /* 10 msec seems a good number */
+ /*
+ * Note we don't wait for the bgwriter to actually absorb the
+ * revoke message; see mdsync() for the implications.
+ */
+ }
+}
- hash_seq_init(&hstat, pendingOpsTable);
- while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
- {
- if (RelFileNodeEquals(entry->tag.rnode, rnode))
- {
- /* Okay, delete this entry */
- if (hash_search(pendingOpsTable, &entry->tag,
- HASH_REMOVE, NULL) == NULL)
- elog(ERROR, "pendingOpsTable corrupted");
- }
- }
+/*
+ * ForgetDatabaseFsyncRequests -- ensure any fsyncs for a DB are forgotten
+ */
+void
+ForgetDatabaseFsyncRequests(Oid dbid)
+{
+ RelFileNode rnode;
+
+ rnode.dbNode = dbid;
+ rnode.spcNode = 0;
+ rnode.relNode = 0;
+
+ if (pendingOpsTable)
+ {
+ /* standalone backend or startup process: fsync state is local */
+ RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC);
+ }
+ else if (IsUnderPostmaster)
+ {
+ /* see notes in ForgetRelationFsyncRequests */
+ while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC))
+ pg_usleep(10000L); /* 10 msec seems a good number */
}
}
+
/*
* _fdvec_alloc() -- Make a MdfdVec object.
*/
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index ffa135c0284..3beb14febaf 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.57 2007/01/05 22:19:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.58 2007/01/17 16:25:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -106,6 +106,8 @@ extern void mdimmedsync(SMgrRelation reln);
extern void mdsync(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
+extern void ForgetRelationFsyncRequests(RelFileNode rnode);
+extern void ForgetDatabaseFsyncRequests(Oid dbid);
/* smgrtype.c */
extern Datum smgrout(PG_FUNCTION_ARGS);