aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/bufmgr.c
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2004-05-31 03:48:10 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2004-05-31 03:48:10 +0000
commit 9b178555fc1f5087c120ff4d26380395bc655a03 (patch)
tree 3578c76707795c2b25910ea42b36928eb6d4d742 /src/backend/storage/buffer/bufmgr.c
parent f024086db30f26905e4c877a6795c1ab95f4ab12 (diff)
download postgresql-9b178555fc1f5087c120ff4d26380395bc655a03.tar.gz
postgresql-9b178555fc1f5087c120ff4d26380395bc655a03.zip
Per previous discussions, get rid of use of sync(2) in favor of
explicitly fsync'ing every (non-temp) file we have written since the last checkpoint. In the vast majority of cases, the burden of the fsyncs should fall on the bgwriter process not on backends. (To this end, we assume that an fsync issued by the bgwriter will force out blocks written to the same file by other processes using other file descriptors. Anyone have a problem with that?) This makes the world safe for WIN32, which ain't even got sync(2), and really makes the world safe for Unixen as well, because sync(2) never had the semantics we need: it offers no way to wait for the requested I/O to finish. Along the way, fix a bug I recently introduced in xlog recovery: file truncation replay failed to clear bufmgr buffers for the dropped blocks, which could result in 'PANIC: heap_delete_redo: no block' later on in xlog replay.
Diffstat (limited to 'src/backend/storage/buffer/bufmgr.c')
-rw-r--r-- src/backend/storage/buffer/bufmgr.c | 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index f718e33cd59..2386bc89bf3 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.166 2004/05/29 22:48:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.167 2004/05/31 03:48:02 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1044,6 +1044,9 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* bothering to write them out first. This is NOT rollback-able,
* and so should be used only with extreme caution!
*
+ * There is no particularly good reason why this doesn't have a
+ * firstDelBlock parameter, except that current callers don't need it.
+ *
* We assume that the caller holds an exclusive lock on the relation,
* which should assure that no new buffers will be acquired for the rel
* meanwhile.
@@ -1052,14 +1055,15 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
void
DropRelationBuffers(Relation rel)
{
- DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp);
+ DropRelFileNodeBuffers(rel->rd_node, rel->rd_istemp, 0);
}
/* ---------------------------------------------------------------------
* DropRelFileNodeBuffers
*
* This is the same as DropRelationBuffers, except that the target
- * relation is specified by RelFileNode and temp status.
+ * relation is specified by RelFileNode and temp status, and one
+ * may specify the first block to drop.
*
* This is NOT rollback-able. One legitimate use is to clear the
* buffer cache of buffers for a relation that is being deleted
@@ -1067,7 +1071,8 @@ DropRelationBuffers(Relation rel)
* --------------------------------------------------------------------
*/
void
-DropRelFileNodeBuffers(RelFileNode rnode, bool istemp)
+DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
+ BlockNumber firstDelBlock)
{
int i;
BufferDesc *bufHdr;
@@ -1077,7 +1082,8 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp)
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
- if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+ bufHdr->tag.blockNum >= firstDelBlock)
{
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
@@ -1094,7 +1100,8 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp)
{
bufHdr = &BufferDescriptors[i - 1];
recheck:
- if (RelFileNodeEquals(bufHdr->tag.rnode, rnode))
+ if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+ bufHdr->tag.blockNum >= firstDelBlock)
{
/*
* If there is I/O in progress, better wait till it's done;