author     Tom Lane <tgl@sss.pgh.pa.us>  2004-04-21 18:06:30 +0000
committer  Tom Lane <tgl@sss.pgh.pa.us>  2004-04-21 18:06:30 +0000
commit     95a03e9cdf7e0e33c2655dd20d2b64db191f3a21 (patch)
tree       44819c567d27fd6ede1abbf42df4899a88de400c
parent     aeee856564837a7d53da6c02f871c5b9f6631ebf (diff)
Another round of code cleanup on bufmgr. Use the BM_VALID flag to keep track
of whether we have successfully read data into a buffer; this makes the error
behavior a bit more transparent (IMHO anyway), and also makes it work correctly
for local buffers, which don't use Start/TerminateBufferIO. Collapse three
separate functions for writing a shared buffer into one. This overlaps a bit
with cleanups that Neil proposed a while back, but those seem not to have been
committed yet.
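To illustrate the idea behind the patch, here is a minimal standalone sketch in
plain C (not PostgreSQL source): buffer contents are trusted only once BM_VALID
has been set after a successful read, so a failed read simply leaves the flag
clear and a later lookup retries the I/O instead of testing a separate
"broken buffer" condition. All structure and function names below are invented
for the example; only the flag values follow the new buf_internals.h
definitions, and the concurrency machinery (BM_IO_IN_PROGRESS, WaitIO) is
omitted.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define BM_VALID     (1 << 1)   /* data is valid (read completed OK) */
#define BM_IO_ERROR  (1 << 3)   /* previous I/O attempt failed */

typedef struct SketchBuffer
{
    int  flags;
    char data[8192];
} SketchBuffer;

/* Stand-in for smgrread(); fails on the first call to exercise the retry. */
static bool
sketch_read_block(SketchBuffer *buf)
{
    static int attempts = 0;

    if (attempts++ == 0)
        return false;                        /* simulated I/O error */
    memset(buf->data, 0, sizeof(buf->data)); /* pretend we read a page */
    return true;
}

/* Return the buffer only once its contents are known valid, retrying reads. */
static SketchBuffer *
sketch_get_buffer(SketchBuffer *buf)
{
    int tries;

    for (tries = 0; tries < 3; tries++)
    {
        if (buf->flags & BM_VALID)
            return buf;                      /* contents already trusted */

        if (sketch_read_block(buf))
        {
            buf->flags |= BM_VALID;          /* read succeeded: mark valid */
            buf->flags &= ~BM_IO_ERROR;
        }
        else
        {
            buf->flags |= BM_IO_ERROR;       /* leave BM_VALID clear; retry */
            fprintf(stderr, "read failed, retrying\n");
        }
    }
    return NULL;        /* the real code would report the error instead */
}

int
main(void)
{
    SketchBuffer buf = {0, {0}};

    if (sketch_get_buffer(&buf) != NULL)
        printf("buffer is valid\n");
    return 0;
}

In the actual patch this check lives in BufferAlloc and LocalBufferAlloc, where
BM_IO_IN_PROGRESS and WaitIO() coordinate concurrent readers; the
single-process sketch above leaves that out.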
-rw-r--r--src/backend/storage/buffer/buf_init.c6
-rw-r--r--src/backend/storage/buffer/bufmgr.c658
-rw-r--r--src/backend/storage/buffer/freelist.c20
-rw-r--r--src/backend/storage/buffer/localbuf.c34
-rw-r--r--src/include/storage/buf_internals.h30
-rw-r--r--src/include/storage/bufmgr.h4
6 files changed, 326 insertions, 426 deletions
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index e0aa0e93e86..8bbfb49752e 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.63 2004/04/19 23:27:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/buf_init.c,v 1.64 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -133,11 +133,11 @@ InitBufferPool(void)
buf->bufNext = i + 1;
- CLEAR_BUFFERTAG(&(buf->tag));
+ CLEAR_BUFFERTAG(buf->tag);
buf->buf_id = i;
buf->data = MAKE_OFFSET(block);
- buf->flags = (BM_DELETED | BM_VALID);
+ buf->flags = 0;
buf->refcount = 0;
buf->io_in_progress_lock = LWLockAssign();
buf->cntx_lock = LWLockAssign();
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a80435b7ec2..b57ac072445 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,20 +8,14 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.161 2004/04/19 23:27:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.162 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
- *
- * BufferAlloc() -- lookup a buffer in the buffer table. If
- * it isn't there add it, but do not read data into memory.
- * This is used when we are about to reinitialize the
- * buffer so don't care what the current disk contents are.
- * BufferAlloc() also pins the new buffer in memory.
- *
- * ReadBuffer() -- like BufferAlloc() but reads the data
- * on a buffer cache miss.
+ * ReadBuffer() -- find or create a buffer holding the requested page,
+ * and pin it so that no one can destroy it while this process
+ * is using it.
*
* ReleaseBuffer() -- unpin the buffer
*
@@ -31,7 +25,7 @@
*
* WriteBuffer() -- WriteNoReleaseBuffer() + ReleaseBuffer()
*
- * BufferSync() -- flush all dirty buffers in the buffer pool.
+ * BufferSync() -- flush all (or some) dirty buffers in the buffer pool.
*
* InitBufferPool() -- Init the buffer module.
*
@@ -76,25 +70,19 @@ long NDirectFileRead; /* some I/O's are direct file access.
* bypass bufmgr */
long NDirectFileWrite; /* e.g., I/O in psort and hashjoin. */
-/*
- * Macro : BUFFER_IS_BROKEN
- * Note that write error doesn't mean the buffer broken
-*/
-#define BUFFER_IS_BROKEN(buf) ((buf->flags & BM_IO_ERROR) && !(buf->flags & BM_DIRTY))
-
static void PinBuffer(BufferDesc *buf);
static void UnpinBuffer(BufferDesc *buf);
static void WaitIO(BufferDesc *buf);
static void StartBufferIO(BufferDesc *buf, bool forInput);
-static void TerminateBufferIO(BufferDesc *buf);
+static void TerminateBufferIO(BufferDesc *buf, int err_flag);
static void ContinueBufferIO(BufferDesc *buf, bool forInput);
static void buffer_write_error_callback(void *arg);
static Buffer ReadBufferInternal(Relation reln, BlockNumber blockNum,
bool bufferLockHeld);
static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
bool *foundPtr);
-static void BufferReplace(BufferDesc *bufHdr);
+static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
static void write_buffer(Buffer buffer, bool unpin);
@@ -107,8 +95,8 @@ static void write_buffer(Buffer buffer, bool unpin);
* relation at the same time!)
*
* Returns: the buffer number for the buffer containing
- * the block read, or NULL on an error. If successful,
- * the returned buffer has been pinned.
+ * the block read. The returned buffer has been pinned.
+ * Does not return on error --- elog's instead.
*
* Assume when this function is called, that reln has been
* opened already.
@@ -149,16 +137,11 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
pgstat_count_buffer_read(&reln->pgstat_info, reln);
/* Substitute proper block number if caller asked for P_NEW */
if (isExtend)
- {
blockNum = reln->rd_nblocks;
- reln->rd_nblocks++;
- }
+
bufHdr = LocalBufferAlloc(reln, blockNum, &found);
if (found)
- {
LocalBufferHitCount++;
- pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- }
}
else
{
@@ -169,7 +152,6 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
{
/* must be sure we have accurate file length! */
blockNum = reln->rd_nblocks = smgrnblocks(reln->rd_smgr);
- reln->rd_nblocks++;
}
/*
@@ -180,48 +162,44 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
bufHdr = BufferAlloc(reln, blockNum, &found);
if (found)
- {
BufferHitCount++;
- pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- }
}
/* At this point we do NOT hold the bufmgr lock. */
- if (!bufHdr)
- return InvalidBuffer;
-
- /* if it's already in the buffer pool, we're done */
+ /* if it was already in the buffer pool, we're done */
if (found)
{
- /* That is, we're done if we expected to be able to find it ... */
- if (!isExtend)
- return BufferDescriptorGetBuffer(bufHdr);
+ /* Just need to update stats before we exit */
+ pgstat_count_buffer_hit(&reln->pgstat_info, reln);
- /*
- * If we found a buffer when we were expecting to extend the
- * relation, the implication is that a buffer was already created
- * for the next page position, but then smgrextend failed to write
- * the page. We'd better try the smgrextend again. But since
- * BufferAlloc won't have done StartBufferIO, we must do that
- * first.
- */
- if (!isLocalBuf)
- {
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- StartBufferIO(bufHdr, false);
- LWLockRelease(BufMgrLock);
- }
+ if (VacuumCostActive)
+ VacuumCostBalance += VacuumCostPageHit;
+
+ return BufferDescriptorGetBuffer(bufHdr);
}
/*
- * if we have gotten to this point, the relation must be open in the smgr.
+ * if we have gotten to this point, we have allocated a buffer for the
+ * page but its contents are not yet valid. IO_IN_PROGRESS is set for
+ * it, if it's a shared buffer.
+ *
+ * Note: if smgrextend fails, we will end up with a buffer that is
+ * allocated but not marked BM_VALID. P_NEW will still select the same
+ * block number (because the relation didn't get any longer on disk)
+ * and so future attempts to extend the relation will find the same
+ * buffer (if it's not been recycled) but come right back here to try
+ * smgrextend again.
*/
+ Assert(!(bufHdr->flags & BM_VALID));
+
if (isExtend)
{
/* new buffers are zero-filled */
MemSet((char *) MAKE_PTR(bufHdr->data), 0, BLCKSZ);
smgrextend(reln->rd_smgr, blockNum, (char *) MAKE_PTR(bufHdr->data));
+ /* After successful extend, increment relation length */
+ reln->rd_nblocks++;
}
else
{
@@ -253,29 +231,41 @@ ReadBufferInternal(Relation reln, BlockNumber blockNum,
if (isLocalBuf)
{
- /* No shared buffer state to update... */
- return BufferDescriptorGetBuffer(bufHdr);
+ /* Only need to adjust flags */
+ bufHdr->flags |= BM_VALID;
}
+ else
+ {
+ /* lock buffer manager again to update IO IN PROGRESS */
+ LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- /* lock buffer manager again to update IO IN PROGRESS */
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
+ /* IO Succeeded, so mark data valid */
+ bufHdr->flags |= BM_VALID;
- /* IO Succeeded. clear the flags, finish buffer update */
- bufHdr->flags &= ~(BM_IO_ERROR | BM_IO_IN_PROGRESS);
+ /* If anyone was waiting for IO to complete, wake them up now */
+ TerminateBufferIO(bufHdr, 0);
- /* If anyone was waiting for IO to complete, wake them up now */
- TerminateBufferIO(bufHdr);
+ LWLockRelease(BufMgrLock);
+ }
- LWLockRelease(BufMgrLock);
+ if (VacuumCostActive)
+ VacuumCostBalance += VacuumCostPageMiss;
return BufferDescriptorGetBuffer(bufHdr);
}
/*
- * BufferAlloc -- Get a buffer from the buffer pool but don't
- * read it. If successful, the returned buffer is pinned.
+ * BufferAlloc -- subroutine for ReadBuffer. Handles lookup of a shared
+ * buffer. If no buffer exists already, selects a replacement
+ * victim and evicts the old page, but does NOT read in new page.
*
- * Returns: descriptor for buffer
+ * The returned buffer is pinned and is already marked as holding the
+ * desired page. If it already did have the desired page, *foundPtr is
+ * set TRUE. Otherwise, *foundPtr is set FALSE and the buffer is marked
+ * as IO_IN_PROGRESS; ReadBuffer will now need to do I/O to fill it.
+ *
+ * *foundPtr is actually redundant with the buffer's BM_VALID flag, but
+ * we keep it for simplicity in ReadBuffer.
*
* BufMgrLock must be held at entry. When this routine returns,
* the BufMgrLock is guaranteed NOT to be held.
@@ -285,67 +275,51 @@ BufferAlloc(Relation reln,
BlockNumber blockNum,
bool *foundPtr)
{
+ BufferTag newTag; /* identity of requested block */
BufferDesc *buf,
*buf2;
- BufferTag newTag; /* identity of requested block */
int cdb_found_index,
cdb_replace_index;
- bool inProgress; /* buffer undergoing IO */
+ bool inProgress; /* did we already do StartBufferIO? */
/* create a tag so we can lookup the buffer */
- INIT_BUFFERTAG(&newTag, reln, blockNum);
+ INIT_BUFFERTAG(newTag, reln, blockNum);
/* see if the block is in the buffer pool already */
buf = StrategyBufferLookup(&newTag, false, &cdb_found_index);
if (buf != NULL)
{
/*
- * Found it. Now, (a) pin the buffer so no one steals it from the
- * buffer pool, (b) check IO_IN_PROGRESS, someone may be faulting
- * the buffer into the buffer pool.
+ * Found it. Now, pin the buffer so no one can steal it from the
+ * buffer pool, and check to see if someone else is still reading
+ * data into the buffer. (Formerly, we'd always block here if
+ * IO_IN_PROGRESS is set, but there's no need to wait when someone
+ * is writing rather than reading.)
*/
+ *foundPtr = TRUE;
PinBuffer(buf);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
- *foundPtr = TRUE;
- if (inProgress) /* confirm end of IO */
- {
- WaitIO(buf);
- inProgress = (buf->flags & BM_IO_IN_PROGRESS);
- }
- if (BUFFER_IS_BROKEN(buf))
+ if (!(buf->flags & BM_VALID))
{
- /*
- * I couldn't understand the following old comment. If there's
- * no IO for the buffer and the buffer is BROKEN, it should be
- * read again. So start a new buffer IO here.
- *
- * wierd race condition:
- *
- * We were waiting for someone else to read the buffer. While we
- * were waiting, the reader boof'd in some way, so the
- * contents of the buffer are still invalid. By saying that
- * we didn't find it, we can make the caller reinitialize the
- * buffer. If two processes are waiting for this block, both
- * will read the block. The second one to finish may
- * overwrite any updates made by the first. (Assume higher
- * level synchronization prevents this from happening).
- *
- * This is never going to happen, don't worry about it.
- */
- *foundPtr = FALSE;
- StartBufferIO(buf, true);
+ if (buf->flags & BM_IO_IN_PROGRESS)
+ {
+ /* someone else is reading it, wait for them */
+ WaitIO(buf);
+ }
+ if (!(buf->flags & BM_VALID))
+ {
+ /*
+ * If we get here, previous attempts to read the buffer
+ * must have failed ... but we shall bravely try again.
+ */
+ *foundPtr = FALSE;
+ StartBufferIO(buf, true);
+ }
}
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
-
return buf;
}
@@ -354,10 +328,10 @@ BufferAlloc(Relation reln,
/*
* Didn't find it in the buffer pool. We'll have to initialize a new
* buffer. First, grab one from the free list. If it's dirty, flush
- * it to disk. Remember to unlock BufMgrLock while doing the IOs.
+ * it to disk. Remember to unlock BufMgrLock while doing the IO.
*/
inProgress = FALSE;
- for (buf = NULL; buf == NULL;)
+ do
{
buf = StrategyGetBuffer(&cdb_replace_index);
@@ -374,130 +348,100 @@ BufferAlloc(Relation reln,
buf->refcount = 1;
PrivateRefCount[BufferDescriptorGetBuffer(buf) - 1] = 1;
- if (buf->flags & BM_DIRTY || buf->cntxDirty)
+ if ((buf->flags & BM_VALID) &&
+ (buf->flags & BM_DIRTY || buf->cntxDirty))
{
/*
- * skip write error buffers
+ * Set BM_IO_IN_PROGRESS to show the buffer is being written.
+ * It cannot already be set because the buffer would be pinned
+ * if someone were writing it.
+ *
+ * Note: it's okay to grab the io_in_progress lock while holding
+ * BufMgrLock. All code paths that acquire this lock pin the
+ * buffer first; since no one had it pinned (it just came off the
+ * free list), no one else can have the lock.
*/
- if ((buf->flags & BM_IO_ERROR) != 0)
- {
- UnpinBuffer(buf);
- buf = NULL;
- continue;
- }
+ StartBufferIO(buf, false);
- /*
- * Set BM_IO_IN_PROGRESS to keep anyone from doing anything
- * with the contents of the buffer while we write it out. We
- * don't really care if they try to read it, but if they can
- * complete a BufferAlloc on it they can then scribble into
- * it, and we'd really like to avoid that while we are
- * flushing the buffer. Setting this flag should block them
- * in WaitIO until we're done.
- */
inProgress = TRUE;
/*
- * All code paths that acquire this lock pin the buffer first;
- * since no one had it pinned (it just came off the free
- * list), no one else can have this lock.
- */
- StartBufferIO(buf, false);
-
- /*
* Write the buffer out, being careful to release BufMgrLock
- * before starting the I/O.
- */
- BufferReplace(buf);
-
- /*
- * BM_JUST_DIRTIED cleared by BufferReplace and shouldn't
- * be set by anyone. - vadim 01/17/97
- */
- if (buf->flags & BM_JUST_DIRTIED)
- {
- elog(PANIC, "content of block %u of %u/%u changed while flushing",
- buf->tag.blockNum,
- buf->tag.rnode.tblNode, buf->tag.rnode.relNode);
- }
-
- buf->flags &= ~BM_DIRTY;
- buf->cntxDirty = false;
-
- /*
- * Somebody could have pinned the buffer while we were doing
- * the I/O and had given up the BufMgrLock (though they would
- * be waiting for us to clear the BM_IO_IN_PROGRESS flag).
- * That's why this is a loop -- if so, we need to clear the
- * I/O flags, remove our pin and start all over again.
- *
- * People may be making buffers free at any time, so there's no
- * reason to think that we have an immediate disaster on our
- * hands.
+ * while doing the I/O.
*/
- if (buf && buf->refcount > 1)
- {
- inProgress = FALSE;
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
- UnpinBuffer(buf);
- buf = NULL;
- }
+ FlushBuffer(buf, NULL);
/*
* Somebody could have allocated another buffer for the same
- * block we are about to read in. (While we flush out the
+ * block we are about to read in. While we flush out the
* dirty buffer, we don't hold the lock and someone could have
* allocated another buffer for the same block. The problem is
- * we haven't gotten around to insert the new tag into the
- * buffer table. So we need to check here. -ay 3/95
+ * we haven't yet inserted the new tag into the buffer table.
+ * So we need to check here. -ay 3/95
+ *
+ * Another reason we have to do this is to update cdb_found_index,
+ * since the CDB could have disappeared from B1/B2 list while
+ * we were writing.
*/
buf2 = StrategyBufferLookup(&newTag, true, &cdb_found_index);
if (buf2 != NULL)
{
/*
- * Found it. Someone has already done what we're about to
+ * Found it. Someone has already done what we were about to
* do. We'll just handle this as if it were found in the
- * buffer pool in the first place.
+ * buffer pool in the first place. First, give up the
+ * buffer we were planning to use.
*/
- if (buf != NULL)
- {
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
- /* give up old buffer since we don't need it any more */
- UnpinBuffer(buf);
- }
+ TerminateBufferIO(buf, 0);
+ UnpinBuffer(buf);
- PinBuffer(buf2);
- inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
+ buf = buf2;
+
+ /* remaining code should match code at top of routine */
*foundPtr = TRUE;
- if (inProgress)
- {
- WaitIO(buf2);
- inProgress = (buf2->flags & BM_IO_IN_PROGRESS);
- }
- if (BUFFER_IS_BROKEN(buf2))
+ PinBuffer(buf);
+
+ if (!(buf->flags & BM_VALID))
{
- *foundPtr = FALSE;
- StartBufferIO(buf2, true);
+ if (buf->flags & BM_IO_IN_PROGRESS)
+ {
+ /* someone else is reading it, wait for them */
+ WaitIO(buf);
+ }
+ if (!(buf->flags & BM_VALID))
+ {
+ /*
+ * If we get here, previous attempts to read the buffer
+ * must have failed ... but we shall bravely try again.
+ */
+ *foundPtr = FALSE;
+ StartBufferIO(buf, true);
+ }
}
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum. (XXX perhaps better
- * to consider this a miss? We didn't have to do the read,
- * but we did have to write ...)
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
+ return buf;
+ }
- return buf2;
+ /*
+ * Somebody could have pinned the buffer while we were doing
+ * the I/O and had given up the BufMgrLock. If so, we can't
+ * recycle this buffer --- we need to clear the I/O flags,
+ * remove our pin and choose a new victim buffer. Similarly,
+ * we have to start over if somebody re-dirtied the buffer.
+ */
+ if (buf->refcount > 1 || buf->flags & BM_DIRTY || buf->cntxDirty)
+ {
+ TerminateBufferIO(buf, 0);
+ UnpinBuffer(buf);
+ inProgress = FALSE;
+ buf = NULL;
}
}
- }
+ } while (buf == NULL);
/*
* At this point we should have the sole pin on a non-dirty buffer and
@@ -506,16 +450,19 @@ BufferAlloc(Relation reln,
/*
* Tell the buffer replacement strategy that we are replacing the
- * buffer content. Then rename the buffer.
+ * buffer content. Then rename the buffer. Clearing BM_VALID here
+ * is necessary, clearing the dirtybits is just paranoia.
*/
StrategyReplaceBuffer(buf, &newTag, cdb_found_index, cdb_replace_index);
buf->tag = newTag;
+ buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
+ buf->cntxDirty = false;
/*
* Buffer contents are currently invalid. Have to mark IO IN PROGRESS
- * so no one fiddles with them until the read completes. If this
- * routine has been called simply to allocate a buffer, no io will be
- * attempted, so the flag isnt set.
+ * so no one fiddles with them until the read completes. We may have
+ * already marked it, in which case we just flip from write to read
+ * status.
*/
if (!inProgress)
StartBufferIO(buf, true);
@@ -524,12 +471,6 @@ BufferAlloc(Relation reln,
LWLockRelease(BufMgrLock);
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageMiss;
-
return buf;
}
@@ -553,11 +494,13 @@ write_buffer(Buffer buffer, bool release)
bufHdr = &BufferDescriptors[buffer - 1];
+ Assert(PrivateRefCount[buffer - 1] > 0);
+
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
Assert(bufHdr->refcount > 0);
/*
- * If the buffer is not dirty yet, do vacuum cost accounting.
+ * If the buffer was not dirty already, do vacuum cost accounting.
*/
if (!(bufHdr->flags & BM_DIRTY) && VacuumCostActive)
VacuumCostBalance += VacuumCostPageDirty;
@@ -714,7 +657,6 @@ BufferSync(int percent, int maxpages)
BufferTag *buftags;
int num_buffer_dirty;
int i;
- ErrorContextCallback errcontext;
/*
* Get a list of all currently dirty buffers and how many there are.
@@ -741,25 +683,14 @@ BufferSync(int percent, int maxpages)
if (maxpages > 0 && num_buffer_dirty > maxpages)
num_buffer_dirty = maxpages;
- /* Setup error traceback support for ereport() */
- errcontext.callback = buffer_write_error_callback;
- errcontext.arg = NULL;
- errcontext.previous = error_context_stack;
- error_context_stack = &errcontext;
-
/*
* Loop over buffers to be written. Note the BufMgrLock is held at
- * loop top, but is released and reacquired intraloop, so we aren't
- * holding it long.
+ * loop top, but is released and reacquired within FlushBuffer,
+ * so we aren't holding it long.
*/
for (i = 0; i < num_buffer_dirty; i++)
{
BufferDesc *bufHdr = dirty_buffers[i];
- Buffer buffer;
- XLogRecPtr recptr;
- SMgrRelation reln;
-
- errcontext.arg = bufHdr;
/*
* Check it is still the same page and still needs writing.
@@ -773,9 +704,9 @@ BufferSync(int percent, int maxpages)
*/
if (!(bufHdr->flags & BM_VALID))
continue;
- if (!BUFFERTAGS_EQUAL(&bufHdr->tag, &buftags[i]))
+ if (!BUFFERTAGS_EQUAL(bufHdr->tag, buftags[i]))
continue;
- if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))
+ if (!(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
continue;
/*
@@ -788,9 +719,9 @@ BufferSync(int percent, int maxpages)
/* Still need writing? */
if (!(bufHdr->flags & BM_VALID))
continue;
- if (!BUFFERTAGS_EQUAL(&bufHdr->tag, &buftags[i]))
+ if (!BUFFERTAGS_EQUAL(bufHdr->tag, buftags[i]))
continue;
- if (!(bufHdr->flags & BM_DIRTY) && !(bufHdr->cntxDirty))
+ if (!(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
continue;
}
@@ -800,78 +731,16 @@ BufferSync(int percent, int maxpages)
* avoid conflicts with FlushRelationBuffers.
*/
PinBuffer(bufHdr);
- StartBufferIO(bufHdr, false); /* output IO start */
-
- /* Release BufMgrLock while doing xlog work */
- LWLockRelease(BufMgrLock);
-
- buffer = BufferDescriptorGetBuffer(bufHdr);
-
- /*
- * Protect buffer content against concurrent update
- */
- LockBuffer(buffer, BUFFER_LOCK_SHARE);
-
- /*
- * Force XLOG flush for buffer' LSN
- */
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
-
- /*
- * Now it's safe to write buffer to disk. Note that no one else
- * should have been able to write it while we were busy with
- * locking and log flushing because we set the IO flag.
- *
- * Before we issue the actual write command, clear the just-dirtied
- * flag. This lets us recognize concurrent changes (note that only
- * hint-bit changes are possible since we hold the buffer shlock).
- */
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- Assert(bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty);
- bufHdr->flags &= ~BM_JUST_DIRTIED;
- LWLockRelease(BufMgrLock);
-
- /* Find smgr relation for buffer */
- reln = smgropen(bufHdr->tag.rnode);
-
- /* And write... */
- smgrwrite(reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
-
- /*
- * Note that it's safe to change cntxDirty here because of we
- * protect it from upper writers by share lock and from other
- * bufmgr routines by BM_IO_IN_PROGRESS
- */
- bufHdr->cntxDirty = false;
+ StartBufferIO(bufHdr, false);
- /*
- * Release the per-buffer readlock, reacquire BufMgrLock.
- */
- LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+ FlushBuffer(bufHdr, NULL);
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
-
- bufHdr->flags &= ~BM_IO_IN_PROGRESS; /* mark IO finished */
- TerminateBufferIO(bufHdr); /* Sync IO finished */
- BufferFlushCount++;
-
- /*
- * If this buffer was marked by someone as DIRTY while we were
- * flushing it out we must not clear DIRTY flag - vadim 01/17/97
- */
- if (!(bufHdr->flags & BM_JUST_DIRTIED))
- bufHdr->flags &= ~BM_DIRTY;
+ TerminateBufferIO(bufHdr, 0);
UnpinBuffer(bufHdr);
}
LWLockRelease(BufMgrLock);
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
pfree(dirty_buffers);
pfree(buftags);
@@ -1123,51 +992,96 @@ BufferGetFileNode(Buffer buffer)
}
/*
- * BufferReplace
+ * FlushBuffer
+ * Physically write out a shared buffer.
*
- * Write out the buffer corresponding to 'bufHdr'.
+ * NOTE: this actually just passes the buffer contents to the kernel; the
+ * real write to disk won't happen until the kernel feels like it. This
+ * is okay from our point of view since we can redo the changes from WAL.
+ * However, we will need to force the changes to disk via sync/fsync
+ * before we can checkpoint WAL.
*
- * BufMgrLock must be held at entry, and the buffer must be pinned.
+ * BufMgrLock must be held at entry, and the buffer must be pinned. The
+ * caller is also responsible for doing StartBufferIO/TerminateBufferIO.
+ *
+ * If the caller has an smgr reference for the buffer's relation, pass it
+ * as the second parameter. If not, pass NULL. (Do not open relation
+ * while holding BufMgrLock!)
*/
static void
-BufferReplace(BufferDesc *bufHdr)
+FlushBuffer(BufferDesc *buf, SMgrRelation reln)
{
- SMgrRelation reln;
+ Buffer buffer;
XLogRecPtr recptr;
ErrorContextCallback errcontext;
+ /* Transpose cntxDirty into flags while holding BufMgrLock */
+ buf->cntxDirty = false;
+ buf->flags |= BM_DIRTY;
+
/* To check if block content changed while flushing. - vadim 01/17/97 */
- bufHdr->flags &= ~BM_JUST_DIRTIED;
+ buf->flags &= ~BM_JUST_DIRTIED;
+ /* Release BufMgrLock while doing xlog work */
LWLockRelease(BufMgrLock);
/* Setup error traceback support for ereport() */
errcontext.callback = buffer_write_error_callback;
- errcontext.arg = bufHdr;
+ errcontext.arg = buf;
errcontext.previous = error_context_stack;
error_context_stack = &errcontext;
+ /* Find smgr relation for buffer while holding minimal locks */
+ if (reln == NULL)
+ reln = smgropen(buf->tag.rnode);
+
+ buffer = BufferDescriptorGetBuffer(buf);
+
/*
- * No need to lock buffer context - no one should be able to end
- * ReadBuffer
+ * Protect buffer content against concurrent update. (Note that
+ * hint-bit updates can still occur while the write is in progress,
+ * but we assume that that will not invalidate the data written.)
*/
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
- /* Find smgr relation for buffer */
- reln = smgropen(bufHdr->tag.rnode);
+ /*
+ * Force XLOG flush for buffer' LSN. This implements the basic WAL
+ * rule that log updates must hit disk before any of the data-file
+ * changes they describe do.
+ */
+ recptr = BufferGetLSN(buf);
+ XLogFlush(recptr);
- /* And write... */
+ /*
+ * Now it's safe to write buffer to disk. Note that no one else
+ * should have been able to write it while we were busy with
+ * locking and log flushing because caller has set the IO flag.
+ *
+ * It would be better to clear BM_JUST_DIRTIED right here, but we'd
+ * have to reacquire the BufMgrLock and it doesn't seem worth it.
+ */
smgrwrite(reln,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ buf->tag.blockNum,
+ (char *) MAKE_PTR(buf->data));
/* Pop the error context stack */
error_context_stack = errcontext.previous;
+ /*
+ * Release the per-buffer readlock, reacquire BufMgrLock.
+ */
+ LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+
LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
BufferFlushCount++;
+
+ /*
+ * If this buffer was marked by someone as DIRTY while we were
+ * flushing it out we must not clear DIRTY flag - vadim 01/17/97
+ */
+ if (!(buf->flags & BM_JUST_DIRTIED))
+ buf->flags &= ~BM_DIRTY;
}
/*
@@ -1490,6 +1404,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
* In all cases, the caller should be holding AccessExclusiveLock on
* the target relation to ensure that no other backend is busy reading
* more blocks of the relation (or might do so before we commit).
+ * This should also ensure that no one is busy dirtying these blocks.
*
* Formerly, we considered it an error condition if we found dirty
* buffers here. However, since BufferSync no longer forces out all
@@ -1497,7 +1412,7 @@ blockNum=%u, flags=0x%x, refcount=%d %ld)",
* to still be present in the cache due to failure of an earlier
* transaction. So, must flush dirty buffers without complaint.
*
- * Returns: 0 - Ok, -1 - FAILED TO WRITE DIRTY BUFFER, -2 - PINNED
+ * Returns: 0 - Ok, -1 - FAILED TO CLEAR DIRTY BIT, -2 - PINNED
*
* XXX currently it sequentially searches the buffer pool, should be
* changed to more clever ways of searching.
@@ -1508,38 +1423,41 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
{
int i;
BufferDesc *bufHdr;
- XLogRecPtr recptr;
- ErrorContextCallback errcontext;
-
- /* Setup error traceback support for ereport() */
- errcontext.callback = buffer_write_error_callback;
- errcontext.arg = NULL;
- errcontext.previous = error_context_stack;
- error_context_stack = &errcontext;
if (rel->rd_istemp)
{
for (i = 0; i < NLocBuffer; i++)
{
bufHdr = &LocalBufferDescriptors[i];
- errcontext.arg = bufHdr;
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
- if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ if ((bufHdr->flags & BM_VALID) &&
+ (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
{
- /* Open it at the smgr level if not already done */
+ ErrorContextCallback errcontext;
+
+ /* Setup error traceback support for ereport() */
+ errcontext.callback = buffer_write_error_callback;
+ errcontext.arg = bufHdr;
+ errcontext.previous = error_context_stack;
+ error_context_stack = &errcontext;
+
+ /* Open rel at the smgr level if not already done */
if (rel->rd_smgr == NULL)
rel->rd_smgr = smgropen(rel->rd_node);
smgrwrite(rel->rd_smgr,
bufHdr->tag.blockNum,
(char *) MAKE_PTR(bufHdr->data));
+
bufHdr->flags &= ~(BM_DIRTY | BM_JUST_DIRTIED);
bufHdr->cntxDirty = false;
+
+ /* Pop the error context stack */
+ error_context_stack = errcontext.previous;
}
if (LocalRefCount[i] > 0)
{
- error_context_stack = errcontext.previous;
elog(WARNING, "FlushRelationBuffers(\"%s\" (local), %u): block %u is referenced (%ld)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum, LocalRefCount[i]);
@@ -1550,9 +1468,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
}
}
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
return 0;
}
@@ -1561,67 +1476,37 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
for (i = 0; i < NBuffers; i++)
{
bufHdr = &BufferDescriptors[i];
- errcontext.arg = bufHdr;
if (RelFileNodeEquals(bufHdr->tag.rnode, rel->rd_node))
{
- if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ if ((bufHdr->flags & BM_VALID) &&
+ (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty))
{
PinBuffer(bufHdr);
+ /* Someone else might be flushing buffer */
if (bufHdr->flags & BM_IO_IN_PROGRESS)
WaitIO(bufHdr);
- LWLockRelease(BufMgrLock);
-
- /*
- * Force XLOG flush for buffer' LSN
- */
- recptr = BufferGetLSN(bufHdr);
- XLogFlush(recptr);
-
- /*
- * Now it's safe to write buffer to disk
- */
-
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- if (bufHdr->flags & BM_IO_IN_PROGRESS)
- WaitIO(bufHdr);
-
+ /* Still dirty? */
if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
{
- bufHdr->flags &= ~BM_JUST_DIRTIED;
- StartBufferIO(bufHdr, false); /* output IO start */
-
- LWLockRelease(BufMgrLock);
-
- /* Open it at the smgr level if not already done */
- if (rel->rd_smgr == NULL)
- rel->rd_smgr = smgropen(rel->rd_node);
-
- smgrwrite(rel->rd_smgr,
- bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
-
- BufferFlushCount++;
+ StartBufferIO(bufHdr, false);
- LWLockAcquire(BufMgrLock, LW_EXCLUSIVE);
- bufHdr->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(bufHdr);
- Assert(!(bufHdr->flags & BM_JUST_DIRTIED));
- bufHdr->flags &= ~BM_DIRTY;
+ FlushBuffer(bufHdr, rel->rd_smgr);
- /*
- * Note that it's safe to change cntxDirty here
- * because of we protect it from upper writers by
- * AccessExclusiveLock and from other bufmgr routines
- * by BM_IO_IN_PROGRESS
- */
- bufHdr->cntxDirty = false;
+ TerminateBufferIO(bufHdr, 0);
}
UnpinBuffer(bufHdr);
+ if (bufHdr->flags & BM_DIRTY || bufHdr->cntxDirty)
+ {
+ LWLockRelease(BufMgrLock);
+ elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u was re-dirtied",
+ RelationGetRelationName(rel), firstDelBlock,
+ bufHdr->tag.blockNum);
+ return -1;
+ }
}
if (bufHdr->refcount != 0)
{
LWLockRelease(BufMgrLock);
- error_context_stack = errcontext.previous;
elog(WARNING, "FlushRelationBuffers(\"%s\", %u): block %u is referenced (private %ld, global %d)",
RelationGetRelationName(rel), firstDelBlock,
bufHdr->tag.blockNum,
@@ -1635,9 +1520,6 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
LWLockRelease(BufMgrLock);
- /* Pop the error context stack */
- error_context_stack = errcontext.previous;
-
return 0;
}
@@ -1645,7 +1527,7 @@ FlushRelationBuffers(Relation rel, BlockNumber firstDelBlock)
* ReleaseBuffer -- remove the pin on a buffer without
* marking it dirty.
*/
-int
+void
ReleaseBuffer(Buffer buffer)
{
BufferDesc *bufHdr;
@@ -1654,15 +1536,16 @@ ReleaseBuffer(Buffer buffer)
{
Assert(LocalRefCount[-buffer - 1] > 0);
LocalRefCount[-buffer - 1]--;
- return STATUS_OK;
+ return;
}
if (BAD_BUFFER_ID(buffer))
- return STATUS_ERROR;
+ elog(ERROR, "bad buffer id: %d", buffer);
bufHdr = &BufferDescriptors[buffer - 1];
Assert(PrivateRefCount[buffer - 1] > 0);
+
if (PrivateRefCount[buffer - 1] > 1)
PrivateRefCount[buffer - 1]--;
else
@@ -1671,8 +1554,6 @@ ReleaseBuffer(Buffer buffer)
UnpinBuffer(bufHdr);
LWLockRelease(BufMgrLock);
}
-
- return STATUS_OK;
}
#ifdef NOT_USED
@@ -2021,16 +1902,24 @@ StartBufferIO(BufferDesc *buf, bool forInput)
* (Assumptions)
* My process is executing IO for the buffer
* BufMgrLock is held
+ * BM_IO_IN_PROGRESS mask is set for the buffer
* The buffer is Pinned
*
+ * err_flag must be 0 for successful completion and BM_IO_ERROR for failure.
+ *
* Because BufMgrLock is held, we are already in an interrupt holdoff here,
* and do not need another.
*/
static void
-TerminateBufferIO(BufferDesc *buf)
+TerminateBufferIO(BufferDesc *buf, int err_flag)
{
Assert(buf == InProgressBuf);
+ Assert(buf->flags & BM_IO_IN_PROGRESS);
+ buf->flags &= ~(BM_IO_IN_PROGRESS | BM_IO_ERROR);
+ buf->flags |= err_flag;
+
LWLockRelease(buf->io_in_progress_lock);
+
InProgressBuf = NULL;
}
@@ -2062,7 +1951,8 @@ InitBufferIO(void)
/*
* Clean up any active buffer I/O after an error.
- * BufMgrLock isn't held when this function is called.
+ * BufMgrLock isn't held when this function is called,
+ * but we haven't yet released buffer pins, so the buffer is still pinned.
*
* If I/O was in progress, we always set BM_IO_ERROR.
*/
@@ -2086,9 +1976,9 @@ AbortBufferIO(void)
Assert(buf->flags & BM_IO_IN_PROGRESS);
if (IsForInput)
{
- Assert(!(buf->flags & BM_DIRTY) && !(buf->cntxDirty));
- /* Don't think that buffer is valid */
- StrategyInvalidateBuffer(buf);
+ Assert(!(buf->flags & BM_DIRTY || buf->cntxDirty));
+ /* We'd better not think buffer is valid yet */
+ Assert(!(buf->flags & BM_VALID));
}
else
{
@@ -2106,9 +1996,7 @@ AbortBufferIO(void)
}
buf->flags |= BM_DIRTY;
}
- buf->flags |= BM_IO_ERROR;
- buf->flags &= ~BM_IO_IN_PROGRESS;
- TerminateBufferIO(buf);
+ TerminateBufferIO(buf, BM_IO_ERROR);
LWLockRelease(BufMgrLock);
}
}
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index c14d4464974..b59bfb9543d 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -12,7 +12,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.42 2004/04/19 23:27:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.43 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -501,7 +501,7 @@ StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
/* Assert that the buffer remembered in cdb_found is the one */
/* the buffer manager is currently faulting in */
- Assert(BUFFERTAGS_EQUAL(&(cdb_found->buf_tag), newTag));
+ Assert(BUFFERTAGS_EQUAL(cdb_found->buf_tag, *newTag));
if (cdb_replace_index >= 0)
{
@@ -513,7 +513,7 @@ StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
Assert(cdb_replace->list == STRAT_LIST_T1 ||
cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
- Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
+ Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));
/*
* Under normal circumstances we move the evicted T list entry to
@@ -606,7 +606,7 @@ StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
Assert(cdb_replace->list == STRAT_LIST_T1 ||
cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
- Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
+ Assert(BUFFERTAGS_EQUAL(cdb_replace->buf_tag, buf->tag));
if (cdb_replace->list == STRAT_LIST_T1)
{
@@ -673,7 +673,7 @@ StrategyInvalidateBuffer(BufferDesc *buf)
BufferStrategyCDB *cdb;
/* The buffer cannot be dirty or pinned */
- Assert(!(buf->flags & BM_DIRTY));
+ Assert(!(buf->flags & BM_DIRTY) || !(buf->flags & BM_VALID));
Assert(buf->refcount == 0);
/*
@@ -695,16 +695,18 @@ StrategyInvalidateBuffer(BufferDesc *buf)
* Clear out the CDB's buffer tag and association with the buffer
* and add it to the list of unused CDB's
*/
- CLEAR_BUFFERTAG(&(cdb->buf_tag));
+ CLEAR_BUFFERTAG(cdb->buf_tag);
cdb->buf_id = -1;
cdb->next = StrategyControl->listUnusedCDB;
StrategyControl->listUnusedCDB = cdb_id;
/*
* Clear out the buffer's tag and add it to the list of
- * currently unused buffers.
+ * currently unused buffers. We must do this to ensure that linear
+ * scans of the buffer array don't think the buffer is valid.
*/
- CLEAR_BUFFERTAG(&(buf->tag));
+ CLEAR_BUFFERTAG(buf->tag);
+ buf->flags &= ~(BM_VALID | BM_DIRTY);
buf->bufNext = StrategyControl->listFreeBuffers;
StrategyControl->listFreeBuffers = buf->buf_id;
}
@@ -864,7 +866,7 @@ StrategyInitialize(bool init)
{
StrategyCDB[i].next = i + 1;
StrategyCDB[i].list = STRAT_LIST_UNUSED;
- CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag));
+ CLEAR_BUFFERTAG(StrategyCDB[i].buf_tag);
StrategyCDB[i].buf_id = -1;
}
StrategyCDB[NBuffers * 2 - 1].next = -1;
diff --git a/src/backend/storage/buffer/localbuf.c b/src/backend/storage/buffer/localbuf.c
index bcbedc9c651..17f86ce44e4 100644
--- a/src/backend/storage/buffer/localbuf.c
+++ b/src/backend/storage/buffer/localbuf.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.52 2004/02/10 01:55:25 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.53 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -36,19 +36,25 @@ static int nextFreeLocalBuf = 0;
/*
* LocalBufferAlloc -
* allocate a local buffer. We do round robin allocation for now.
+ *
+ * API is similar to bufmgr.c's BufferAlloc, except that we do not need
+ * to have the BufMgrLock since this is all local. Also, IO_IN_PROGRESS
+ * does not get set.
*/
BufferDesc *
LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
{
+ BufferTag newTag; /* identity of requested block */
int i;
- BufferDesc *bufHdr = NULL;
+ BufferDesc *bufHdr;
+
+ INIT_BUFFERTAG(newTag, reln, blockNum);
/* a low tech search for now -- not optimized for scans */
for (i = 0; i < NLocBuffer; i++)
{
- if (LocalBufferDescriptors[i].tag.rnode.relNode ==
- reln->rd_node.relNode &&
- LocalBufferDescriptors[i].tag.blockNum == blockNum)
+ bufHdr = &LocalBufferDescriptors[i];
+ if (BUFFERTAGS_EQUAL(bufHdr->tag, newTag))
{
#ifdef LBDEBUG
fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
@@ -56,8 +62,14 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
#endif
LocalRefCount[i]++;
- *foundPtr = TRUE;
- return &LocalBufferDescriptors[i];
+ if (bufHdr->flags & BM_VALID)
+ *foundPtr = TRUE;
+ else
+ {
+ /* Previous read attempt must have failed; try again */
+ *foundPtr = FALSE;
+ }
+ return bufHdr;
}
}
@@ -67,6 +79,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
#endif
/* need to get a new buffer (round robin for now) */
+ bufHdr = NULL;
for (i = 0; i < NLocBuffer; i++)
{
int b = (nextFreeLocalBuf + i) % NLocBuffer;
@@ -108,7 +121,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
*
* Note this path cannot be taken for a buffer that was previously in
* use, so it's okay to do it (and possibly error out) before marking
- * the buffer as valid.
+ * the buffer as not dirty.
*/
if (bufHdr->data == (SHMEM_OFFSET) 0)
{
@@ -135,9 +148,8 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
/*
* it's all ours now.
*/
- bufHdr->tag.rnode = reln->rd_node;
- bufHdr->tag.blockNum = blockNum;
- bufHdr->flags &= ~BM_DIRTY;
+ bufHdr->tag = newTag;
+ bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
bufHdr->cntxDirty = false;
*foundPtr = FALSE;
diff --git a/src/include/storage/buf_internals.h b/src/include/storage/buf_internals.h
index fc83396ff0d..6db4afd58bd 100644
--- a/src/include/storage/buf_internals.h
+++ b/src/include/storage/buf_internals.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.69 2004/04/19 23:27:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.70 2004/04/21 18:06:29 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -24,13 +24,12 @@
/*
* Flags for buffer descriptors
*/
-#define BM_DIRTY (1 << 0)
-#define BM_VALID (1 << 1)
-#define BM_DELETED (1 << 2)
-#define BM_IO_IN_PROGRESS (1 << 3)
-#define BM_IO_ERROR (1 << 4)
-#define BM_JUST_DIRTIED (1 << 5)
-#define BM_PIN_COUNT_WAITER (1 << 6)
+#define BM_DIRTY (1 << 0) /* data needs writing */
+#define BM_VALID (1 << 1) /* data is valid */
+#define BM_IO_IN_PROGRESS (1 << 2) /* read or write in progress */
+#define BM_IO_ERROR (1 << 3) /* previous I/O failed */
+#define BM_JUST_DIRTIED (1 << 4) /* dirtied since write started */
+#define BM_PIN_COUNT_WAITER (1 << 5) /* have waiter for sole pin */
typedef bits16 BufFlags;
@@ -54,22 +53,21 @@ typedef struct buftag
#define CLEAR_BUFFERTAG(a) \
( \
- (a)->rnode.tblNode = InvalidOid, \
- (a)->rnode.relNode = InvalidOid, \
- (a)->blockNum = InvalidBlockNumber \
+ (a).rnode.tblNode = InvalidOid, \
+ (a).rnode.relNode = InvalidOid, \
+ (a).blockNum = InvalidBlockNumber \
)
#define INIT_BUFFERTAG(a,xx_reln,xx_blockNum) \
( \
- (a)->blockNum = (xx_blockNum), \
- (a)->rnode = (xx_reln)->rd_node \
+ (a).rnode = (xx_reln)->rd_node, \
+ (a).blockNum = (xx_blockNum) \
)
#define BUFFERTAGS_EQUAL(a,b) \
( \
- (a)->rnode.tblNode == (b)->rnode.tblNode && \
- (a)->rnode.relNode == (b)->rnode.relNode && \
- (a)->blockNum == (b)->blockNum \
+ RelFileNodeEquals((a).rnode, (b).rnode) && \
+ (a).blockNum == (b).blockNum \
)
/*
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index e671e4dac23..bfc76175834 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.75 2004/02/04 01:24:53 wieck Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.76 2004/04/21 18:06:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -142,7 +142,7 @@ extern long *LocalRefCount;
* prototypes for functions in bufmgr.c
*/
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
-extern int ReleaseBuffer(Buffer buffer);
+extern void ReleaseBuffer(Buffer buffer);
extern void WriteBuffer(Buffer buffer);
extern void WriteNoReleaseBuffer(Buffer buffer);
extern Buffer ReleaseAndReadBuffer(Buffer buffer, Relation relation,