diff options
author | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-10-31 15:05:00 +0000 |
---|---|---|
committer | Heikki Linnakangas <heikki.linnakangas@iki.fi> | 2008-10-31 15:05:00 +0000 |
commit | 19c8dc839b64a43958f08108b85ce8ca98d06a8b (patch) | |
tree | be6bff739a33bd11e4915f47f30038840d397cd5 /src/backend/storage | |
parent | 29077051deae30b7704a3e3f2cf7d0a7e3a4130a (diff) | |
download | postgresql-19c8dc839b64a43958f08108b85ce8ca98d06a8b.tar.gz postgresql-19c8dc839b64a43958f08108b85ce8ca98d06a8b.zip |
Unite ReadBufferWithFork, ReadBufferWithStrategy, and ReadOrZeroBuffer
functions into one ReadBufferExtended function that takes the strategy
and mode as arguments. There are three modes: RBM_NORMAL, which is the default
used by plain ReadBuffer(); RBM_ZERO, which replaces ReadOrZeroBuffer; and
a new mode RBM_ZERO_ON_ERROR, which allows callers to read corrupt pages
without throwing an error. The FSM needs the new mode to recover from
corrupt pages, which could happen if we crash after extending an FSM file,
and the new page is "torn".
Add fork number to some error messages in bufmgr.c, that still lacked it.
Diffstat (limited to 'src/backend/storage')
-rw-r--r-- | src/backend/storage/buffer/bufmgr.c | 169 | ||||
-rw-r--r-- | src/backend/storage/freespace/freespace.c | 35 |
2 files changed, 97 insertions, 107 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 86281c11288..67f46857238 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.239 2008/10/20 21:11:15 alvherre Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.240 2008/10/31 15:05:00 heikki Exp $ * *------------------------------------------------------------------------- */ @@ -72,11 +72,10 @@ static bool IsForInput; static volatile BufferDesc *PinCountWaitBuf = NULL; -static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum, - BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy); static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, - ForkNumber forkNum, BlockNumber blockNum, - bool zeroPage, BufferAccessStrategy strategy, bool *hit); + ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode , BufferAccessStrategy strategy, + bool *hit); static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy); static void PinBuffer_Locked(volatile BufferDesc *buf); static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner); @@ -96,7 +95,17 @@ static void AtProcExit_Buffers(int code, Datum arg); /* - * ReadBuffer -- returns a buffer containing the requested + * ReadBuffer -- a shorthand for ReadBufferExtended, for reading from main + * fork with RBM_NORMAL mode and default strategy. + */ +Buffer +ReadBuffer(Relation reln, BlockNumber blockNum) +{ + return ReadBufferExtended(reln, MAIN_FORKNUM, blockNum, RBM_NORMAL, NULL); +} + +/* + * ReadBufferExtended -- returns a buffer containing the requested * block of the requested relation. If the blknum * requested is P_NEW, extend the relation file and * allocate a new block. (Caller is responsible for @@ -107,75 +116,29 @@ static void AtProcExit_Buffers(int code, Datum arg); * the block read. 
The returned buffer has been pinned. * Does not return on error --- elog's instead. * - * Assume when this function is called, that reln has been - * opened already. - */ -Buffer -ReadBuffer(Relation reln, BlockNumber blockNum) -{ - return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, NULL); -} - -/* - * ReadBufferWithFork -- same as ReadBuffer, but for accessing relation - * forks other than MAIN_FORKNUM. - */ -Buffer -ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum) -{ - return ReadBuffer_relcache(reln, forkNum, blockNum, false, NULL); -} - -/* - * ReadBufferWithStrategy -- same as ReadBuffer, except caller can specify - * a nondefault buffer access strategy. See buffer/README for details. - */ -Buffer -ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, - BufferAccessStrategy strategy) -{ - return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, strategy); -} - -/* - * ReadOrZeroBuffer -- like ReadBuffer, but if the page isn't in buffer - * cache already, it's filled with zeros instead of reading it from - * disk. Useful when the caller intends to fill the page from scratch, - * since this saves I/O and avoids unnecessary failure if the - * page-on-disk has corrupt page headers. - * - * Caution: do not use this to read a page that is beyond the relation's - * current physical EOF; that is likely to cause problems in md.c when - * the page is modified and written out. P_NEW is OK, though. - */ -Buffer -ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum) -{ - return ReadBuffer_relcache(reln, forkNum, blockNum, true, NULL); -} - -/* - * ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a - * relcache entry for the relation. If zeroPage is true, this behaves - * like ReadOrZeroBuffer rather than ReadBuffer. + * Assume when this function is called, that reln has been opened already. + * + * In RBM_NORMAL mode, the page is read from disk, and the page header is + * validated. 
An error is thrown if the page header is not valid. + * + * RBM_ZERO_ON_ERROR is like the normal mode, but if the page header is not + * valid, the page is zeroed instead of throwing an error. This is intended + * for non-critical data, where the caller is prepared to repair errors. + * + * In RBM_ZERO mode, if the page isn't in buffer cache already, it's filled + * with zeros instead of reading it from disk. Useful when the caller is + * going to fill the page from scratch, since this saves I/O and avoids + * unnecessary failure if the page-on-disk has corrupt page headers. + * Caution: do not use this mode to read a page that is beyond the relation's + * current physical EOF; that is likely to cause problems in md.c when + * the page is modified and written out. P_NEW is OK, though. + * + * If strategy is not NULL, a nondefault buffer access strategy is used. + * See buffer/README for details. */ Buffer -ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, - ForkNumber forkNum, BlockNumber blockNum, bool zeroPage) -{ - bool hit; - - SMgrRelation smgr = smgropen(rnode); - return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, zeroPage, NULL, &hit); -} - -/* - * ReadBuffer_relcache -- common logic for ReadBuffer-variants that - * operate on a Relation. 
- */ -static Buffer -ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum, - bool zeroPage, BufferAccessStrategy strategy) +ReadBufferExtended(Relation reln, ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy) { bool hit; Buffer buf; @@ -189,12 +152,30 @@ ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum, */ pgstat_count_buffer_read(reln); buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, forkNum, blockNum, - zeroPage, strategy, &hit); + mode, strategy, &hit); if (hit) pgstat_count_buffer_hit(reln); return buf; } + +/* + * ReadBufferWithoutRelcache -- like ReadBufferExtended, but doesn't require + * a relcache entry for the relation. + */ +Buffer +ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, + ForkNumber forkNum, BlockNumber blockNum, + ReadBufferMode mode, BufferAccessStrategy strategy) +{ + bool hit; + + SMgrRelation smgr = smgropen(rnode); + return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, mode, strategy, + &hit); +} + + /* * ReadBuffer_common -- common logic for all ReadBuffer variants * @@ -202,7 +183,7 @@ ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum, */ static Buffer ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, - BlockNumber blockNum, bool zeroPage, + BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy, bool *hit) { volatile BufferDesc *bufHdr; @@ -295,8 +276,8 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, bufBlock = isLocalBuf ? 
LocalBufHdrGetBlock(bufHdr) : BufHdrGetBlock(bufHdr); if (!PageIsNew((Page) bufBlock)) ereport(ERROR, - (errmsg("unexpected data beyond EOF in block %u of relation %u/%u/%u", - blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode), + (errmsg("unexpected data beyond EOF in block %u of relation %u/%u/%u/%u", + blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, forkNum), errhint("This has been seen to occur with buggy kernels; consider updating your system."))); /* @@ -356,7 +337,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, * Read in the page, unless the caller intends to overwrite it and * just wants us to allocate a buffer. */ - if (zeroPage) + if (mode == RBM_ZERO) MemSet((char *) bufBlock, 0, BLCKSZ); else { @@ -365,24 +346,25 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, /* check for garbage data */ if (!PageHeaderIsValid((PageHeader) bufBlock)) { - if (zero_damaged_pages) + if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages) { ereport(WARNING, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page", + errmsg("invalid page header in block %u of relation %u/%u/%u/%u; zeroing out page", blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, - smgr->smgr_rnode.relNode))); + smgr->smgr_rnode.relNode, + forkNum))); MemSet((char *) bufBlock, 0, BLCKSZ); } else ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page header in block %u of relation %u/%u/%u", + errmsg("invalid page header in block %u of relation %u/%u/%u/%u", blockNum, smgr->smgr_rnode.spcNode, smgr->smgr_rnode.dbNode, - smgr->smgr_rnode.relNode))); + smgr->smgr_rnode.relNode, forkNum))); } } } @@ -1679,10 +1661,10 @@ PrintBufferLeakWarning(Buffer buffer) /* theoretically we should lock the bufhdr here */ elog(WARNING, "buffer refcount leak: [%03d] " - "(rel=%u/%u/%u, blockNum=%u, flags=0x%x, 
refcount=%u %d)", + "(rel=%u/%u/%u, forkNum=%u, blockNum=%u, flags=0x%x, refcount=%u %d)", buffer, buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, - buf->tag.rnode.relNode, + buf->tag.rnode.relNode, buf->tag.forkNum, buf->tag.blockNum, buf->flags, buf->refcount, loccount); } @@ -1991,11 +1973,11 @@ PrintBufferDescs(void) { /* theoretically we should lock the bufhdr here */ elog(LOG, - "[%02d] (freeNext=%d, rel=%u/%u/%u, " + "[%02d] (freeNext=%d, rel=%u/%u/%u, forkNum=%u, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, - buf->tag.rnode.relNode, + buf->tag.rnode.relNode, buf->tag.forkNum, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } @@ -2015,11 +1997,11 @@ PrintPinnedBufs(void) { /* theoretically we should lock the bufhdr here */ elog(LOG, - "[%02d] (freeNext=%d, rel=%u/%u/%u, " + "[%02d] (freeNext=%d, rel=%u/%u/%u, forkNum=%u, " "blockNum=%u, flags=0x%x, refcount=%u %d)", i, buf->freeNext, buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, - buf->tag.rnode.relNode, + buf->tag.rnode.relNode, buf->tag.forkNum, buf->tag.blockNum, buf->flags, buf->refcount, PrivateRefCount[i]); } @@ -2654,11 +2636,11 @@ AbortBufferIO(void) /* Buffer is pinned, so we can read tag without spinlock */ ereport(WARNING, (errcode(ERRCODE_IO_ERROR), - errmsg("could not write block %u of %u/%u/%u", + errmsg("could not write block %u of %u/%u/%u/%u", buf->tag.blockNum, buf->tag.rnode.spcNode, buf->tag.rnode.dbNode, - buf->tag.rnode.relNode), + buf->tag.rnode.relNode, buf->tag.forkNum), errdetail("Multiple failures --- write error might be permanent."))); } } @@ -2676,9 +2658,10 @@ buffer_write_error_callback(void *arg) /* Buffer is pinned, so we can read the tag without locking the spinlock */ if (bufHdr != NULL) - errcontext("writing block %u of relation %u/%u/%u", + errcontext("writing block %u of relation %u/%u/%u/%u", bufHdr->tag.blockNum, bufHdr->tag.rnode.spcNode, bufHdr->tag.rnode.dbNode, - 
bufHdr->tag.rnode.relNode); + bufHdr->tag.rnode.relNode, + bufHdr->tag.forkNum); } diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c index 9872e5c7222..724e87fa204 100644 --- a/src/backend/storage/freespace/freespace.c +++ b/src/backend/storage/freespace/freespace.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.64 2008/10/01 14:59:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/freespace/freespace.c,v 1.65 2008/10/31 15:05:00 heikki Exp $ * * * NOTES: @@ -504,6 +504,7 @@ static Buffer fsm_readbuf(Relation rel, FSMAddress addr, bool extend) { BlockNumber blkno = fsm_logical_to_physical(addr); + Buffer buf; RelationOpenSmgr(rel); @@ -518,7 +519,18 @@ fsm_readbuf(Relation rel, FSMAddress addr, bool extend) else return InvalidBuffer; } - return ReadBufferWithFork(rel, FSM_FORKNUM, blkno); + + /* + * Use ZERO_ON_ERROR mode, and initialize the page if necessary. The FSM + * information is not accurate anyway, so it's better to clear corrupt + * pages than error out. Since the FSM changes are not WAL-logged, the + * so-called torn page problem on crash can lead to pages with corrupt + * headers, for example. + */ + buf = ReadBufferExtended(rel, FSM_FORKNUM, blkno, RBM_ZERO_ON_ERROR, NULL); + if (PageIsNew(BufferGetPage(buf))) + PageInit(BufferGetPage(buf), BLCKSZ, 0); + return buf; } /* @@ -779,23 +791,18 @@ fsm_redo_truncate(xl_fsm_truncate *xlrec) * replay of the smgr truncation record to remove completely unused * pages. 
*/ - buf = XLogReadBufferWithFork(xlrec->node, FSM_FORKNUM, fsmblk, false); + buf = XLogReadBufferExtended(xlrec->node, FSM_FORKNUM, fsmblk, + RBM_ZERO_ON_ERROR); if (BufferIsValid(buf)) { - fsm_truncate_avail(BufferGetPage(buf), first_removed_slot); + Page page = BufferGetPage(buf); + + if (PageIsNew(page)) + PageInit(page, BLCKSZ, 0); + fsm_truncate_avail(page, first_removed_slot); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); } - else - { - /* - * The page doesn't exist. Because FSM extensions are not WAL-logged, - * it's normal to have a truncation record for a page that doesn't - * exist. Tell xlogutils.c not to PANIC at the end of recovery - * because of the missing page - */ - XLogTruncateRelation(xlrec->node, FSM_FORKNUM, fsmblk); - } } void |