about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/access/hash/hashpage.c 26
-rw-r--r-- src/backend/access/nbtree/nbtsort.c 25
-rw-r--r-- src/backend/commands/tablecmds.c 4
-rw-r--r-- src/backend/storage/smgr/md.c 507
-rw-r--r-- src/backend/storage/smgr/smgr.c 213
-rw-r--r-- src/include/storage/smgr.h 25
6 files changed, 402 insertions, 398 deletions
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 0f643836a1c..b9569e58af4 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.61 2006/11/19 21:33:23 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.62 2007/01/03 18:11:01 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -533,10 +533,8 @@ fail:
*
* This does not need to initialize the new bucket pages; we'll do that as
* each one is used by _hash_expandtable(). But we have to extend the logical
- * EOF to the end of the splitpoint; otherwise the first overflow page
- * allocated beyond the splitpoint will represent a noncontiguous access,
- * which can confuse md.c (and will probably be forbidden by future changes
- * to md.c).
+ * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
+ * sync with ours, so that overflow-page allocation works correctly.
*
* We do this by writing a page of zeroes at the end of the splitpoint range.
* We expect that the filesystem will ensure that the intervening pages read
@@ -559,7 +557,6 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
{
BlockNumber firstblock;
BlockNumber lastblock;
- BlockNumber endblock;
char zerobuf[BLCKSZ];
/*
@@ -577,24 +574,9 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
return InvalidBlockNumber;
- /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
-
MemSet(zerobuf, 0, sizeof(zerobuf));
- /*
- * XXX If the extension results in creation of new segment files,
- * we have to make sure that each non-last file is correctly filled out to
- * RELSEG_SIZE blocks. This ought to be done inside mdextend, but
- * changing the smgr API seems best left for development cycle not late
- * beta. Temporary fix for bug #2737.
- */
-#ifndef LET_OS_MANAGE_FILESIZE
- for (endblock = firstblock | (RELSEG_SIZE - 1);
- endblock < lastblock;
- endblock += RELSEG_SIZE)
- smgrextend(rel->rd_smgr, endblock, zerobuf, rel->rd_istemp);
-#endif
-
+ /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
return firstblock;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 4951dca2182..4f886e8b07e 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -36,9 +36,9 @@
* that is of no value (since other backends have no interest in them yet)
* and it created locking problems for CHECKPOINT, because the upper-level
* pages were held exclusive-locked for long periods. Now we just build
- * the pages in local memory and smgrwrite() them as we finish them. They
- * will need to be re-read into shared buffers on first use after the build
- * finishes.
+ * the pages in local memory and smgrwrite or smgrextend them as we finish
+ * them. They will need to be re-read into shared buffers on first use after
+ * the build finishes.
*
* Since the index will never be used unless it is completely built,
* from a crash-recovery point of view there is no need to WAL-log the
@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.108 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -309,9 +309,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
if (!wstate->btws_zeropage)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
- smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
- (char *) wstate->btws_zeropage,
- true);
+ smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++,
+ (char *) wstate->btws_zeropage,
+ true);
}
/*
@@ -319,10 +319,17 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
* index, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves before ending the build.
*/
- smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
-
if (blkno == wstate->btws_pages_written)
+ {
+ /* extending the file... */
+ smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true);
wstate->btws_pages_written++;
+ }
+ else
+ {
+ /* overwriting a block we zero-filled before */
+ smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
+ }
pfree(page);
}
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index c30aa69c555..5de8e96f5fd 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.208 2006/12/30 21:21:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.209 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -6083,7 +6083,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
* rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below.
*/
- smgrwrite(dst, blkno, buf, true);
+ smgrextend(dst, blkno, buf, true);
}
/*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index f58ab03ce42..e0899a54600 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.123 2006/11/20 01:07:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.124 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -22,6 +22,7 @@
#include "miscadmin.h"
#include "postmaster/bgwriter.h"
#include "storage/fd.h"
+#include "storage/bufmgr.h"
#include "storage/smgr.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
@@ -108,9 +109,16 @@ typedef struct
static HTAB *pendingOpsTable = NULL;
+typedef enum /* behavior for mdopen & _mdfd_getseg */
+{
+ EXTENSION_FAIL, /* ereport if segment not present */
+ EXTENSION_RETURN_NULL, /* return NULL if not present */
+ EXTENSION_CREATE /* create new segments as needed */
+} ExtensionBehavior;
+
/* local routines */
-static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);
-static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
+static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
+static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
static MdfdVec *_fdvec_alloc(void);
#ifndef LET_OS_MANAGE_FILESIZE
@@ -118,14 +126,14 @@ static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
int oflags);
#endif
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
- bool allowNotFound);
-static BlockNumber _mdnblocks(File file, Size blcksz);
+ bool isTemp, ExtensionBehavior behavior);
+static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
/*
* mdinit() -- Initialize private state for magnetic disk storage manager.
*/
-bool
+void
mdinit(void)
{
MdCxt = AllocSetContextCreate(TopMemoryContext,
@@ -154,8 +162,6 @@ mdinit(void)
&hash_ctl,
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
}
-
- return true;
}
/*
@@ -163,14 +169,14 @@ mdinit(void)
*
* If isRedo is true, it's okay for the relation to exist already.
*/
-bool
+void
mdcreate(SMgrRelation reln, bool isRedo)
{
char *path;
File fd;
if (isRedo && reln->md_fd != NULL)
- return true; /* created and opened already... */
+ return; /* created and opened already... */
Assert(reln->md_fd == NULL);
@@ -193,11 +199,15 @@ mdcreate(SMgrRelation reln, bool isRedo)
if (fd < 0)
{
pfree(path);
- /* be sure to return the error reported by create, not open */
+ /* be sure to report the error reported by create, not open */
errno = save_errno;
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not create relation %u/%u/%u: %m",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
}
- errno = 0;
}
pfree(path);
@@ -209,8 +219,6 @@ mdcreate(SMgrRelation reln, bool isRedo)
#ifndef LET_OS_MANAGE_FILESIZE
reln->md_fd->mdfd_chain = NULL;
#endif
-
- return true;
}
/*
@@ -220,12 +228,12 @@ mdcreate(SMgrRelation reln, bool isRedo)
* there won't be an SMgrRelation hashtable entry anymore.
*
* If isRedo is true, it's okay for the relation to be already gone.
+ * Also, any failure should be reported as WARNING not ERROR, because
+ * we are usually not in a transaction anymore when this is called.
*/
-bool
+void
mdunlink(RelFileNode rnode, bool isRedo)
{
- bool status = true;
- int save_errno = 0;
char *path;
path = relpath(rnode);
@@ -234,15 +242,17 @@ mdunlink(RelFileNode rnode, bool isRedo)
if (unlink(path) < 0)
{
if (!isRedo || errno != ENOENT)
- {
- status = false;
- save_errno = errno;
- }
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not remove relation %u/%u/%u: %m",
+ rnode.spcNode,
+ rnode.dbNode,
+ rnode.relNode)));
}
#ifndef LET_OS_MANAGE_FILESIZE
/* Delete the additional segments, if any */
- if (status)
+ else
{
char *segpath = (char *) palloc(strlen(path) + 12);
BlockNumber segno;
@@ -258,10 +268,13 @@ mdunlink(RelFileNode rnode, bool isRedo)
{
/* ENOENT is expected after the last segment... */
if (errno != ENOENT)
- {
- status = false;
- save_errno = errno;
- }
+ ereport(WARNING,
+ (errcode_for_file_access(),
+ errmsg("could not remove segment %u of relation %u/%u/%u: %m",
+ segno,
+ rnode.spcNode,
+ rnode.dbNode,
+ rnode.relNode)));
break;
}
}
@@ -270,29 +283,44 @@ mdunlink(RelFileNode rnode, bool isRedo)
#endif
pfree(path);
-
- errno = save_errno;
- return status;
}
/*
* mdextend() -- Add a block to the specified relation.
*
- * The semantics are basically the same as mdwrite(): write at the
- * specified position. However, we are expecting to extend the
- * relation (ie, blocknum is >= the current EOF), and so in case of
- * failure we clean up by truncating.
- *
- * This routine returns true or false, with errno set as appropriate.
+ * The semantics are nearly the same as mdwrite(): write at the
+ * specified position. However, this is to be used for the case of
+ * extending a relation (i.e., blocknum is at or beyond the current
+ * EOF). Note that we assume writing a block beyond current EOF
+ * causes intervening file space to become filled with zeroes.
*/
-bool
+void
mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
long seekpos;
int nbytes;
MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, false);
+ /* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+ Assert(blocknum >= mdnblocks(reln));
+#endif
+
+ /*
+ * If a relation manages to grow to 2^32-1 blocks, refuse to extend it
+ * any more --- we mustn't create a block whose number
+ * actually is InvalidBlockNumber.
+ */
+ if (blocknum == InvalidBlockNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("cannot extend relation %u/%u/%u beyond %u blocks",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ InvalidBlockNumber)));
+
+ v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -302,52 +330,64 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
#endif
/*
- * Note: because caller obtained blocknum by calling _mdnblocks, which did
- * a seek(SEEK_END), this seek is often redundant and will be optimized
- * away by fd.c. It's not redundant, however, if there is a partial page
- * at the end of the file. In that case we want to try to overwrite the
- * partial page with a full page. It's also not redundant if bufmgr.c had
- * to dump another buffer of the same file to make room for the new page's
- * buffer.
+ * Note: because caller usually obtained blocknum by calling mdnblocks,
+ * which did a seek(SEEK_END), this seek is often redundant and will be
+ * optimized away by fd.c. It's not redundant, however, if there is a
+ * partial page at the end of the file. In that case we want to try to
+ * overwrite the partial page with a full page. It's also not redundant
+ * if bufmgr.c had to dump another buffer of the same file to make room
+ * for the new page's buffer.
*/
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
- if (nbytes > 0)
- {
- int save_errno = errno;
-
- /* Remove the partially-written page */
- FileTruncate(v->mdfd_vfd, seekpos);
- FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
- errno = save_errno;
- }
- return false;
+ if (nbytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not extend relation %u/%u/%u: %m",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode),
+ errhint("Check free disk space.")));
+ /* short write: complain appropriately */
+ ereport(ERROR,
+ (errcode(ERRCODE_DISK_FULL),
+ errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nbytes, BLCKSZ, blocknum),
+ errhint("Check free disk space.")));
}
if (!isTemp)
- {
- if (!register_dirty_segment(reln, v))
- return false;
- }
+ register_dirty_segment(reln, v);
#ifndef LET_OS_MANAGE_FILESIZE
- Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
#endif
-
- return true;
}
/*
- * mdopen() -- Open the specified relation. ereport's on failure.
- * (Optionally, can return NULL instead of ereport for ENOENT.)
+ * mdopen() -- Open the specified relation.
*
* Note we only open the first segment, when there are multiple segments.
+ *
+ * If first segment is not present, either ereport or return NULL according
+ * to "behavior". We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
+ * EXTENSION_CREATE means it's OK to extend an existing relation, not to
+ * invent one out of whole cloth.
*/
static MdfdVec *
-mdopen(SMgrRelation reln, bool allowNotFound)
+mdopen(SMgrRelation reln, ExtensionBehavior behavior)
{
MdfdVec *mdfd;
char *path;
@@ -374,7 +414,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
if (fd < 0)
{
pfree(path);
- if (allowNotFound && errno == ENOENT)
+ if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
return NULL;
ereport(ERROR,
(errcode_for_file_access(),
@@ -393,7 +433,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
mdfd->mdfd_segno = 0;
#ifndef LET_OS_MANAGE_FILESIZE
mdfd->mdfd_chain = NULL;
- Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
#endif
return mdfd;
@@ -401,17 +441,15 @@ mdopen(SMgrRelation reln, bool allowNotFound)
/*
* mdclose() -- Close the specified relation, if it isn't closed already.
- *
- * Returns true or false with errno set as appropriate.
*/
-bool
+void
mdclose(SMgrRelation reln)
{
MdfdVec *v = reln->md_fd;
/* No work if already closed */
if (v == NULL)
- return true;
+ return;
reln->md_fd = NULL; /* prevent dangling pointer after error */
@@ -432,22 +470,19 @@ mdclose(SMgrRelation reln)
FileClose(v->mdfd_vfd);
pfree(v);
#endif
-
- return true;
}
/*
* mdread() -- Read the specified block from a relation.
*/
-bool
+void
mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- bool status;
long seekpos;
int nbytes;
MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, false);
+ v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -457,39 +492,66 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
- status = true;
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
+ if (nbytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read block %u of relation %u/%u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
/*
- * If we are at or past EOF, return zeroes without complaining. Also
- * substitute zeroes if we found a partial block at EOF.
- *
- * XXX this is really ugly, bad design. However the current
- * implementation of hash indexes requires it, because hash index
- * pages are initialized out-of-order.
+ * Short read: we are at or past EOF, or we read a partial block at
+ * EOF. Normally this is an error; upper levels should never try to
+ * read a nonexistent block. However, if zero_damaged_pages is ON
+ * or we are InRecovery, we should instead return zeroes without
+ * complaining. This allows, for example, the case of trying to
+ * update a block that was later truncated away.
*/
- if (nbytes == 0 ||
- (nbytes > 0 && mdnblocks(reln) == blocknum))
+ if (zero_damaged_pages || InRecovery)
MemSet(buffer, 0, BLCKSZ);
else
- status = false;
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nbytes, BLCKSZ)));
}
-
- return status;
}
/*
* mdwrite() -- Write the supplied block at the appropriate location.
+ *
+ * This is to be used only for updating already-existing blocks of a
+ * relation (ie, those before the current EOF). To extend a relation,
+ * use mdextend().
*/
-bool
+void
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
long seekpos;
+ int nbytes;
MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, false);
+ /* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+ Assert(blocknum < mdnblocks(reln));
+#endif
+
+ v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -499,18 +561,38 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
#endif
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
- return false;
-
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
- if (!isTemp)
+ if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{
- if (!register_dirty_segment(reln, v))
- return false;
+ if (nbytes < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not write block %u of relation %u/%u/%u: %m",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
+ /* short write: complain appropriately */
+ ereport(ERROR,
+ (errcode(ERRCODE_DISK_FULL),
+ errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nbytes, BLCKSZ),
+ errhint("Check free disk space.")));
}
- return true;
+ if (!isTemp)
+ register_dirty_segment(reln, v);
}
/*
@@ -520,13 +602,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* and added to the mdfd_chain list. If this routine has not been
* called, then only segments up to the last one actually touched
* are present in the chain.
- *
- * Returns # of blocks, or InvalidBlockNumber on error.
*/
BlockNumber
mdnblocks(SMgrRelation reln)
{
- MdfdVec *v = mdopen(reln, false);
+ MdfdVec *v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
BlockNumber nblocks;
@@ -552,7 +632,7 @@ mdnblocks(SMgrRelation reln)
for (;;)
{
- nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
+ nblocks = _mdnblocks(reln, v);
if (nblocks > ((BlockNumber) RELSEG_SIZE))
elog(FATAL, "segment too big");
if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -573,22 +653,26 @@ mdnblocks(SMgrRelation reln)
*/
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
if (v->mdfd_chain == NULL)
- return InvalidBlockNumber; /* failed? */
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open segment %u of relation %u/%u/%u: %m",
+ segno,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
}
v = v->mdfd_chain;
}
#else
- return _mdnblocks(v->mdfd_vfd, BLCKSZ);
+ return _mdnblocks(reln, v);
#endif
}
/*
* mdtruncate() -- Truncate relation to specified number of blocks.
- *
- * Returns # of blocks or InvalidBlockNumber on error.
*/
-BlockNumber
+void
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{
MdfdVec *v;
@@ -603,14 +687,22 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* that truncation loop will get them all!
*/
curnblk = mdnblocks(reln);
- if (curnblk == InvalidBlockNumber)
- return InvalidBlockNumber; /* mdnblocks failed */
if (nblocks > curnblk)
- return InvalidBlockNumber; /* bogus request */
+ {
+ /* Bogus request ... but no complaint if InRecovery */
+ if (InRecovery)
+ return;
+ ereport(ERROR,
+ (errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nblocks, curnblk)));
+ }
if (nblocks == curnblk)
- return nblocks; /* no work */
+ return; /* no work */
- v = mdopen(reln, false);
+ v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
priorblocks = 0;
@@ -626,12 +718,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* not delete it, for reasons explained in the header comments.
*/
if (FileTruncate(v->mdfd_vfd, 0) < 0)
- return InvalidBlockNumber;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nblocks)));
if (!isTemp)
- {
- if (!register_dirty_segment(reln, v))
- return InvalidBlockNumber;
- }
+ register_dirty_segment(reln, v);
v = v->mdfd_chain;
Assert(ov != reln->md_fd); /* we never drop the 1st segment */
pfree(ov);
@@ -649,12 +744,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
BlockNumber lastsegblocks = nblocks - priorblocks;
if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
- return InvalidBlockNumber;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nblocks)));
if (!isTemp)
- {
- if (!register_dirty_segment(reln, v))
- return InvalidBlockNumber;
- }
+ register_dirty_segment(reln, v);
v = v->mdfd_chain;
ov->mdfd_chain = NULL;
}
@@ -670,15 +768,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
}
#else
if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
- return InvalidBlockNumber;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode,
+ nblocks)));
if (!isTemp)
- {
- if (!register_dirty_segment(reln, v))
- return InvalidBlockNumber;
- }
+ register_dirty_segment(reln, v);
#endif
-
- return nblocks;
}
/*
@@ -687,7 +786,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* Note that only writes already issued are synced; this routine knows
* nothing of dirty buffers that may exist inside the buffer manager.
*/
-bool
+void
mdimmedsync(SMgrRelation reln)
{
MdfdVec *v;
@@ -698,24 +797,32 @@ mdimmedsync(SMgrRelation reln)
* that fsync loop will get them all!
*/
curnblk = mdnblocks(reln);
- if (curnblk == InvalidBlockNumber)
- return false; /* mdnblocks failed */
- v = mdopen(reln, false);
+ v = mdopen(reln, EXTENSION_FAIL);
#ifndef LET_OS_MANAGE_FILESIZE
while (v != NULL)
{
if (FileSync(v->mdfd_vfd) < 0)
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ v->mdfd_segno,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
v = v->mdfd_chain;
}
#else
if (FileSync(v->mdfd_vfd) < 0)
- return false;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ v->mdfd_segno,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
#endif
-
- return true;
}
/*
@@ -724,7 +831,7 @@ mdimmedsync(SMgrRelation reln)
* This is only called during checkpoints, and checkpoints should only
* occur in processes that have created a pendingOpsTable.
*/
-bool
+void
mdsync(void)
{
HASH_SEQ_STATUS hstat;
@@ -732,7 +839,7 @@ mdsync(void)
int absorb_counter;
if (!pendingOpsTable)
- return false;
+ elog(ERROR, "cannot sync without a pendingOpsTable");
/*
* If we are in the bgwriter, the sync had better include all fsync
@@ -795,21 +902,18 @@ mdsync(void)
*/
seg = _mdfd_getseg(reln,
entry->segno * ((BlockNumber) RELSEG_SIZE),
- true);
+ false, EXTENSION_RETURN_NULL);
if (seg)
{
if (FileSync(seg->mdfd_vfd) < 0 &&
errno != ENOENT)
- {
- ereport(LOG,
+ ereport(ERROR,
(errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
entry->segno,
entry->rnode.spcNode,
entry->rnode.dbNode,
entry->rnode.relNode)));
- return false;
- }
}
}
@@ -818,8 +922,6 @@ mdsync(void)
HASH_REMOVE, NULL) == NULL)
elog(ERROR, "pendingOpsTable corrupted");
}
-
- return true;
}
/*
@@ -830,11 +932,8 @@ mdsync(void)
* to the background writer process. If that fails, just do the fsync
* locally before returning (we expect this will not happen often enough
* to be a performance problem).
- *
- * A false result implies I/O failure during local fsync. errno will be
- * valid for error reporting.
*/
-static bool
+static void
register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
{
if (pendingOpsTable)
@@ -847,17 +946,21 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
entry.segno = seg->mdfd_segno;
(void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL);
- return true;
}
else
{
if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
- return true;
- }
+ return; /* passed it off successfully */
- if (FileSync(seg->mdfd_vfd) < 0)
- return false;
- return true;
+ if (FileSync(seg->mdfd_vfd) < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+ seg->mdfd_segno,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
+ }
}
/*
@@ -931,7 +1034,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
v->mdfd_vfd = fd;
v->mdfd_segno = segno;
v->mdfd_chain = NULL;
- Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+ Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
/* all done */
return v;
@@ -940,51 +1043,66 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
/*
* _mdfd_getseg() -- Find the segment of the relation holding the
- * specified block. ereport's on failure.
- * (Optionally, can return NULL instead of ereport for ENOENT.)
+ * specified block.
+ *
+ * If the segment doesn't exist, we ereport, return NULL, or create the
+ * segment, according to "behavior". Note: isTemp need only be correct
+ * in the EXTENSION_CREATE case.
*/
static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
+_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
+ ExtensionBehavior behavior)
{
- MdfdVec *v = mdopen(reln, allowNotFound);
+ MdfdVec *v = mdopen(reln, behavior);
#ifndef LET_OS_MANAGE_FILESIZE
- BlockNumber segstogo;
+ BlockNumber targetseg;
BlockNumber nextsegno;
if (!v)
- return NULL; /* only possible if allowNotFound */
+ return NULL; /* only possible if EXTENSION_RETURN_NULL */
- for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1;
- segstogo > 0;
- nextsegno++, segstogo--)
+ targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
+ for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)
{
+ Assert(nextsegno == v->mdfd_segno + 1);
+
if (v->mdfd_chain == NULL)
{
/*
- * We will create the next segment only if the target block is
- * within it. This prevents Sorcerer's Apprentice syndrome if a
- * bug at higher levels causes us to be handed a ridiculously
- * large blkno --- otherwise we could create many thousands of
- * empty segment files before reaching the "target" block. We
- * should never need to create more than one new segment per call,
- * so this restriction seems reasonable.
+ * Normally we will create new segments only if authorized by
+ * the caller (i.e., we are doing mdextend()). But when doing
+ * WAL recovery, create segments anyway; this allows cases such as
+ * replaying WAL data that has a write into a high-numbered
+ * segment of a relation that was later deleted. We want to go
+ * ahead and create the segments so we can finish out the replay.
*
- * BUT: when doing WAL recovery, disable this logic and create
- * segments unconditionally. In this case it seems better to
- * assume the given blkno is good (it presumably came from a
- * CRC-checked WAL record); furthermore this lets us cope in the
- * case where we are replaying WAL data that has a write into a
- * high-numbered segment of a relation that was later deleted. We
- * want to go ahead and create the segments so we can finish out
- * the replay.
+ * We have to maintain the invariant that segments before the
+ * last active segment are of size RELSEG_SIZE; therefore, pad
+ * them out with zeroes if needed. (This only matters if caller
+ * is extending the relation discontiguously, but that can happen
+ * in hash indexes.)
*/
- v->mdfd_chain = _mdfd_openseg(reln,
- nextsegno,
- (segstogo == 1 || InRecovery) ? O_CREAT : 0);
+ if (behavior == EXTENSION_CREATE || InRecovery)
+ {
+ if (_mdnblocks(reln, v) < RELSEG_SIZE)
+ {
+ char *zerobuf = palloc0(BLCKSZ);
+
+ mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+ zerobuf, isTemp);
+ pfree(zerobuf);
+ }
+ v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);
+ }
+ else
+ {
+ /* We won't create segment if not existent */
+ v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);
+ }
if (v->mdfd_chain == NULL)
{
- if (allowNotFound && errno == ENOENT)
+ if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
return NULL;
ereport(ERROR,
(errcode_for_file_access(),
@@ -1007,12 +1125,19 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
* Get number of blocks present in a single disk file
*/
static BlockNumber
-_mdnblocks(File file, Size blcksz)
+_mdnblocks(SMgrRelation reln, MdfdVec *seg)
{
long len;
- len = FileSeek(file, 0L, SEEK_END);
+ len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
if (len < 0)
- return 0; /* on failure, assume file is empty */
- return (BlockNumber) (len / blcksz);
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
+ seg->mdfd_segno,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
+ /* note that this calculation will ignore any partial block at EOF */
+ return (BlockNumber) (len / BLCKSZ);
}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0ceb800b363..1a3a00f2951 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.101 2006/10/04 00:29:58 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.102 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -31,30 +31,33 @@
/*
* This struct of function pointers defines the API between smgr.c and
* any individual storage manager module. Note that smgr subfunctions are
- * generally expected to return TRUE on success, FALSE on error. (For
- * nblocks and truncate we instead say that returning InvalidBlockNumber
- * indicates an error.)
+ * generally expected to report problems via elog(ERROR). An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error. Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
*/
typedef struct f_smgr
{
- bool (*smgr_init) (void); /* may be NULL */
- bool (*smgr_shutdown) (void); /* may be NULL */
- bool (*smgr_close) (SMgrRelation reln);
- bool (*smgr_create) (SMgrRelation reln, bool isRedo);
- bool (*smgr_unlink) (RelFileNode rnode, bool isRedo);
- bool (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
+ void (*smgr_init) (void); /* may be NULL */
+ void (*smgr_shutdown) (void); /* may be NULL */
+ void (*smgr_close) (SMgrRelation reln);
+ void (*smgr_create) (SMgrRelation reln, bool isRedo);
+ void (*smgr_unlink) (RelFileNode rnode, bool isRedo);
+ void (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
char *buffer, bool isTemp);
- bool (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
+ void (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
char *buffer);
- bool (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
+ void (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
char *buffer, bool isTemp);
BlockNumber (*smgr_nblocks) (SMgrRelation reln);
- BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
- bool isTemp);
- bool (*smgr_immedsync) (SMgrRelation reln);
- bool (*smgr_commit) (void); /* may be NULL */
- bool (*smgr_abort) (void); /* may be NULL */
- bool (*smgr_sync) (void); /* may be NULL */
+ void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
+ bool isTemp);
+ void (*smgr_immedsync) (SMgrRelation reln);
+ void (*smgr_commit) (void); /* may be NULL */
+ void (*smgr_abort) (void); /* may be NULL */
+ void (*smgr_sync) (void); /* may be NULL */
} f_smgr;
@@ -152,12 +155,7 @@ smgrinit(void)
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_init)
- {
- if (!(*(smgrsw[i].smgr_init)) ())
- elog(FATAL, "smgr initialization failed on %s: %m",
- DatumGetCString(DirectFunctionCall1(smgrout,
- Int16GetDatum(i))));
- }
+ (*(smgrsw[i].smgr_init)) ();
}
/* register the shutdown proc */
@@ -175,12 +173,7 @@ smgrshutdown(int code, Datum arg)
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_shutdown)
- {
- if (!(*(smgrsw[i].smgr_shutdown)) ())
- elog(FATAL, "smgr shutdown failed on %s: %m",
- DatumGetCString(DirectFunctionCall1(smgrout,
- Int16GetDatum(i))));
- }
+ (*(smgrsw[i].smgr_shutdown)) ();
}
}
@@ -256,13 +249,7 @@ smgrclose(SMgrRelation reln)
{
SMgrRelation *owner;
- if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not close relation %u/%u/%u: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ (*(smgrsw[reln->smgr_which].smgr_close)) (reln);
owner = reln->smgr_owner;
@@ -354,13 +341,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
reln->smgr_rnode.dbNode,
isRedo);
- if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not create relation %u/%u/%u: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo);
if (isRedo)
return;
@@ -482,38 +463,26 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
/*
* And delete the physical files.
*
- * Note: we treat deletion failure as a WARNING, not an error, because
- * we've already decided to commit or abort the current xact.
+ * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+ * ERROR, because we've already decided to commit or abort the current
+ * xact.
*/
- if (!(*(smgrsw[which].smgr_unlink)) (rnode, isRedo))
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not remove relation %u/%u/%u: %m",
- rnode.spcNode,
- rnode.dbNode,
- rnode.relNode)));
+ (*(smgrsw[which].smgr_unlink)) (rnode, isRedo);
}
/*
* smgrextend() -- Add a new block to a file.
*
- * The semantics are basically the same as smgrwrite(): write at the
- * specified position. However, we are expecting to extend the
- * relation (ie, blocknum is the current EOF), and so in case of
- * failure we clean up by truncating.
+ * The semantics are nearly the same as smgrwrite(): write at the
+ * specified position. However, this is to be used for the case of
+ * extending a relation (i.e., blocknum is at or beyond the current
+ * EOF). Note that we assume writing a block beyond current EOF
+ * causes intervening file space to become filled with zeroes.
*/
void
smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
- if (!(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer,
- isTemp))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not extend relation %u/%u/%u: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode),
- errhint("Check free disk space.")));
+ (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp);
}
/*
@@ -527,19 +496,16 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
void
smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
{
- if (!(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not read block %u of relation %u/%u/%u: %m",
- blocknum,
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer);
}
/*
* smgrwrite() -- Write the supplied buffer out.
*
+ * This is to be used only for updating already-existing blocks of a
+ * relation (i.e., those before the current EOF). To extend a relation,
+ * use smgrextend().
+ *
* This is not a synchronous write -- the block is not necessarily
* on disk at return, only dumped out to the kernel. However,
* provisions will be made to fsync the write before the next checkpoint.
@@ -551,60 +517,26 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
void
smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
{
- if (!(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer,
- isTemp))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not write block %u of relation %u/%u/%u: %m",
- blocknum,
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp);
}
/*
* smgrnblocks() -- Calculate the number of blocks in the
* supplied relation.
- *
- * Returns the number of blocks on success, aborts the current
- * transaction on failure.
*/
BlockNumber
smgrnblocks(SMgrRelation reln)
{
- BlockNumber nblocks;
-
- nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
-
- /*
- * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
- * fail --- but that's a good thing, because it would stop us from
- * extending the rel another block and having a block whose number
- * actually is InvalidBlockNumber.
- */
- if (nblocks == InvalidBlockNumber)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not count blocks of relation %u/%u/%u: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
-
- return nblocks;
+ return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
}
/*
* smgrtruncate() -- Truncate supplied relation to the specified number
* of blocks
- *
- * Returns the number of blocks on success, aborts the current
- * transaction on failure.
*/
-BlockNumber
+void
smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
{
- BlockNumber newblks;
-
/*
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
* just drop them without bothering to write the contents.
@@ -619,16 +551,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
/* Do the truncation */
- newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks,
- isTemp);
- if (newblks == InvalidBlockNumber)
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode,
- nblocks)));
+ (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp);
if (!isTemp)
{
@@ -642,7 +565,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
XLogRecData rdata;
xl_smgr_truncate xlrec;
- xlrec.blkno = newblks;
+ xlrec.blkno = nblocks;
xlrec.rnode = reln->smgr_rnode;
rdata.data = (char *) &xlrec;
@@ -653,8 +576,6 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN,
&rdata);
}
-
- return newblks;
}
/*
@@ -683,13 +604,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
void
smgrimmedsync(SMgrRelation reln)
{
- if (!(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln))
- ereport(ERROR,
- (errcode_for_file_access(),
- errmsg("could not sync relation %u/%u/%u: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode)));
+ (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln);
}
@@ -843,12 +758,7 @@ smgrcommit(void)
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_commit)
- {
- if (!(*(smgrsw[i].smgr_commit)) ())
- elog(ERROR, "transaction commit failed on %s: %m",
- DatumGetCString(DirectFunctionCall1(smgrout,
- Int16GetDatum(i))));
- }
+ (*(smgrsw[i].smgr_commit)) ();
}
}
@@ -863,12 +773,7 @@ smgrabort(void)
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_abort)
- {
- if (!(*(smgrsw[i].smgr_abort)) ())
- elog(ERROR, "transaction abort failed on %s: %m",
- DatumGetCString(DirectFunctionCall1(smgrout,
- Int16GetDatum(i))));
- }
+ (*(smgrsw[i].smgr_abort)) ();
}
}
@@ -883,12 +788,7 @@ smgrsync(void)
for (i = 0; i < NSmgr; i++)
{
if (smgrsw[i].smgr_sync)
- {
- if (!(*(smgrsw[i].smgr_sync)) ())
- elog(ERROR, "storage sync failed on %s: %m",
- DatumGetCString(DirectFunctionCall1(smgrout,
- Int16GetDatum(i))));
- }
+ (*(smgrsw[i].smgr_sync)) ();
}
}
@@ -910,7 +810,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
{
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
SMgrRelation reln;
- BlockNumber newblks;
reln = smgropen(xlrec->rnode);
@@ -931,17 +830,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
FreeSpaceMapTruncateRel(&reln->smgr_rnode, xlrec->blkno);
/* Do the truncation */
- newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
- xlrec->blkno,
- false);
- if (newblks == InvalidBlockNumber)
- ereport(WARNING,
- (errcode_for_file_access(),
- errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
- reln->smgr_rnode.spcNode,
- reln->smgr_rnode.dbNode,
- reln->smgr_rnode.relNode,
- xlrec->blkno)));
+ (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
+ xlrec->blkno,
+ false);
/* Also tell xlogutils.c about it */
XLogTruncateRelation(xlrec->rnode, xlrec->blkno);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index b768a5b5304..1c8963ec212 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.55 2006/03/24 04:32:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.56 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -72,7 +72,7 @@ extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln);
-extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
+extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
extern void smgrimmedsync(SMgrRelation reln);
extern void smgrDoPendingDeletes(bool isCommit);
@@ -91,20 +91,19 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
/* internals: move me elsewhere -- ay 7/94 */
/* in md.c */
-extern bool mdinit(void);
-extern bool mdclose(SMgrRelation reln);
-extern bool mdcreate(SMgrRelation reln, bool isRedo);
-extern bool mdunlink(RelFileNode rnode, bool isRedo);
-extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdinit(void);
+extern void mdclose(SMgrRelation reln);
+extern void mdcreate(SMgrRelation reln, bool isRedo);
+extern void mdunlink(RelFileNode rnode, bool isRedo);
+extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
-extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
-extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber mdnblocks(SMgrRelation reln);
-extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks,
- bool isTemp);
-extern bool mdimmedsync(SMgrRelation reln);
-extern bool mdsync(void);
+extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
+extern void mdimmedsync(SMgrRelation reln);
+extern void mdsync(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);