6 files changed, 402 insertions, 398 deletions
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index 0f643836a1c..b9569e58af4 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.61 2006/11/19 21:33:23 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.62 2007/01/03 18:11:01 tgl Exp $
  *
  * NOTES
  *	  Postgres hash pages look like ordinary relation pages.  The opaque
@@ -533,10 +533,8 @@ fail:
  *
  * This does not need to initialize the new bucket pages; we'll do that as
  * each one is used by _hash_expandtable().  But we have to extend the logical
- * EOF to the end of the splitpoint; otherwise the first overflow page
- * allocated beyond the splitpoint will represent a noncontiguous access,
- * which can confuse md.c (and will probably be forbidden by future changes
- * to md.c).
+ * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
+ * sync with ours, so that overflow-page allocation works correctly.
  *
  * We do this by writing a page of zeroes at the end of the splitpoint range.
  * We expect that the filesystem will ensure that the intervening pages read
@@ -559,7 +557,6 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
 {
 	BlockNumber	firstblock;
 	BlockNumber	lastblock;
-	BlockNumber	endblock;
 	char		zerobuf[BLCKSZ];
 
 	/*
@@ -577,24 +574,9 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
 	if (lastblock < firstblock || lastblock == InvalidBlockNumber)
 		return InvalidBlockNumber;
 
-	/* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
-
 	MemSet(zerobuf, 0, sizeof(zerobuf));
 
-	/*
-	 * XXX If the extension results in creation of new segment files,
-	 * we have to make sure that each non-last file is correctly filled out to
-	 * RELSEG_SIZE blocks.  This ought to be done inside mdextend, but
-	 * changing the smgr API seems best left for development cycle not late
-	 * beta.  Temporary fix for bug #2737.
-	 */
-#ifndef LET_OS_MANAGE_FILESIZE
-	for (endblock = firstblock | (RELSEG_SIZE - 1);
-		 endblock < lastblock;
-		 endblock += RELSEG_SIZE)
-		smgrextend(rel->rd_smgr, endblock, zerobuf, rel->rd_istemp);
-#endif
-
+	/* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
 	smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
 
 	return firstblock;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c
index 4951dca2182..4f886e8b07e 100644
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -36,9 +36,9 @@
  * that is of no value (since other backends have no interest in them yet)
  * and it created locking problems for CHECKPOINT, because the upper-level
  * pages were held exclusive-locked for long periods.  Now we just build
- * the pages in local memory and smgrwrite() them as we finish them.  They
- * will need to be re-read into shared buffers on first use after the build
- * finishes.
+ * the pages in local memory and smgrwrite or smgrextend them as we finish
+ * them.  They will need to be re-read into shared buffers on first use after
+ * the build finishes.
  *
  * Since the index will never be used unless it is completely built,
  * from a crash-recovery point of view there is no need to WAL-log the
@@ -57,7 +57,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.108 2007/01/03 18:11:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -309,9 +309,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 	{
 		if (!wstate->btws_zeropage)
 			wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
-		smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
-				  (char *) wstate->btws_zeropage,
-				  true);
+		smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++,
+				   (char *) wstate->btws_zeropage,
+				   true);
 	}
 
 	/*
@@ -319,10 +319,17 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
 	 * index, because there's no need for smgr to schedule an fsync for this
 	 * write; we'll do it ourselves before ending the build.
 	 */
-	smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
-
 	if (blkno == wstate->btws_pages_written)
+	{
+		/* extending the file... */
+		smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true);
 		wstate->btws_pages_written++;
+	}
+	else
+	{
+		/* overwriting a block we zero-filled before */
+		smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
+	}
 
 	pfree(page);
 }
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index c30aa69c555..5de8e96f5fd 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.208 2006/12/30 21:21:53 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.209 2007/01/03 18:11:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -6083,7 +6083,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
 		 * rel, because there's no need for smgr to schedule an fsync for this
 		 * write; we'll do it ourselves below.
 		 */
-		smgrwrite(dst, blkno, buf, true);
+		smgrextend(dst, blkno, buf, true);
 	}
 
 	/*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index f58ab03ce42..e0899a54600 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.123 2006/11/20 01:07:56 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.124 2007/01/03 18:11:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -22,6 +22,7 @@
 #include "miscadmin.h"
 #include "postmaster/bgwriter.h"
 #include "storage/fd.h"
+#include "storage/bufmgr.h"
 #include "storage/smgr.h"
 #include "utils/hsearch.h"
 #include "utils/memutils.h"
@@ -108,9 +109,16 @@ typedef struct
 static HTAB *pendingOpsTable = NULL;
 
 
+typedef enum					/* behavior for mdopen & _mdfd_getseg */
+{
+	EXTENSION_FAIL,				/* ereport if segment not present */
+	EXTENSION_RETURN_NULL,		/* return NULL if not present */
+	EXTENSION_CREATE			/* create new segments as needed */
+} ExtensionBehavior;
+
 /* local routines */
-static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);
-static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
+static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
+static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
 static MdfdVec *_fdvec_alloc(void);
 
 #ifndef LET_OS_MANAGE_FILESIZE
@@ -118,14 +126,14 @@ static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
 			  int oflags);
 #endif
 static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
-			 bool allowNotFound);
-static BlockNumber _mdnblocks(File file, Size blcksz);
+							 bool isTemp, ExtensionBehavior behavior);
+static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
 
 
 /*
  *	mdinit() -- Initialize private state for magnetic disk storage manager.
  */
-bool
+void
 mdinit(void)
 {
 	MdCxt = AllocSetContextCreate(TopMemoryContext,
@@ -154,8 +162,6 @@ mdinit(void)
 									  &hash_ctl,
 								   HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
 	}
-
-	return true;
 }
 
 /*
@@ -163,14 +169,14 @@ mdinit(void)
  *
  * If isRedo is true, it's okay for the relation to exist already.
  */
-bool
+void
 mdcreate(SMgrRelation reln, bool isRedo)
 {
 	char	   *path;
 	File		fd;
 
 	if (isRedo && reln->md_fd != NULL)
-		return true;			/* created and opened already... */
+		return;					/* created and opened already... */
 
 	Assert(reln->md_fd == NULL);
 
@@ -193,11 +199,15 @@ mdcreate(SMgrRelation reln, bool isRedo)
 		if (fd < 0)
 		{
 			pfree(path);
-			/* be sure to return the error reported by create, not open */
+			/* be sure to report the error from the create attempt, not the open */
 			errno = save_errno;
-			return false;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not create relation %u/%u/%u: %m",
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode)));
 		}
-		errno = 0;
 	}
 
 	pfree(path);
@@ -209,8 +219,6 @@ mdcreate(SMgrRelation reln, bool isRedo)
 #ifndef LET_OS_MANAGE_FILESIZE
 	reln->md_fd->mdfd_chain = NULL;
 #endif
-
-	return true;
 }
 
 /*
@@ -220,12 +228,12 @@ mdcreate(SMgrRelation reln, bool isRedo)
  * there won't be an SMgrRelation hashtable entry anymore.
  *
  * If isRedo is true, it's okay for the relation to be already gone.
+ * Also, any failure should be reported as WARNING not ERROR, because
+ * we are usually not in a transaction anymore when this is called.
  */
-bool
+void
 mdunlink(RelFileNode rnode, bool isRedo)
 {
-	bool		status = true;
-	int			save_errno = 0;
 	char	   *path;
 
 	path = relpath(rnode);
@@ -234,15 +242,17 @@ mdunlink(RelFileNode rnode, bool isRedo)
 	if (unlink(path) < 0)
 	{
 		if (!isRedo || errno != ENOENT)
-		{
-			status = false;
-			save_errno = errno;
-		}
+			ereport(WARNING,
+					(errcode_for_file_access(),
+					 errmsg("could not remove relation %u/%u/%u: %m",
+							rnode.spcNode,
+							rnode.dbNode,
+							rnode.relNode)));
 	}
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	/* Delete the additional segments, if any */
-	if (status)
+	else
 	{
 		char	   *segpath = (char *) palloc(strlen(path) + 12);
 		BlockNumber segno;
@@ -258,10 +268,13 @@ mdunlink(RelFileNode rnode, bool isRedo)
 			{
 				/* ENOENT is expected after the last segment... */
 				if (errno != ENOENT)
-				{
-					status = false;
-					save_errno = errno;
-				}
+					ereport(WARNING,
+							(errcode_for_file_access(),
+							 errmsg("could not remove segment %u of relation %u/%u/%u: %m",
+									segno,
+									rnode.spcNode,
+									rnode.dbNode,
+									rnode.relNode)));
 				break;
 			}
 		}
@@ -270,29 +283,44 @@ mdunlink(RelFileNode rnode, bool isRedo)
 #endif
 
 	pfree(path);
-
-	errno = save_errno;
-	return status;
 }
 
 /*
  *	mdextend() -- Add a block to the specified relation.
  *
- *		The semantics are basically the same as mdwrite(): write at the
- *		specified position.  However, we are expecting to extend the
- *		relation (ie, blocknum is >= the current EOF), and so in case of
- *		failure we clean up by truncating.
- *
- *		This routine returns true or false, with errno set as appropriate.
+ *		The semantics are nearly the same as mdwrite(): write at the
+ *		specified position.  However, this is to be used for the case of
+ *		extending a relation (i.e., blocknum is at or beyond the current
+ *		EOF).  Note that we assume writing a block beyond current EOF
+ *		causes intervening file space to become filled with zeroes.
  */
-bool
+void
 mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 {
 	long		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
 
-	v = _mdfd_getseg(reln, blocknum, false);
+	/* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+	Assert(blocknum >= mdnblocks(reln));
+#endif
+
+	/*
+	 * If a relation manages to grow to 2^32-1 blocks, refuse to extend it
+	 * any more --- we mustn't create a block whose number
+	 * actually is InvalidBlockNumber.
+	 */
+	if (blocknum == InvalidBlockNumber)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("cannot extend relation %u/%u/%u beyond %u blocks",
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode,
+						InvalidBlockNumber)));
+
+	v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -302,52 +330,64 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 #endif
 
 	/*
-	 * Note: because caller obtained blocknum by calling _mdnblocks, which did
-	 * a seek(SEEK_END), this seek is often redundant and will be optimized
-	 * away by fd.c.  It's not redundant, however, if there is a partial page
-	 * at the end of the file.	In that case we want to try to overwrite the
-	 * partial page with a full page.  It's also not redundant if bufmgr.c had
-	 * to dump another buffer of the same file to make room for the new page's
-	 * buffer.
+	 * Note: because caller usually obtained blocknum by calling mdnblocks,
+	 * which did a seek(SEEK_END), this seek is often redundant and will be
+	 * optimized away by fd.c.  It's not redundant, however, if there is a
+	 * partial page at the end of the file. In that case we want to try to
+	 * overwrite the partial page with a full page.  It's also not redundant
+	 * if bufmgr.c had to dump another buffer of the same file to make room
+	 * for the new page's buffer.
 	 */
 	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-		return false;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+						blocknum,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode)));
 
 	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
 	{
-		if (nbytes > 0)
-		{
-			int			save_errno = errno;
-
-			/* Remove the partially-written page */
-			FileTruncate(v->mdfd_vfd, seekpos);
-			FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
-			errno = save_errno;
-		}
-		return false;
+		if (nbytes < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not extend relation %u/%u/%u: %m",
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode),
+					 errhint("Check free disk space.")));
+		/* short write: complain appropriately */
+		ereport(ERROR,
+				(errcode(ERRCODE_DISK_FULL),
+				 errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u",
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode,
+						nbytes, BLCKSZ, blocknum),
+				 errhint("Check free disk space.")));
 	}
 
 	if (!isTemp)
-	{
-		if (!register_dirty_segment(reln, v))
-			return false;
-	}
+		register_dirty_segment(reln, v);
 
 #ifndef LET_OS_MANAGE_FILESIZE
-	Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
 #endif
-
-	return true;
 }
 
 /*
- *	mdopen() -- Open the specified relation.  ereport's on failure.
- *		(Optionally, can return NULL instead of ereport for ENOENT.)
+ *	mdopen() -- Open the specified relation.
  *
  * Note we only open the first segment, when there are multiple segments.
+ *
+ * If first segment is not present, either ereport or return NULL according
+ * to "behavior".  We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
+ * EXTENSION_CREATE means it's OK to extend an existing relation, not to
+ * invent one out of whole cloth.
  */
 static MdfdVec *
-mdopen(SMgrRelation reln, bool allowNotFound)
+mdopen(SMgrRelation reln, ExtensionBehavior behavior)
 {
 	MdfdVec    *mdfd;
 	char	   *path;
@@ -374,7 +414,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
 		if (fd < 0)
 		{
 			pfree(path);
-			if (allowNotFound && errno == ENOENT)
+			if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
 				return NULL;
 			ereport(ERROR,
 					(errcode_for_file_access(),
@@ -393,7 +433,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
 	mdfd->mdfd_segno = 0;
 #ifndef LET_OS_MANAGE_FILESIZE
 	mdfd->mdfd_chain = NULL;
-	Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
 #endif
 
 	return mdfd;
@@ -401,17 +441,15 @@ mdopen(SMgrRelation reln, bool allowNotFound)
 
 /*
  *	mdclose() -- Close the specified relation, if it isn't closed already.
- *
- *		Returns true or false with errno set as appropriate.
  */
-bool
+void
 mdclose(SMgrRelation reln)
 {
 	MdfdVec    *v = reln->md_fd;
 
 	/* No work if already closed */
 	if (v == NULL)
-		return true;
+		return;
 
 	reln->md_fd = NULL;			/* prevent dangling pointer after error */
 
@@ -432,22 +470,19 @@ mdclose(SMgrRelation reln)
 		FileClose(v->mdfd_vfd);
 	pfree(v);
 #endif
-
-	return true;
 }
 
 /*
  *	mdread() -- Read the specified block from a relation.
  */
-bool
+void
 mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
 {
-	bool		status;
 	long		seekpos;
 	int			nbytes;
 	MdfdVec    *v;
 
-	v = _mdfd_getseg(reln, blocknum, false);
+	v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -457,39 +492,66 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
 #endif
 
 	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-		return false;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+						blocknum,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode)));
 
-	status = true;
 	if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
 	{
+		if (nbytes < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not read block %u of relation %u/%u/%u: %m",
+							blocknum,
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode)));
 		/*
-		 * If we are at or past EOF, return zeroes without complaining. Also
-		 * substitute zeroes if we found a partial block at EOF.
-		 *
-		 * XXX this is really ugly, bad design.  However the current
-		 * implementation of hash indexes requires it, because hash index
-		 * pages are initialized out-of-order.
+		 * Short read: we are at or past EOF, or we read a partial block at
+		 * EOF.  Normally this is an error; upper levels should never try to
+		 * read a nonexistent block.  However, if zero_damaged_pages is ON
+		 * or we are InRecovery, we should instead return zeroes without
+		 * complaining.  This allows, for example, the case of trying to
+		 * update a block that was later truncated away.
 		 */
-		if (nbytes == 0 ||
-			(nbytes > 0 && mdnblocks(reln) == blocknum))
+		if (zero_damaged_pages || InRecovery)
 			MemSet(buffer, 0, BLCKSZ);
 		else
-			status = false;
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_CORRUPTED),
+					 errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes",
+							blocknum,
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode,
+							nbytes, BLCKSZ)));
 	}
-
-	return status;
 }
 
 /*
  *	mdwrite() -- Write the supplied block at the appropriate location.
+ *
+ *		This is to be used only for updating already-existing blocks of a
+ *		relation (i.e., those before the current EOF).  To extend a relation,
+ *		use mdextend().
  */
-bool
+void
 mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 {
 	long		seekpos;
+	int			nbytes;
 	MdfdVec    *v;
 
-	v = _mdfd_getseg(reln, blocknum, false);
+	/* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+	Assert(blocknum < mdnblocks(reln));
+#endif
+
+	v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -499,18 +561,38 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 #endif
 
 	if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-		return false;
-
-	if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
-		return false;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+						blocknum,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode)));
 
-	if (!isTemp)
+	if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
 	{
-		if (!register_dirty_segment(reln, v))
-			return false;
+		if (nbytes < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not write block %u of relation %u/%u/%u: %m",
+							blocknum,
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode)));
+		/* short write: complain appropriately */
+		ereport(ERROR,
+				(errcode(ERRCODE_DISK_FULL),
+				 errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes",
+						blocknum,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode,
+						nbytes, BLCKSZ),
+				 errhint("Check free disk space.")));
 	}
 
-	return true;
+	if (!isTemp)
+		register_dirty_segment(reln, v);
 }
 
 /*
@@ -520,13 +602,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  *		and added to the mdfd_chain list.  If this routine has not been
  *		called, then only segments up to the last one actually touched
  *		are present in the chain.
- *
- *		Returns # of blocks, or InvalidBlockNumber on error.
  */
 BlockNumber
 mdnblocks(SMgrRelation reln)
 {
-	MdfdVec    *v = mdopen(reln, false);
+	MdfdVec    *v = mdopen(reln, EXTENSION_FAIL);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	BlockNumber nblocks;
@@ -552,7 +632,7 @@ mdnblocks(SMgrRelation reln)
 
 	for (;;)
 	{
-		nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
+		nblocks = _mdnblocks(reln, v);
 		if (nblocks > ((BlockNumber) RELSEG_SIZE))
 			elog(FATAL, "segment too big");
 		if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -573,22 +653,26 @@ mdnblocks(SMgrRelation reln)
 			 */
 			v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
 			if (v->mdfd_chain == NULL)
-				return InvalidBlockNumber;		/* failed? */
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not open segment %u of relation %u/%u/%u: %m",
+								segno,
+								reln->smgr_rnode.spcNode,
+								reln->smgr_rnode.dbNode,
+								reln->smgr_rnode.relNode)));
 		}
 
 		v = v->mdfd_chain;
 	}
 #else
-	return _mdnblocks(v->mdfd_vfd, BLCKSZ);
+	return _mdnblocks(reln, v);
 #endif
 }
 
 /*
  *	mdtruncate() -- Truncate relation to specified number of blocks.
- *
- *		Returns # of blocks or InvalidBlockNumber on error.
  */
-BlockNumber
+void
 mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 {
 	MdfdVec    *v;
@@ -603,14 +687,22 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 	 * that truncation loop will get them all!
 	 */
 	curnblk = mdnblocks(reln);
-	if (curnblk == InvalidBlockNumber)
-		return InvalidBlockNumber;		/* mdnblocks failed */
 	if (nblocks > curnblk)
-		return InvalidBlockNumber;		/* bogus request */
+	{
+		/* Bogus request ... but no complaint if InRecovery */
+		if (InRecovery)
+			return;
+		ereport(ERROR,
+				(errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now",
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode,
+						nblocks, curnblk)));
+	}
 	if (nblocks == curnblk)
-		return nblocks;			/* no work */
+		return;					/* no work */
 
-	v = mdopen(reln, false);
+	v = mdopen(reln, EXTENSION_FAIL);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	priorblocks = 0;
@@ -626,12 +718,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 			 * not delete it, for reasons explained in the header comments.
 			 */
 			if (FileTruncate(v->mdfd_vfd, 0) < 0)
-				return InvalidBlockNumber;
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+								reln->smgr_rnode.spcNode,
+								reln->smgr_rnode.dbNode,
+								reln->smgr_rnode.relNode,
+								nblocks)));
 			if (!isTemp)
-			{
-				if (!register_dirty_segment(reln, v))
-					return InvalidBlockNumber;
-			}
+				register_dirty_segment(reln, v);
 			v = v->mdfd_chain;
 			Assert(ov != reln->md_fd);	/* we never drop the 1st segment */
 			pfree(ov);
@@ -649,12 +744,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 			BlockNumber lastsegblocks = nblocks - priorblocks;
 
 			if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
-				return InvalidBlockNumber;
+				ereport(ERROR,
+						(errcode_for_file_access(),
+						 errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+								reln->smgr_rnode.spcNode,
+								reln->smgr_rnode.dbNode,
+								reln->smgr_rnode.relNode,
+								nblocks)));
 			if (!isTemp)
-			{
-				if (!register_dirty_segment(reln, v))
-					return InvalidBlockNumber;
-			}
+				register_dirty_segment(reln, v);
 			v = v->mdfd_chain;
 			ov->mdfd_chain = NULL;
 		}
@@ -670,15 +768,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 	}
 #else
 	if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
-		return InvalidBlockNumber;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+			  errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+					 reln->smgr_rnode.spcNode,
+					 reln->smgr_rnode.dbNode,
+					 reln->smgr_rnode.relNode,
+					 nblocks)));
 	if (!isTemp)
-	{
-		if (!register_dirty_segment(reln, v))
-			return InvalidBlockNumber;
-	}
+		register_dirty_segment(reln, v);
 #endif
-
-	return nblocks;
 }
 
 /*
@@ -687,7 +786,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
  * Note that only writes already issued are synced; this routine knows
  * nothing of dirty buffers that may exist inside the buffer manager.
  */
-bool
+void
 mdimmedsync(SMgrRelation reln)
 {
 	MdfdVec    *v;
@@ -698,24 +797,32 @@ mdimmedsync(SMgrRelation reln)
 	 * that fsync loop will get them all!
 	 */
 	curnblk = mdnblocks(reln);
-	if (curnblk == InvalidBlockNumber)
-		return false;			/* mdnblocks failed */
 
-	v = mdopen(reln, false);
+	v = mdopen(reln, EXTENSION_FAIL);
 
 #ifndef LET_OS_MANAGE_FILESIZE
 	while (v != NULL)
 	{
 		if (FileSync(v->mdfd_vfd) < 0)
-			return false;
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+							v->mdfd_segno,
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode)));
 		v = v->mdfd_chain;
 	}
 #else
 	if (FileSync(v->mdfd_vfd) < 0)
-		return false;
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+						v->mdfd_segno,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode)));
 #endif
-
-	return true;
 }
 
 /*
@@ -724,7 +831,7 @@ mdimmedsync(SMgrRelation reln)
  * This is only called during checkpoints, and checkpoints should only
  * occur in processes that have created a pendingOpsTable.
  */
-bool
+void
 mdsync(void)
 {
 	HASH_SEQ_STATUS hstat;
@@ -732,7 +839,7 @@ mdsync(void)
 	int			absorb_counter;
 
 	if (!pendingOpsTable)
-		return false;
+		elog(ERROR, "cannot sync without a pendingOpsTable");
 
 	/*
 	 * If we are in the bgwriter, the sync had better include all fsync
@@ -795,21 +902,18 @@ mdsync(void)
 			 */
 			seg = _mdfd_getseg(reln,
 							   entry->segno * ((BlockNumber) RELSEG_SIZE),
-							   true);
+							   false, EXTENSION_RETURN_NULL);
 			if (seg)
 			{
 				if (FileSync(seg->mdfd_vfd) < 0 &&
 					errno != ENOENT)
-				{
-					ereport(LOG,
+					ereport(ERROR,
 							(errcode_for_file_access(),
 							 errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
 									entry->segno,
 									entry->rnode.spcNode,
 									entry->rnode.dbNode,
 									entry->rnode.relNode)));
-					return false;
-				}
 			}
 		}
 
@@ -818,8 +922,6 @@ mdsync(void)
 						HASH_REMOVE, NULL) == NULL)
 			elog(ERROR, "pendingOpsTable corrupted");
 	}
-
-	return true;
 }
 
 /*
@@ -830,11 +932,8 @@ mdsync(void)
  * to the background writer process.  If that fails, just do the fsync
  * locally before returning (we expect this will not happen often enough
  * to be a performance problem).
- *
- * A false result implies I/O failure during local fsync.  errno will be
- * valid for error reporting.
  */
-static bool
+static void
 register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
 {
 	if (pendingOpsTable)
@@ -847,17 +946,21 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
 		entry.segno = seg->mdfd_segno;
 
 		(void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL);
-		return true;
 	}
 	else
 	{
 		if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
-			return true;
-	}
+			return;				/* passed it off successfully */
 
-	if (FileSync(seg->mdfd_vfd) < 0)
-		return false;
-	return true;
+		if (FileSync(seg->mdfd_vfd) < 0)
+			ereport(ERROR,
+					(errcode_for_file_access(),
+					 errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+							seg->mdfd_segno,
+							reln->smgr_rnode.spcNode,
+							reln->smgr_rnode.dbNode,
+							reln->smgr_rnode.relNode)));
+	}
 }
 
 /*
@@ -931,7 +1034,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
 	v->mdfd_vfd = fd;
 	v->mdfd_segno = segno;
 	v->mdfd_chain = NULL;
-	Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+	Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
 
 	/* all done */
 	return v;
@@ -940,51 +1043,66 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
 
 /*
  *	_mdfd_getseg() -- Find the segment of the relation holding the
- *		specified block.  ereport's on failure.
- *		(Optionally, can return NULL instead of ereport for ENOENT.)
+ *		specified block.
+ *
+ * If the segment doesn't exist, we ereport, return NULL, or create the
+ * segment, according to "behavior".  Note: isTemp need only be correct
+ * in the EXTENSION_CREATE case.
  */
 static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
+_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
+			 ExtensionBehavior behavior)
 {
-	MdfdVec    *v = mdopen(reln, allowNotFound);
+	MdfdVec    *v = mdopen(reln, behavior);
 
 #ifndef LET_OS_MANAGE_FILESIZE
-	BlockNumber segstogo;
+	BlockNumber targetseg;
 	BlockNumber nextsegno;
 
 	if (!v)
-		return NULL;			/* only possible if allowNotFound */
+		return NULL;			/* only possible if EXTENSION_RETURN_NULL */
 
-	for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1;
-		 segstogo > 0;
-		 nextsegno++, segstogo--)
+	targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
+	for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)
 	{
+		Assert(nextsegno == v->mdfd_segno + 1);
+
 		if (v->mdfd_chain == NULL)
 		{
 			/*
-			 * We will create the next segment only if the target block is
-			 * within it.  This prevents Sorcerer's Apprentice syndrome if a
-			 * bug at higher levels causes us to be handed a ridiculously
-			 * large blkno --- otherwise we could create many thousands of
-			 * empty segment files before reaching the "target" block.	We
-			 * should never need to create more than one new segment per call,
-			 * so this restriction seems reasonable.
+			 * Normally we will create new segments only if authorized by
+			 * the caller (i.e., we are doing mdextend()).  But when doing
+			 * WAL recovery, create segments anyway; this allows cases such as
+			 * replaying WAL data that has a write into a high-numbered
+			 * segment of a relation that was later deleted.  We want to go
+			 * ahead and create the segments so we can finish out the replay.
 			 *
-			 * BUT: when doing WAL recovery, disable this logic and create
-			 * segments unconditionally.  In this case it seems better to
-			 * assume the given blkno is good (it presumably came from a
-			 * CRC-checked WAL record); furthermore this lets us cope in the
-			 * case where we are replaying WAL data that has a write into a
-			 * high-numbered segment of a relation that was later deleted.	We
-			 * want to go ahead and create the segments so we can finish out
-			 * the replay.
+			 * We have to maintain the invariant that segments before the
+			 * last active segment are of size RELSEG_SIZE; therefore, pad
+			 * them out with zeroes if needed.  (This only matters if caller
+			 * is extending the relation discontiguously, but that can happen
+			 * in hash indexes.)
 			 */
-			v->mdfd_chain = _mdfd_openseg(reln,
-										  nextsegno,
-								(segstogo == 1 || InRecovery) ? O_CREAT : 0);
+			if (behavior == EXTENSION_CREATE || InRecovery)
+			{
+				if (_mdnblocks(reln, v) < RELSEG_SIZE)
+				{
+					char   *zerobuf = palloc0(BLCKSZ);
+
+					mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+							 zerobuf, isTemp);
+					pfree(zerobuf);
+				}
+				v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);
+			}
+			else
+			{
+				/* We won't create the segment if it doesn't exist */
+				v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);
+			}
 			if (v->mdfd_chain == NULL)
 			{
-				if (allowNotFound && errno == ENOENT)
+				if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
 					return NULL;
 				ereport(ERROR,
 						(errcode_for_file_access(),
@@ -1007,12 +1125,19 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
  * Get number of blocks present in a single disk file
  */
 static BlockNumber
-_mdnblocks(File file, Size blcksz)
+_mdnblocks(SMgrRelation reln, MdfdVec *seg)
 {
 	long		len;
 
-	len = FileSeek(file, 0L, SEEK_END);
+	len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
 	if (len < 0)
-		return 0;				/* on failure, assume file is empty */
-	return (BlockNumber) (len / blcksz);
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
+						seg->mdfd_segno,
+						reln->smgr_rnode.spcNode,
+						reln->smgr_rnode.dbNode,
+						reln->smgr_rnode.relNode)));
+	/* note that this calculation will ignore any partial block at EOF */
+	return (BlockNumber) (len / BLCKSZ);
 }
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0ceb800b363..1a3a00f2951 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.101 2006/10/04 00:29:58 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.102 2007/01/03 18:11:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -31,30 +31,33 @@
 /*
  * This struct of function pointers defines the API between smgr.c and
  * any individual storage manager module.  Note that smgr subfunctions are
- * generally expected to return TRUE on success, FALSE on error.  (For
- * nblocks and truncate we instead say that returning InvalidBlockNumber
- * indicates an error.)
+ * generally expected to report problems via elog(ERROR).  An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error.  Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
  */
 typedef struct f_smgr
 {
-	bool		(*smgr_init) (void);	/* may be NULL */
-	bool		(*smgr_shutdown) (void);		/* may be NULL */
-	bool		(*smgr_close) (SMgrRelation reln);
-	bool		(*smgr_create) (SMgrRelation reln, bool isRedo);
-	bool		(*smgr_unlink) (RelFileNode rnode, bool isRedo);
-	bool		(*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
+	void		(*smgr_init) (void);	/* may be NULL */
+	void		(*smgr_shutdown) (void);		/* may be NULL */
+	void		(*smgr_close) (SMgrRelation reln);
+	void		(*smgr_create) (SMgrRelation reln, bool isRedo);
+	void		(*smgr_unlink) (RelFileNode rnode, bool isRedo);
+	void		(*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
 											char *buffer, bool isTemp);
-	bool		(*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
+	void		(*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
 										  char *buffer);
-	bool		(*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
+	void		(*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
 										   char *buffer, bool isTemp);
 	BlockNumber (*smgr_nblocks) (SMgrRelation reln);
-	BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
-											  bool isTemp);
-	bool		(*smgr_immedsync) (SMgrRelation reln);
-	bool		(*smgr_commit) (void);	/* may be NULL */
-	bool		(*smgr_abort) (void);	/* may be NULL */
-	bool		(*smgr_sync) (void);	/* may be NULL */
+	void		(*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
+								  bool isTemp);
+	void		(*smgr_immedsync) (SMgrRelation reln);
+	void		(*smgr_commit) (void);	/* may be NULL */
+	void		(*smgr_abort) (void);	/* may be NULL */
+	void		(*smgr_sync) (void);	/* may be NULL */
 } f_smgr;
 
 
@@ -152,12 +155,7 @@ smgrinit(void)
 	for (i = 0; i < NSmgr; i++)
 	{
 		if (smgrsw[i].smgr_init)
-		{
-			if (!(*(smgrsw[i].smgr_init)) ())
-				elog(FATAL, "smgr initialization failed on %s: %m",
-					 DatumGetCString(DirectFunctionCall1(smgrout,
-														 Int16GetDatum(i))));
-		}
+			(*(smgrsw[i].smgr_init)) ();
 	}
 
 	/* register the shutdown proc */
@@ -175,12 +173,7 @@ smgrshutdown(int code, Datum arg)
 	for (i = 0; i < NSmgr; i++)
 	{
 		if (smgrsw[i].smgr_shutdown)
-		{
-			if (!(*(smgrsw[i].smgr_shutdown)) ())
-				elog(FATAL, "smgr shutdown failed on %s: %m",
-					 DatumGetCString(DirectFunctionCall1(smgrout,
-														 Int16GetDatum(i))));
-		}
+			(*(smgrsw[i].smgr_shutdown)) ();
 	}
 }
 
@@ -256,13 +249,7 @@ smgrclose(SMgrRelation reln)
 {
 	SMgrRelation *owner;
 
-	if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not close relation %u/%u/%u: %m",
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
+	(*(smgrsw[reln->smgr_which].smgr_close)) (reln);
 
 	owner = reln->smgr_owner;
 
@@ -354,13 +341,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
 							reln->smgr_rnode.dbNode,
 							isRedo);
 
-	if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not create relation %u/%u/%u: %m",
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
+	(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo);
 
 	if (isRedo)
 		return;
@@ -482,38 +463,26 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
 	/*
 	 * And delete the physical files.
 	 *
-	 * Note: we treat deletion failure as a WARNING, not an error, because
-	 * we've already decided to commit or abort the current xact.
+	 * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+	 * ERROR, because we've already decided to commit or abort the current
+	 * xact.
 	 */
-	if (!(*(smgrsw[which].smgr_unlink)) (rnode, isRedo))
-		ereport(WARNING,
-				(errcode_for_file_access(),
-				 errmsg("could not remove relation %u/%u/%u: %m",
-						rnode.spcNode,
-						rnode.dbNode,
-						rnode.relNode)));
+	(*(smgrsw[which].smgr_unlink)) (rnode, isRedo);
 }
 
 /*
  *	smgrextend() -- Add a new block to a file.
  *
- *		The semantics are basically the same as smgrwrite(): write at the
- *		specified position.  However, we are expecting to extend the
- *		relation (ie, blocknum is the current EOF), and so in case of
- *		failure we clean up by truncating.
+ *		The semantics are nearly the same as smgrwrite(): write at the
+ *		specified position.  However, this is to be used for the case of
+ *		extending a relation (i.e., blocknum is at or beyond the current
+ *		EOF).  Note that we assume writing a block beyond current EOF
+ *		causes intervening file space to become filled with zeroes.
  */
 void
 smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 {
-	if (!(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer,
-													isTemp))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not extend relation %u/%u/%u: %m",
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode),
-				 errhint("Check free disk space.")));
+	(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp);
 }
 
 /*
@@ -527,19 +496,16 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 void
 smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
 {
-	if (!(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not read block %u of relation %u/%u/%u: %m",
-						blocknum,
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
+	(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer);
 }
 
 /*
  *	smgrwrite() -- Write the supplied buffer out.
  *
+ *		This is to be used only for updating already-existing blocks of a
+ *		relation (i.e., those before the current EOF).  To extend a relation,
+ *		use smgrextend().
+ *
  *		This is not a synchronous write -- the block is not necessarily
  *		on disk at return, only dumped out to the kernel.  However,
  *		provisions will be made to fsync the write before the next checkpoint.
@@ -551,60 +517,26 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
 void
 smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
 {
-	if (!(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer,
-												   isTemp))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not write block %u of relation %u/%u/%u: %m",
-						blocknum,
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
+	(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp);
 }
 
 /*
  *	smgrnblocks() -- Calculate the number of blocks in the
  *					 supplied relation.
- *
- *		Returns the number of blocks on success, aborts the current
- *		transaction on failure.
  */
 BlockNumber
 smgrnblocks(SMgrRelation reln)
 {
-	BlockNumber nblocks;
-
-	nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
-
-	/*
-	 * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
-	 * fail --- but that's a good thing, because it would stop us from
-	 * extending the rel another block and having a block whose number
-	 * actually is InvalidBlockNumber.
-	 */
-	if (nblocks == InvalidBlockNumber)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not count blocks of relation %u/%u/%u: %m",
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
-
-	return nblocks;
+	return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
 }
 
 /*
  *	smgrtruncate() -- Truncate supplied relation to the specified number
  *					  of blocks
- *
- *		Returns the number of blocks on success, aborts the current
- *		transaction on failure.
  */
-BlockNumber
+void
 smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 {
-	BlockNumber newblks;
-
 	/*
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
@@ -619,16 +551,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 	FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
 
 	/* Do the truncation */
-	newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks,
-														   isTemp);
-	if (newblks == InvalidBlockNumber)
-		ereport(ERROR,
-				(errcode_for_file_access(),
-			  errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
-					 reln->smgr_rnode.spcNode,
-					 reln->smgr_rnode.dbNode,
-					 reln->smgr_rnode.relNode,
-					 nblocks)));
+	(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp);
 
 	if (!isTemp)
 	{
@@ -642,7 +565,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 		XLogRecData rdata;
 		xl_smgr_truncate xlrec;
 
-		xlrec.blkno = newblks;
+		xlrec.blkno = nblocks;
 		xlrec.rnode = reln->smgr_rnode;
 
 		rdata.data = (char *) &xlrec;
@@ -653,8 +576,6 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 		lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN,
 						 &rdata);
 	}
-
-	return newblks;
 }
 
 /*
@@ -683,13 +604,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
 void
 smgrimmedsync(SMgrRelation reln)
 {
-	if (!(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln))
-		ereport(ERROR,
-				(errcode_for_file_access(),
-				 errmsg("could not sync relation %u/%u/%u: %m",
-						reln->smgr_rnode.spcNode,
-						reln->smgr_rnode.dbNode,
-						reln->smgr_rnode.relNode)));
+	(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln);
 }
 
 
@@ -843,12 +758,7 @@ smgrcommit(void)
 	for (i = 0; i < NSmgr; i++)
 	{
 		if (smgrsw[i].smgr_commit)
-		{
-			if (!(*(smgrsw[i].smgr_commit)) ())
-				elog(ERROR, "transaction commit failed on %s: %m",
-					 DatumGetCString(DirectFunctionCall1(smgrout,
-														 Int16GetDatum(i))));
-		}
+			(*(smgrsw[i].smgr_commit)) ();
 	}
 }
 
@@ -863,12 +773,7 @@ smgrabort(void)
 	for (i = 0; i < NSmgr; i++)
 	{
 		if (smgrsw[i].smgr_abort)
-		{
-			if (!(*(smgrsw[i].smgr_abort)) ())
-				elog(ERROR, "transaction abort failed on %s: %m",
-					 DatumGetCString(DirectFunctionCall1(smgrout,
-														 Int16GetDatum(i))));
-		}
+			(*(smgrsw[i].smgr_abort)) ();
 	}
 }
 
@@ -883,12 +788,7 @@ smgrsync(void)
 	for (i = 0; i < NSmgr; i++)
 	{
 		if (smgrsw[i].smgr_sync)
-		{
-			if (!(*(smgrsw[i].smgr_sync)) ())
-				elog(ERROR, "storage sync failed on %s: %m",
-					 DatumGetCString(DirectFunctionCall1(smgrout,
-														 Int16GetDatum(i))));
-		}
+			(*(smgrsw[i].smgr_sync)) ();
 	}
 }
 
@@ -910,7 +810,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
 	{
 		xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
 		SMgrRelation reln;
-		BlockNumber newblks;
 
 		reln = smgropen(xlrec->rnode);
 
@@ -931,17 +830,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
 		FreeSpaceMapTruncateRel(&reln->smgr_rnode, xlrec->blkno);
 
 		/* Do the truncation */
-		newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
-															   xlrec->blkno,
-															   false);
-		if (newblks == InvalidBlockNumber)
-			ereport(WARNING,
-					(errcode_for_file_access(),
-			  errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
-					 reln->smgr_rnode.spcNode,
-					 reln->smgr_rnode.dbNode,
-					 reln->smgr_rnode.relNode,
-					 xlrec->blkno)));
+		(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
+													 xlrec->blkno,
+													 false);
 
 		/* Also tell xlogutils.c about it */
 		XLogTruncateRelation(xlrec->rnode, xlrec->blkno);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index b768a5b5304..1c8963ec212 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.55 2006/03/24 04:32:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.56 2007/01/03 18:11:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -72,7 +72,7 @@ extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
 extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
 		  bool isTemp);
 extern BlockNumber smgrnblocks(SMgrRelation reln);
-extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
+extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
 			 bool isTemp);
 extern void smgrimmedsync(SMgrRelation reln);
 extern void smgrDoPendingDeletes(bool isCommit);
@@ -91,20 +91,19 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
 /* internals: move me elsewhere -- ay 7/94 */
 
 /* in md.c */
-extern bool mdinit(void);
-extern bool mdclose(SMgrRelation reln);
-extern bool mdcreate(SMgrRelation reln, bool isRedo);
-extern bool mdunlink(RelFileNode rnode, bool isRedo);
-extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdinit(void);
+extern void mdclose(SMgrRelation reln);
+extern void mdcreate(SMgrRelation reln, bool isRedo);
+extern void mdunlink(RelFileNode rnode, bool isRedo);
+extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
 		 bool isTemp);
-extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
-extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
 		bool isTemp);
 extern BlockNumber mdnblocks(SMgrRelation reln);
-extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks,
-		   bool isTemp);
-extern bool mdimmedsync(SMgrRelation reln);
-extern bool mdsync(void);
+extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
+extern void mdimmedsync(SMgrRelation reln);
+extern void mdsync(void);
 
 extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);