about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/backend/storage/buffer/bufmgr.c | 14
-rw-r--r-- src/backend/storage/smgr/md.c | 187
-rw-r--r-- src/backend/storage/smgr/mm.c | 5
-rw-r--r-- src/backend/storage/smgr/smgr.c | 13
-rw-r--r-- src/include/storage/smgr.h | 11
5 files changed, 95 insertions, 135 deletions
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 0887f3d1ecd..b5eb53b03a3 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.78 2000/04/09 04:43:18 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.79 2000/04/10 23:41:49 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1127,7 +1127,8 @@ BufferSync()
bufHdr->blind.relname,
bufdb, bufrel,
bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ (char *) MAKE_PTR(bufHdr->data),
+ true); /* must fsync */
}
else
{
@@ -1529,7 +1530,8 @@ BufferReplace(BufferDesc *bufHdr)
status = smgrblindwrt(DEFAULT_SMGR, bufHdr->blind.dbname,
bufHdr->blind.relname, bufdb, bufrel,
bufHdr->tag.blockNum,
- (char *) MAKE_PTR(bufHdr->data));
+ (char *) MAKE_PTR(bufHdr->data),
+ false); /* no fsync */
}
#ifndef OPTIMIZE_SINGLE
@@ -1544,9 +1546,11 @@ BufferReplace(BufferDesc *bufHdr)
return FALSE;
/* If we had marked this buffer as needing to be fsync'd, we can forget
- * about that, because it's now the storage manager's responsibility.
+ * about that, because it's now the storage manager's responsibility
+ * (but only if we called smgrwrite, not smgrblindwrt).
*/
- ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
+ if (reln != (Relation) NULL)
+ ClearBufferDirtiedByMe(BufferDescriptorGetBuffer(bufHdr), bufHdr);
BufferFlushCount++;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 233bbb0ac25..b30b0386af8 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.65 2000/04/09 04:43:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.66 2000/04/10 23:41:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -48,11 +48,10 @@
typedef struct _MdfdVec
{
int mdfd_vfd; /* fd number in vfd pool */
- int mdfd_flags; /* free, temporary */
+ int mdfd_flags; /* fd status flags */
/* these are the assigned bits in mdfd_flags: */
#define MDFD_FREE (1 << 0)/* unused entry */
-#define MDFD_TEMP (1 << 1)/* close this entry at transaction end */
int mdfd_lstbcnt; /* most recent block count */
int mdfd_nextFree; /* next free vector */
@@ -72,8 +71,8 @@ static void mdclose_fd(int fd);
static int _mdfd_getrelnfd(Relation reln);
static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
static MdfdVec *_mdfd_getseg(Relation reln, int blkno);
-static MdfdVec *_mdfd_blind_getseg(char *dbname, char *relname,
- Oid dbid, Oid relid, int blkno);
+static int _mdfd_blind_getseg(char *dbname, char *relname,
+ Oid dbid, Oid relid, int blkno);
static int _fdvec_alloc(void);
static void _fdvec_free(int);
static BlockNumber _mdnblocks(File file, Size blcksz);
@@ -572,7 +571,8 @@ mdflush(Relation reln, BlockNumber blocknum, char *buffer)
*
* We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs. Otherwise
- * this is just like mdwrite().
+ * this is much like mdwrite(). If dofsync is TRUE, then we fsync
+ * the file, making it more like mdflush().
*/
int
mdblindwrt(char *dbname,
@@ -580,15 +580,16 @@ mdblindwrt(char *dbname,
Oid dbid,
Oid relid,
BlockNumber blkno,
- char *buffer)
+ char *buffer,
+ bool dofsync)
{
int status;
long seekpos;
- MdfdVec *v;
+ int fd;
- v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
+ fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
- if (v == NULL)
+ if (fd < 0)
return SM_FAIL;
#ifndef LET_OS_MANAGE_FILESIZE
@@ -601,11 +602,22 @@ mdblindwrt(char *dbname,
seekpos = (long) (BLCKSZ * (blkno));
#endif
- if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
+ if (lseek(fd, seekpos, SEEK_SET) != seekpos)
+ {
+ close(fd);
return SM_FAIL;
+ }
status = SM_SUCCESS;
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
+
+ /* write and optionally sync the block */
+ if (write(fd, buffer, BLCKSZ) != BLCKSZ)
+ status = SM_FAIL;
+ else if (dofsync &&
+ pg_fsync(fd) < 0)
+ status = SM_FAIL;
+
+ if (close(fd) < 0)
status = SM_FAIL;
return status;
@@ -633,7 +645,8 @@ mdmarkdirty(Relation reln, BlockNumber blkno)
*
* We have to be able to do this using only the name and OID of
* the database and relation in which the block belongs. Otherwise
- * this is just like mdmarkdirty().
+ * this is much like mdmarkdirty(). However, we do the fsync immediately
+ * rather than building md/fd datastructures to postpone it till later.
*/
int
mdblindmarkdirty(char *dbname,
@@ -642,16 +655,23 @@ mdblindmarkdirty(char *dbname,
Oid relid,
BlockNumber blkno)
{
- MdfdVec *v;
+ int status;
+ int fd;
- v = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
+ fd = _mdfd_blind_getseg(dbname, relname, dbid, relid, blkno);
- if (v == NULL)
+ if (fd < 0)
return SM_FAIL;
- FileMarkDirty(v->mdfd_vfd);
+ status = SM_SUCCESS;
- return SM_SUCCESS;
+ if (pg_fsync(fd) < 0)
+ status = SM_FAIL;
+
+ if (close(fd) < 0)
+ status = SM_FAIL;
+
+ return status;
}
/*
@@ -820,24 +840,15 @@ mdcommit()
v = &Md_fdvec[i];
if (v->mdfd_flags & MDFD_FREE)
continue;
- if (v->mdfd_flags & MDFD_TEMP)
- {
- /* Sync and close the file */
- mdclose_fd(i);
- }
- else
- {
- /* Sync, but keep the file entry */
-
+ /* Sync the file entry */
#ifndef LET_OS_MANAGE_FILESIZE
- for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
+ for ( ; v != (MdfdVec *) NULL; v = v->mdfd_chain)
#else
- if (v != (MdfdVec *) NULL)
+ if (v != (MdfdVec *) NULL)
#endif
- {
- if (FileSync(v->mdfd_vfd) < 0)
- return SM_FAIL;
- }
+ {
+ if (FileSync(v->mdfd_vfd) < 0)
+ return SM_FAIL;
}
}
@@ -854,21 +865,9 @@ mdcommit()
int
mdabort()
{
- int i;
- MdfdVec *v;
-
- for (i = 0; i < CurFd; i++)
- {
- v = &Md_fdvec[i];
- if (v->mdfd_flags & MDFD_FREE)
- continue;
- if (v->mdfd_flags & MDFD_TEMP)
- {
- /* Close the file */
- mdclose_fd(i);
- }
- }
-
+ /* We don't actually have to do anything here. fd.c will discard
+ * fsync-needed bits in its AtEOXact_Files() routine.
+ */
return SM_SUCCESS;
}
@@ -1057,102 +1056,52 @@ _mdfd_getseg(Relation reln, int blkno)
return v;
}
-/* Find the segment of the relation holding the specified block.
- * This is the same as _mdfd_getseg() except that we must work
- * "blind" with no Relation struct.
+/*
+ * Find the segment of the relation holding the specified block.
*
- * NOTE: we have no easy way to tell whether a FD already exists for the
- * target relation, so we always make a new one. This should probably
- * be improved somehow, but I doubt it's a significant performance issue
- * under normal circumstances. The FD is marked to be closed at end of xact
- * so that we don't accumulate a lot of dead FDs.
+ * This performs the same work as _mdfd_getseg() except that we must work
+ * "blind" with no Relation struct. We assume that we are not likely to
+ * touch the same relation again soon, so we do not create an FD entry for
+ * the relation --- we just open a kernel file descriptor which will be
+ * used and promptly closed. The return value is the kernel descriptor,
+ * or -1 on failure.
*/
-static MdfdVec *
+static int
_mdfd_blind_getseg(char *dbname, char *relname, Oid dbid, Oid relid,
int blkno)
{
- MdfdVec *v;
char *path;
int fd;
- int vfd;
#ifndef LET_OS_MANAGE_FILESIZE
int segno;
- int targsegno;
#endif
- /* construct the path to the file and open it */
+ /* construct the path to the relation */
path = relpath_blind(dbname, relname, dbid, relid);
-#ifndef __CYGWIN32__
- fd = FileNameOpenFile(path, O_RDWR, 0600);
-#else
- fd = FileNameOpenFile(path, O_RDWR | O_BINARY, 0600);
-#endif
-
- if (fd < 0)
- return NULL;
-
- vfd = _fdvec_alloc();
- if (vfd < 0)
- return NULL;
-
- Md_fdvec[vfd].mdfd_vfd = fd;
- Md_fdvec[vfd].mdfd_flags = MDFD_TEMP;
- Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
#ifndef LET_OS_MANAGE_FILESIZE
- Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
-
-#ifdef DIAGNOSTIC
- if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
- elog(FATAL, "segment too big on relopen!");
-#endif
-
- targsegno = blkno / RELSEG_SIZE;
- for (v = &Md_fdvec[vfd], segno = 1; segno <= targsegno; segno++)
+ /* append the '.segno', if needed */
+ segno = blkno / RELSEG_SIZE;
+ if (segno > 0)
{
- char *segpath;
- MdfdVec *newv;
- MemoryContext oldcxt;
+ char *segpath = (char *) palloc(strlen(path) + 12);
- segpath = (char *) palloc(strlen(path) + 12);
sprintf(segpath, "%s.%d", path, segno);
-
-#ifndef __CYGWIN32__
- fd = FileNameOpenFile(segpath, O_RDWR | O_CREAT, 0600);
-#else
- fd = FileNameOpenFile(segpath, O_RDWR | O_BINARY | O_CREAT, 0600);
+ pfree(path);
+ path = segpath;
+ }
#endif
- pfree(segpath);
-
- if (fd < 0)
- return (MdfdVec *) NULL;
-
- /* allocate an mdfdvec entry for it */
- oldcxt = MemoryContextSwitchTo(MdCxt);
- newv = (MdfdVec *) palloc(sizeof(MdfdVec));
- MemoryContextSwitchTo(oldcxt);
-
- /* fill the entry */
- newv->mdfd_vfd = fd;
- newv->mdfd_flags = MDFD_TEMP;
- newv->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
- newv->mdfd_chain = (MdfdVec *) NULL;
-#ifdef DIAGNOSTIC
- if (newv->mdfd_lstbcnt > RELSEG_SIZE)
- elog(FATAL, "segment too big on open!");
-#endif
- v->mdfd_chain = newv;
- v = newv;
- }
+#ifndef __CYGWIN32__
+ fd = open(path, O_RDWR, 0600);
#else
- v = &Md_fdvec[vfd];
+ fd = open(path, O_RDWR | O_BINARY, 0600);
#endif
pfree(path);
- return v;
+ return fd;
}
static BlockNumber
diff --git a/src/backend/storage/smgr/mm.c b/src/backend/storage/smgr/mm.c
index fc3acead661..a5b22cbcc5c 100644
--- a/src/backend/storage/smgr/mm.c
+++ b/src/backend/storage/smgr/mm.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.18 2000/01/26 05:57:05 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/Attic/mm.c,v 1.19 2000/04/10 23:41:51 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -478,7 +478,8 @@ mmblindwrt(char *dbstr,
Oid dbid,
Oid relid,
BlockNumber blkno,
- char *buffer)
+ char *buffer,
+ bool dofsync)
{
return SM_FAIL;
}
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 839636b118b..27cad952aeb 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.33 2000/04/09 04:43:20 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/smgr.c,v 1.34 2000/04/10 23:41:52 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -38,7 +38,8 @@ typedef struct f_smgr
char *buffer);
int (*smgr_blindwrt) (char *dbname, char *relname,
Oid dbid, Oid relid,
- BlockNumber blkno, char *buffer);
+ BlockNumber blkno, char *buffer,
+ bool dofsync);
int (*smgr_markdirty) (Relation reln, BlockNumber blkno);
int (*smgr_blindmarkdirty) (char *dbname, char *relname,
Oid dbid, Oid relid,
@@ -293,7 +294,8 @@ smgrflush(int16 which, Relation reln, BlockNumber blocknum, char *buffer)
* this case, the buffer manager will call smgrblindwrt() with
* the name and OID of the database and the relation to which the
* buffer belongs. Every storage manager must be able to force
- * this page down to stable storage in this circumstance.
+ * this page down to stable storage in this circumstance. The
+ * write should be synchronous if dofsync is true.
*/
int
smgrblindwrt(int16 which,
@@ -302,7 +304,8 @@ smgrblindwrt(int16 which,
Oid dbid,
Oid relid,
BlockNumber blkno,
- char *buffer)
+ char *buffer,
+ bool dofsync)
{
char *dbstr;
char *relstr;
@@ -313,7 +316,7 @@ smgrblindwrt(int16 which,
relstr = pstrdup(relname);
status = (*(smgrsw[which].smgr_blindwrt)) (dbstr, relstr, dbid, relid,
- blkno, buffer);
+ blkno, buffer, dofsync);
if (status == SM_FAIL)
elog(ERROR, "cannot write block %d of %s [%s] blind",
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index 053a63196e5..bc0ec04bb2b 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: smgr.h,v 1.18 2000/04/09 04:43:18 tgl Exp $
+ * $Id: smgr.h,v 1.19 2000/04/10 23:41:45 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -37,7 +37,8 @@ extern int smgrflush(int16 which, Relation reln, BlockNumber blocknum,
char *buffer);
extern int smgrblindwrt(int16 which, char *dbname, char *relname,
Oid dbid, Oid relid,
- BlockNumber blkno, char *buffer);
+ BlockNumber blkno, char *buffer,
+ bool dofsync);
extern int smgrmarkdirty(int16 which, Relation reln, BlockNumber blkno);
extern int smgrblindmarkdirty(int16 which, char *dbname, char *relname,
Oid dbid, Oid relid,
@@ -62,7 +63,8 @@ extern int mdread(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdwrite(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdflush(Relation reln, BlockNumber blocknum, char *buffer);
extern int mdblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
- BlockNumber blkno, char *buffer);
+ BlockNumber blkno, char *buffer,
+ bool dofsync);
extern int mdmarkdirty(Relation reln, BlockNumber blkno);
extern int mdblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno);
@@ -84,7 +86,8 @@ extern int mmread(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmwrite(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmflush(Relation reln, BlockNumber blocknum, char *buffer);
extern int mmblindwrt(char *dbname, char *relname, Oid dbid, Oid relid,
- BlockNumber blkno, char *buffer);
+ BlockNumber blkno, char *buffer,
+ bool dofsync);
extern int mmmarkdirty(Relation reln, BlockNumber blkno);
extern int mmblindmarkdirty(char *dbname, char *relname, Oid dbid, Oid relid,
BlockNumber blkno);