about | summary | refs | log | tree | commit | diff
path: root/src/backend/storage/smgr/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/smgr/md.c')
-rw-r--r-- src/backend/storage/smgr/md.c 1088
1 files changed, 563 insertions, 525 deletions
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 2688ad3aed1..7a2903fff5c 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1,28 +1,28 @@
/*-------------------------------------------------------------------------
*
* md.c--
- * This code manages relations that reside on magnetic disk.
+ * This code manages relations that reside on magnetic disk.
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.18 1997/08/18 20:53:14 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/smgr/md.c,v 1.19 1997/09/07 04:49:17 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include <unistd.h>
-#include <stdio.h> /* for sprintf() */
+#include <stdio.h> /* for sprintf() */
#include <string.h>
-#include <fcntl.h> /* for open() flags */
+#include <fcntl.h> /* for open() flags */
#include <sys/file.h>
#include "postgres.h"
-#include "miscadmin.h" /* for DataDir */
+#include "miscadmin.h" /* for DataDir */
#include "storage/block.h"
#include "storage/fd.h"
-#include "storage/smgr.h" /* where the declarations go */
+#include "storage/smgr.h" /* where the declarations go */
#include "storage/fd.h"
#include "utils/mcxt.h"
#include "utils/rel.h"
@@ -32,764 +32,802 @@
#undef DIAGNOSTIC
/*
- * The magnetic disk storage manager keeps track of open file descriptors
- * in its own descriptor pool. This happens for two reasons. First, at
- * transaction boundaries, we walk the list of descriptors and flush
- * anything that we've dirtied in the current transaction. Second, we
- * have to support relations of > 4GBytes. In order to do this, we break
- * relations up into chunks of < 2GBytes and store one chunk in each of
- * several files that represent the relation.
+ * The magnetic disk storage manager keeps track of open file descriptors
+ * in its own descriptor pool. This happens for two reasons. First, at
+ * transaction boundaries, we walk the list of descriptors and flush
+ * anything that we've dirtied in the current transaction. Second, we
+ * have to support relations of > 4GBytes. In order to do this, we break
+ * relations up into chunks of < 2GBytes and store one chunk in each of
+ * several files that represent the relation.
*/
-typedef struct _MdfdVec {
- int mdfd_vfd; /* fd number in vfd pool */
- uint16 mdfd_flags; /* clean, dirty, free */
- int mdfd_lstbcnt; /* most recent block count */
- int mdfd_nextFree; /* next free vector */
- struct _MdfdVec *mdfd_chain; /* for large relations */
-} MdfdVec;
+typedef struct _MdfdVec
+{
+ int mdfd_vfd; /* fd number in vfd pool */
+ uint16 mdfd_flags; /* clean, dirty, free */
+ int mdfd_lstbcnt; /* most recent block count */
+ int mdfd_nextFree; /* next free vector */
+ struct _MdfdVec *mdfd_chain;/* for large relations */
+} MdfdVec;
-static int Nfds = 100;
-static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
-static int Md_Free = -1;
-static int CurFd = 0;
-static MemoryContext MdCxt;
+static int Nfds = 100;
+static MdfdVec *Md_fdvec = (MdfdVec *) NULL;
+static int Md_Free = -1;
+static int CurFd = 0;
+static MemoryContext MdCxt;
-#define MDFD_DIRTY (uint16) 0x01
-#define MDFD_FREE (uint16) 0x02
+#define MDFD_DIRTY (uint16) 0x01
+#define MDFD_FREE (uint16) 0x02
-#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */
+#define RELSEG_SIZE 262144 /* (2 ** 31) / 8192 -- 2GB file */
/* routines declared here */
-static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
-static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag);
-static int _fdvec_alloc (void);
-static void _fdvec_free (int);
+static MdfdVec *_mdfd_openseg(Relation reln, int segno, int oflags);
+static MdfdVec *_mdfd_getseg(Relation reln, int blkno, int oflag);
+static int _fdvec_alloc(void);
+static void _fdvec_free(int);
static BlockNumber _mdnblocks(File file, Size blcksz);
/*
- * mdinit() -- Initialize private state for magnetic disk storage manager.
+ * mdinit() -- Initialize private state for magnetic disk storage manager.
*
- * We keep a private table of all file descriptors. Whenever we do
- * a write to one, we mark it dirty in our table. Whenever we force
- * changes to disk, we mark the file descriptor clean. At transaction
- * commit, we force changes to disk for all dirty file descriptors.
- * This routine allocates and initializes the table.
+ * We keep a private table of all file descriptors. Whenever we do
+ * a write to one, we mark it dirty in our table. Whenever we force
+ * changes to disk, we mark the file descriptor clean. At transaction
+ * commit, we force changes to disk for all dirty file descriptors.
+ * This routine allocates and initializes the table.
*
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
+ * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
mdinit()
{
- MemoryContext oldcxt;
- int i;
+ MemoryContext oldcxt;
+ int i;
- MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr");
- if (MdCxt == (MemoryContext) NULL)
- return (SM_FAIL);
+ MdCxt = (MemoryContext) CreateGlobalMemory("MdSmgr");
+ if (MdCxt == (MemoryContext) NULL)
+ return (SM_FAIL);
- oldcxt = MemoryContextSwitchTo(MdCxt);
- Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
- MemoryContextSwitchTo(oldcxt);
+ oldcxt = MemoryContextSwitchTo(MdCxt);
+ Md_fdvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
+ MemoryContextSwitchTo(oldcxt);
- if (Md_fdvec == (MdfdVec *) NULL)
- return (SM_FAIL);
+ if (Md_fdvec == (MdfdVec *) NULL)
+ return (SM_FAIL);
- memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
+ memset(Md_fdvec, 0, Nfds * sizeof(MdfdVec));
- /* Set free list */
- for (i = 0; i < Nfds; i++ )
- {
- Md_fdvec[i].mdfd_nextFree = i + 1;
- Md_fdvec[i].mdfd_flags = MDFD_FREE;
- }
- Md_Free = 0;
- Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
+ /* Set free list */
+ for (i = 0; i < Nfds; i++)
+ {
+ Md_fdvec[i].mdfd_nextFree = i + 1;
+ Md_fdvec[i].mdfd_flags = MDFD_FREE;
+ }
+ Md_Free = 0;
+ Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
int
mdcreate(Relation reln)
{
- int fd, vfd;
- char *path;
-
- path = relpath(&(reln->rd_rel->relname.data[0]));
- fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);
-
- /*
- * If the file already exists and is empty, we pretend that the
- * create succeeded. During bootstrap processing, we skip that check,
- * because pg_time, pg_variable, and pg_log get created before their
- * .bki file entries are processed.
- *
- * As the result of this pretence it was possible to have in
- * pg_class > 1 records with the same relname. Actually, it
- * should be fixed in upper levels, too, but... - vadim 05/06/97
- */
-
- if (fd < 0)
- {
- if ( !IsBootstrapProcessingMode() )
- return (-1);
- fd = FileNameOpenFile(path, O_RDWR, 0600); /* Bootstrap */
- if ( fd < 0 )
- return (-1);
- }
-
- vfd = _fdvec_alloc ();
- if ( vfd < 0 )
- return (-1);
-
- Md_fdvec[vfd].mdfd_vfd = fd;
- Md_fdvec[vfd].mdfd_flags = (uint16) 0;
- Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
- Md_fdvec[vfd].mdfd_lstbcnt = 0;
-
- return (vfd);
+ int fd,
+ vfd;
+ char *path;
+
+ path = relpath(&(reln->rd_rel->relname.data[0]));
+ fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL, 0600);
+
+ /*
+ * If the file already exists and is empty, we pretend that the create
+ * succeeded. During bootstrap processing, we skip that check,
+ * because pg_time, pg_variable, and pg_log get created before their
+ * .bki file entries are processed.
+ *
+ * As a result of this pretence, it was possible to have more than one
+ * record in pg_class with the same relname. This should really be fixed
+ * in the upper levels too, but... - vadim 05/06/97
+ */
+
+ if (fd < 0)
+ {
+ if (!IsBootstrapProcessingMode())
+ return (-1);
+ fd = FileNameOpenFile(path, O_RDWR, 0600); /* Bootstrap */
+ if (fd < 0)
+ return (-1);
+ }
+
+ vfd = _fdvec_alloc();
+ if (vfd < 0)
+ return (-1);
+
+ Md_fdvec[vfd].mdfd_vfd = fd;
+ Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+ Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
+ Md_fdvec[vfd].mdfd_lstbcnt = 0;
+
+ return (vfd);
}
/*
- * mdunlink() -- Unlink a relation.
+ * mdunlink() -- Unlink a relation.
*/
int
mdunlink(Relation reln)
{
- int fd;
- int i;
- MdfdVec *v, *ov;
- MemoryContext oldcxt;
- char fname[NAMEDATALEN];
- char tname[NAMEDATALEN+10]; /* leave room for overflow suffixes*/
-
- /* On Windows NT you can't unlink a file if it is open so we have
- ** to do this.
- */
+ int fd;
+ int i;
+ MdfdVec *v,
+ *ov;
+ MemoryContext oldcxt;
+ char fname[NAMEDATALEN];
+ char tname[NAMEDATALEN + 10]; /* leave room for overflow
+ * suffixes */
+
+ /*
+ * On Windows NT you can't unlink a file if it is open so we have to
+ * do this.
+ */
+
+ strNcpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN - 1);
+
+ if (FileNameUnlink(fname) < 0)
+ return (SM_FAIL);
+
+ /* unlink all the overflow files for large relations */
+ for (i = 1;; i++)
+ {
+ sprintf(tname, "%s.%d", fname, i);
+ if (FileNameUnlink(tname) < 0)
+ break;
+ }
+
+ /* finally, clean out the mdfd vector */
+ fd = RelationGetFile(reln);
+ Md_fdvec[fd].mdfd_flags = (uint16) 0;
+
+ oldcxt = MemoryContextSwitchTo(MdCxt);
+ for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
+ {
+ FileUnlink(v->mdfd_vfd);
+ ov = v;
+ v = v->mdfd_chain;
+ if (ov != &Md_fdvec[fd])
+ pfree(ov);
+ }
+ Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
+ MemoryContextSwitchTo(oldcxt);
- strNcpy(fname, RelationGetRelationName(reln)->data, NAMEDATALEN-1);
-
- if (FileNameUnlink(fname) < 0)
- return (SM_FAIL);
-
- /* unlink all the overflow files for large relations */
- for (i = 1; ; i++) {
- sprintf(tname, "%s.%d", fname, i);
- if (FileNameUnlink(tname) < 0)
- break;
- }
-
- /* finally, clean out the mdfd vector */
- fd = RelationGetFile(reln);
- Md_fdvec[fd].mdfd_flags = (uint16) 0;
-
- oldcxt = MemoryContextSwitchTo(MdCxt);
- for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; )
- {
- FileUnlink(v->mdfd_vfd);
- ov = v;
- v = v->mdfd_chain;
- if (ov != &Md_fdvec[fd])
- pfree(ov);
- }
- Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
- MemoryContextSwitchTo(oldcxt);
-
- _fdvec_free (fd);
-
- return (SM_SUCCESS);
+ _fdvec_free(fd);
+
+ return (SM_SUCCESS);
}
/*
- * mdextend() -- Add a block to the specified relation.
+ * mdextend() -- Add a block to the specified relation.
*
- * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
- * appropriate.
+ * This routine returns SM_FAIL or SM_SUCCESS, with errno set as
+ * appropriate.
*/
int
mdextend(Relation reln, char *buffer)
{
- long pos;
- int nblocks;
- MdfdVec *v;
+ long pos;
+ int nblocks;
+ MdfdVec *v;
- nblocks = mdnblocks(reln);
- v = _mdfd_getseg(reln, nblocks, O_CREAT);
+ nblocks = mdnblocks(reln);
+ v = _mdfd_getseg(reln, nblocks, O_CREAT);
- if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
- return (SM_FAIL);
+ if ((pos = FileSeek(v->mdfd_vfd, 0L, SEEK_END)) < 0)
+ return (SM_FAIL);
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
- return (SM_FAIL);
+ if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
+ return (SM_FAIL);
- /* remember that we did a write, so we can sync at xact commit */
- v->mdfd_flags |= MDFD_DIRTY;
+ /* remember that we did a write, so we can sync at xact commit */
+ v->mdfd_flags |= MDFD_DIRTY;
- /* try to keep the last block count current, though it's just a hint */
- if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0)
- v->mdfd_lstbcnt = RELSEG_SIZE;
+ /* try to keep the last block count current, though it's just a hint */
+ if ((v->mdfd_lstbcnt = (++nblocks % RELSEG_SIZE)) == 0)
+ v->mdfd_lstbcnt = RELSEG_SIZE;
#ifdef DIAGNOSTIC
- if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE
- || v->mdfd_lstbcnt > RELSEG_SIZE)
- elog(FATAL, "segment too big!");
+ if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE
+ || v->mdfd_lstbcnt > RELSEG_SIZE)
+ elog(FATAL, "segment too big!");
#endif
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
/*
- * mdopen() -- Open the specified relation.
+ * mdopen() -- Open the specified relation.
*/
int
mdopen(Relation reln)
{
- char *path;
- int fd;
- int vfd;
+ char *path;
+ int fd;
+ int vfd;
- path = relpath(&(reln->rd_rel->relname.data[0]));
+ path = relpath(&(reln->rd_rel->relname.data[0]));
- fd = FileNameOpenFile(path, O_RDWR, 0600);
+ fd = FileNameOpenFile(path, O_RDWR, 0600);
- /* this should only happen during bootstrap processing */
- if (fd < 0)
- fd = FileNameOpenFile(path, O_RDWR|O_CREAT|O_EXCL, 0600);
+ /* this should only happen during bootstrap processing */
+ if (fd < 0)
+ fd = FileNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL, 0600);
- vfd = _fdvec_alloc ();
- if ( vfd < 0 )
- return (-1);
+ vfd = _fdvec_alloc();
+ if (vfd < 0)
+ return (-1);
- Md_fdvec[vfd].mdfd_vfd = fd;
- Md_fdvec[vfd].mdfd_flags = (uint16) 0;
- Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
- Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
+ Md_fdvec[vfd].mdfd_vfd = fd;
+ Md_fdvec[vfd].mdfd_flags = (uint16) 0;
+ Md_fdvec[vfd].mdfd_chain = (MdfdVec *) NULL;
+ Md_fdvec[vfd].mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
#ifdef DIAGNOSTIC
- if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
- elog(FATAL, "segment too big on relopen!");
+ if (Md_fdvec[vfd].mdfd_lstbcnt > RELSEG_SIZE)
+ elog(FATAL, "segment too big on relopen!");
#endif
- return (vfd);
+ return (vfd);
}
/*
- * mdclose() -- Close the specified relation
+ * mdclose() -- Close the specified relation
*
- * AND FREE fd vector! It may be re-used for other relation!
- * reln should be flushed from cache after closing !..
+ * This also frees the fd vector, which may then be re-used for another
+ * relation, so reln should be flushed from the cache after closing.
*
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
+ * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
mdclose(Relation reln)
{
- int fd;
- MdfdVec *v, *ov;
- MemoryContext oldcxt;
+ int fd;
+ MdfdVec *v,
+ *ov;
+ MemoryContext oldcxt;
- fd = RelationGetFile(reln);
+ fd = RelationGetFile(reln);
- oldcxt = MemoryContextSwitchTo(MdCxt);
- for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL; )
- {
- /* if not closed already */
- if ( v->mdfd_vfd >= 0 )
+ oldcxt = MemoryContextSwitchTo(MdCxt);
+ for (v = &Md_fdvec[fd]; v != (MdfdVec *) NULL;)
{
- /*
- * We sync the file descriptor so that we don't need to reopen it at
- * transaction commit to force changes to disk.
- */
+ /* if not closed already */
+ if (v->mdfd_vfd >= 0)
+ {
+
+ /*
+ * We sync the file descriptor so that we don't need to reopen
+ * it at transaction commit to force changes to disk.
+ */
+
+ FileSync(v->mdfd_vfd);
+ FileClose(v->mdfd_vfd);
+
+ /* mark this file descriptor as clean in our private table */
+ v->mdfd_flags &= ~MDFD_DIRTY;
+ }
+ /* Now free vector */
+ ov = v;
+ v = v->mdfd_chain;
+ if (ov != &Md_fdvec[fd])
+ pfree(ov);
+ }
- FileSync(v->mdfd_vfd);
- FileClose(v->mdfd_vfd);
+ MemoryContextSwitchTo(oldcxt);
+ Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
- /* mark this file descriptor as clean in our private table */
- v->mdfd_flags &= ~MDFD_DIRTY;
- }
- /* Now free vector */
- ov = v;
- v = v->mdfd_chain;
- if (ov != &Md_fdvec[fd])
- pfree(ov);
- }
-
- MemoryContextSwitchTo(oldcxt);
- Md_fdvec[fd].mdfd_chain = (MdfdVec *) NULL;
-
- _fdvec_free (fd);
-
- return (SM_SUCCESS);
+ _fdvec_free(fd);
+
+ return (SM_SUCCESS);
}
/*
- * mdread() -- Read the specified block from a relation.
+ * mdread() -- Read the specified block from a relation.
*
- * Returns SM_SUCCESS or SM_FAIL.
+ * Returns SM_SUCCESS or SM_FAIL.
*/
int
mdread(Relation reln, BlockNumber blocknum, char *buffer)
{
- int status;
- long seekpos;
- int nbytes;
- MdfdVec *v;
+ int status;
+ long seekpos;
+ int nbytes;
+ MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, 0);
+ v = _mdfd_getseg(reln, blocknum, 0);
- seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
+ seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
#ifdef DIAGNOSTIC
- if (seekpos >= BLCKSZ * RELSEG_SIZE)
- elog(FATAL, "seekpos too big!");
+ if (seekpos >= BLCKSZ * RELSEG_SIZE)
+ elog(FATAL, "seekpos too big!");
#endif
- if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
- return (SM_FAIL);
- }
+ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
+ {
+ return (SM_FAIL);
+ }
- status = SM_SUCCESS;
- if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) {
- if (nbytes == 0) {
- memset(buffer, 0, BLCKSZ);
- } else {
- status = SM_FAIL;
+ status = SM_SUCCESS;
+ if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
+ {
+ if (nbytes == 0)
+ {
+ memset(buffer, 0, BLCKSZ);
+ }
+ else
+ {
+ status = SM_FAIL;
+ }
}
- }
- return (status);
+ return (status);
}
/*
- * mdwrite() -- Write the supplied block at the appropriate location.
+ * mdwrite() -- Write the supplied block at the appropriate location.
*
- * Returns SM_SUCCESS or SM_FAIL.
+ * Returns SM_SUCCESS or SM_FAIL.
*/
int
mdwrite(Relation reln, BlockNumber blocknum, char *buffer)
{
- int status;
- long seekpos;
- MdfdVec *v;
+ int status;
+ long seekpos;
+ MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, 0);
+ v = _mdfd_getseg(reln, blocknum, 0);
- seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
+ seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
#ifdef DIAGNOSTIC
- if (seekpos >= BLCKSZ * RELSEG_SIZE)
- elog(FATAL, "seekpos too big!");
+ if (seekpos >= BLCKSZ * RELSEG_SIZE)
+ elog(FATAL, "seekpos too big!");
#endif
- if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
- return (SM_FAIL);
- }
+ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
+ {
+ return (SM_FAIL);
+ }
- status = SM_SUCCESS;
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
- status = SM_FAIL;
+ status = SM_SUCCESS;
+ if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
+ status = SM_FAIL;
- v->mdfd_flags |= MDFD_DIRTY;
+ v->mdfd_flags |= MDFD_DIRTY;
- return (status);
+ return (status);
}
/*
- * mdflush() -- Synchronously write a block to disk.
+ * mdflush() -- Synchronously write a block to disk.
*
- * This is exactly like mdwrite(), but doesn't return until the file
- * system buffer cache has been flushed.
+ * This is exactly like mdwrite(), but doesn't return until the file
+ * system buffer cache has been flushed.
*/
int
mdflush(Relation reln, BlockNumber blocknum, char *buffer)
{
- int status;
- long seekpos;
- MdfdVec *v;
+ int status;
+ long seekpos;
+ MdfdVec *v;
- v = _mdfd_getseg(reln, blocknum, 0);
+ v = _mdfd_getseg(reln, blocknum, 0);
- seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
+ seekpos = (long) (BLCKSZ * (blocknum % RELSEG_SIZE));
#ifdef DIAGNOSTIC
- if (seekpos >= BLCKSZ * RELSEG_SIZE)
- elog(FATAL, "seekpos too big!");
+ if (seekpos >= BLCKSZ * RELSEG_SIZE)
+ elog(FATAL, "seekpos too big!");
#endif
- if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) {
- return (SM_FAIL);
- }
+ if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
+ {
+ return (SM_FAIL);
+ }
- /* write and sync the block */
- status = SM_SUCCESS;
- if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
- || FileSync(v->mdfd_vfd) < 0)
- status = SM_FAIL;
+ /* write and sync the block */
+ status = SM_SUCCESS;
+ if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ
+ || FileSync(v->mdfd_vfd) < 0)
+ status = SM_FAIL;
- /*
- * By here, the block is written and changes have been forced to stable
- * storage. Mark the descriptor as clean until the next write, so we
- * don't sync it again unnecessarily at transaction commit.
- */
+ /*
+ * By here, the block is written and changes have been forced to
+ * stable storage. Mark the descriptor as clean until the next write,
+ * so we don't sync it again unnecessarily at transaction commit.
+ */
- v->mdfd_flags &= ~MDFD_DIRTY;
+ v->mdfd_flags &= ~MDFD_DIRTY;
- return (status);
+ return (status);
}
/*
- * mdblindwrt() -- Write a block to disk blind.
+ * mdblindwrt() -- Write a block to disk blind.
*
- * We have to be able to do this using only the name and OID of
- * the database and relation in which the block belongs. This
- * is a synchronous write.
+ * We have to be able to do this using only the name and OID of
+ * the database and relation in which the block belongs. This
+ * is a synchronous write.
*/
int
mdblindwrt(char *dbstr,
- char *relstr,
- Oid dbid,
- Oid relid,
- BlockNumber blkno,
- char *buffer)
+ char *relstr,
+ Oid dbid,
+ Oid relid,
+ BlockNumber blkno,
+ char *buffer)
{
- int fd;
- int segno;
- long seekpos;
- int status;
- char *path;
- int nchars;
-
- /* be sure we have enough space for the '.segno', if any */
- segno = blkno / RELSEG_SIZE;
- if (segno > 0)
- nchars = 10;
- else
- nchars = 0;
-
- /* construct the path to the file and open it */
- if (dbid == (Oid) 0) {
- path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars);
- if (segno == 0)
- sprintf(path, "%s/%s", DataDir, relstr);
+ int fd;
+ int segno;
+ long seekpos;
+ int status;
+ char *path;
+ int nchars;
+
+ /* be sure we have enough space for the '.segno', if any */
+ segno = blkno / RELSEG_SIZE;
+ if (segno > 0)
+ nchars = 10;
else
- sprintf(path, "%s/%s.%d", DataDir, relstr, segno);
- } else {
- path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars);
- if (segno == 0)
- sprintf(path, "%s/base/%s/%s", DataDir,
- dbstr, relstr);
+ nchars = 0;
+
+ /* construct the path to the file and open it */
+ if (dbid == (Oid) 0)
+ {
+ path = (char *) palloc(strlen(DataDir) + sizeof(NameData) + 2 + nchars);
+ if (segno == 0)
+ sprintf(path, "%s/%s", DataDir, relstr);
+ else
+ sprintf(path, "%s/%s.%d", DataDir, relstr, segno);
+ }
else
- sprintf(path, "%s/base/%s/%s.%d", DataDir, dbstr,
- relstr, segno);
- }
+ {
+ path = (char *) palloc(strlen(DataDir) + strlen("/base/") + 2 * sizeof(NameData) + 2 + nchars);
+ if (segno == 0)
+ sprintf(path, "%s/base/%s/%s", DataDir,
+ dbstr, relstr);
+ else
+ sprintf(path, "%s/base/%s/%s.%d", DataDir, dbstr,
+ relstr, segno);
+ }
- if ((fd = open(path, O_RDWR, 0600)) < 0)
- return (SM_FAIL);
+ if ((fd = open(path, O_RDWR, 0600)) < 0)
+ return (SM_FAIL);
- /* seek to the right spot */
- seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));
- if (lseek(fd, seekpos, SEEK_SET) != seekpos) {
- close(fd);
- return (SM_FAIL);
- }
+ /* seek to the right spot */
+ seekpos = (long) (BLCKSZ * (blkno % RELSEG_SIZE));
+ if (lseek(fd, seekpos, SEEK_SET) != seekpos)
+ {
+ close(fd);
+ return (SM_FAIL);
+ }
- status = SM_SUCCESS;
+ status = SM_SUCCESS;
- /* write and sync the block */
- if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0))
- status = SM_FAIL;
+ /* write and sync the block */
+ if (write(fd, buffer, BLCKSZ) != BLCKSZ || (pg_fsync(fd) < 0))
+ status = SM_FAIL;
- if (close(fd) < 0)
- status = SM_FAIL;
+ if (close(fd) < 0)
+ status = SM_FAIL;
- pfree(path);
+ pfree(path);
- return (status);
+ return (status);
}
/*
- * mdnblocks() -- Get the number of blocks stored in a relation.
+ * mdnblocks() -- Get the number of blocks stored in a relation.
*
- * Returns # of blocks or -1 on error.
+ * Returns # of blocks or -1 on error.
*/
int
mdnblocks(Relation reln)
{
- int fd;
- MdfdVec *v;
- int nblocks;
- int segno;
+ int fd;
+ MdfdVec *v;
+ int nblocks;
+ int segno;
- fd = RelationGetFile(reln);
- v = &Md_fdvec[fd];
+ fd = RelationGetFile(reln);
+ v = &Md_fdvec[fd];
#ifdef DIAGNOSTIC
- if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
- elog(FATAL, "segment too big in getseg!");
+ if (_mdnblocks(v->mdfd_vfd, BLCKSZ) > RELSEG_SIZE)
+ elog(FATAL, "segment too big in getseg!");
#endif
- segno = 0;
- for (;;) {
- if (v->mdfd_lstbcnt == RELSEG_SIZE
- || (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE) {
-
- v->mdfd_lstbcnt = RELSEG_SIZE;
- segno++;
-
- if (v->mdfd_chain == (MdfdVec *) NULL) {
- v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
- if (v->mdfd_chain == (MdfdVec *) NULL)
- elog(WARN, "cannot count blocks for %.16s -- open failed",
- RelationGetRelationName(reln));
- }
-
- v = v->mdfd_chain;
- } else {
- return ((segno * RELSEG_SIZE) + nblocks);
+ segno = 0;
+ for (;;)
+ {
+ if (v->mdfd_lstbcnt == RELSEG_SIZE
+ || (nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ)) == RELSEG_SIZE)
+ {
+
+ v->mdfd_lstbcnt = RELSEG_SIZE;
+ segno++;
+
+ if (v->mdfd_chain == (MdfdVec *) NULL)
+ {
+ v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
+ if (v->mdfd_chain == (MdfdVec *) NULL)
+ elog(WARN, "cannot count blocks for %.16s -- open failed",
+ RelationGetRelationName(reln));
+ }
+
+ v = v->mdfd_chain;
+ }
+ else
+ {
+ return ((segno * RELSEG_SIZE) + nblocks);
+ }
}
- }
}
/*
- * mdtruncate() -- Truncate relation to specified number of blocks.
+ * mdtruncate() -- Truncate relation to specified number of blocks.
*
- * Returns # of blocks or -1 on error.
+ * Returns # of blocks or -1 on error.
*/
int
-mdtruncate (Relation reln, int nblocks)
+mdtruncate(Relation reln, int nblocks)
{
- int fd;
- MdfdVec *v;
- int curnblk;
+ int fd;
+ MdfdVec *v;
+ int curnblk;
- curnblk = mdnblocks (reln);
- if ( curnblk / RELSEG_SIZE > 0 )
- {
- elog (NOTICE, "Can't truncate multi-segments relation %s",
- &(reln->rd_rel->relname.data[0]));
- return (curnblk);
- }
+ curnblk = mdnblocks(reln);
+ if (curnblk / RELSEG_SIZE > 0)
+ {
+ elog(NOTICE, "Can't truncate multi-segments relation %s",
+ &(reln->rd_rel->relname.data[0]));
+ return (curnblk);
+ }
+
+ fd = RelationGetFile(reln);
+ v = &Md_fdvec[fd];
- fd = RelationGetFile(reln);
- v = &Md_fdvec[fd];
+ if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
+ return (-1);
- if ( FileTruncate (v->mdfd_vfd, nblocks * BLCKSZ) < 0 )
- return (-1);
-
- return (nblocks);
+ return (nblocks);
-} /* mdtruncate */
+} /* mdtruncate */
/*
- * mdcommit() -- Commit a transaction.
+ * mdcommit() -- Commit a transaction.
*
- * All changes to magnetic disk relations must be forced to stable
- * storage. This routine makes a pass over the private table of
- * file descriptors. Any descriptors to which we have done writes,
- * but not synced, are synced here.
+ * All changes to magnetic disk relations must be forced to stable
+ * storage. This routine makes a pass over the private table of
+ * file descriptors. Any descriptors to which we have done writes,
+ * but not synced, are synced here.
*
- * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
+ * Returns SM_SUCCESS or SM_FAIL with errno set as appropriate.
*/
int
mdcommit()
{
- int i;
- MdfdVec *v;
+ int i;
+ MdfdVec *v;
- for (i = 0; i < CurFd; i++) {
- for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
- if (v->mdfd_flags & MDFD_DIRTY) {
- if (FileSync(v->mdfd_vfd) < 0)
- return (SM_FAIL);
-
- v->mdfd_flags &= ~MDFD_DIRTY;
- }
+ for (i = 0; i < CurFd; i++)
+ {
+ for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
+ {
+ if (v->mdfd_flags & MDFD_DIRTY)
+ {
+ if (FileSync(v->mdfd_vfd) < 0)
+ return (SM_FAIL);
+
+ v->mdfd_flags &= ~MDFD_DIRTY;
+ }
+ }
}
- }
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
/*
- * mdabort() -- Abort a transaction.
+ * mdabort() -- Abort a transaction.
*
- * Changes need not be forced to disk at transaction abort. We mark
- * all file descriptors as clean here. Always returns SM_SUCCESS.
+ * Changes need not be forced to disk at transaction abort. We mark
+ * all file descriptors as clean here. Always returns SM_SUCCESS.
*/
int
mdabort()
{
- int i;
- MdfdVec *v;
+ int i;
+ MdfdVec *v;
- for (i = 0; i < CurFd; i++) {
- for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain) {
- v->mdfd_flags &= ~MDFD_DIRTY;
+ for (i = 0; i < CurFd; i++)
+ {
+ for (v = &Md_fdvec[i]; v != (MdfdVec *) NULL; v = v->mdfd_chain)
+ {
+ v->mdfd_flags &= ~MDFD_DIRTY;
+ }
}
- }
- return (SM_SUCCESS);
+ return (SM_SUCCESS);
}
/*
- * _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
+ * _fdvec_alloc () -- grab a free (or new) md file descriptor vector.
*
*/
static
-int _fdvec_alloc ()
+int
+_fdvec_alloc()
{
- MdfdVec *nvec;
- int fdvec, i;
- MemoryContext oldcxt;
-
- if ( Md_Free >= 0 ) /* get from free list */
- {
- fdvec = Md_Free;
- Md_Free = Md_fdvec[fdvec].mdfd_nextFree;
- Assert ( Md_fdvec[fdvec].mdfd_flags == MDFD_FREE );
- Md_fdvec[fdvec].mdfd_flags = 0;
- if ( fdvec >= CurFd )
+ MdfdVec *nvec;
+ int fdvec,
+ i;
+ MemoryContext oldcxt;
+
+ if (Md_Free >= 0) /* get from free list */
{
- Assert ( fdvec == CurFd );
- CurFd++;
+ fdvec = Md_Free;
+ Md_Free = Md_fdvec[fdvec].mdfd_nextFree;
+ Assert(Md_fdvec[fdvec].mdfd_flags == MDFD_FREE);
+ Md_fdvec[fdvec].mdfd_flags = 0;
+ if (fdvec >= CurFd)
+ {
+ Assert(fdvec == CurFd);
+ CurFd++;
+ }
+ return (fdvec);
}
- return (fdvec);
- }
- /* Must allocate more room */
-
- if ( Nfds != CurFd )
- elog (FATAL, "_fdvec_alloc error");
-
- Nfds *= 2;
+ /* Must allocate more room */
+
+ if (Nfds != CurFd)
+ elog(FATAL, "_fdvec_alloc error");
- oldcxt = MemoryContextSwitchTo(MdCxt);
+ Nfds *= 2;
- nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
- memset(nvec, 0, Nfds * sizeof(MdfdVec));
- memmove(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec));
- pfree(Md_fdvec);
+ oldcxt = MemoryContextSwitchTo(MdCxt);
- MemoryContextSwitchTo(oldcxt);
+ nvec = (MdfdVec *) palloc(Nfds * sizeof(MdfdVec));
+ memset(nvec, 0, Nfds * sizeof(MdfdVec));
+ memmove(nvec, (char *) Md_fdvec, CurFd * sizeof(MdfdVec));
+ pfree(Md_fdvec);
- Md_fdvec = nvec;
+ MemoryContextSwitchTo(oldcxt);
- /* Set new free list */
- for (i = CurFd; i < Nfds; i++ )
- {
- Md_fdvec[i].mdfd_nextFree = i + 1;
- Md_fdvec[i].mdfd_flags = MDFD_FREE;
- }
- Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
- Md_Free = CurFd + 1;
+ Md_fdvec = nvec;
- fdvec = CurFd;
- CurFd++;
- Md_fdvec[fdvec].mdfd_flags = 0;
+ /* Set new free list */
+ for (i = CurFd; i < Nfds; i++)
+ {
+ Md_fdvec[i].mdfd_nextFree = i + 1;
+ Md_fdvec[i].mdfd_flags = MDFD_FREE;
+ }
+ Md_fdvec[Nfds - 1].mdfd_nextFree = -1;
+ Md_Free = CurFd + 1;
- return (fdvec);
+ fdvec = CurFd;
+ CurFd++;
+ Md_fdvec[fdvec].mdfd_flags = 0;
+
+ return (fdvec);
}
/*
- * _fdvec_free () -- free md file descriptor vector.
+ * _fdvec_free () -- free md file descriptor vector.
*
*/
static
-void _fdvec_free (int fdvec)
+void
+_fdvec_free(int fdvec)
{
-
- Assert ( Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE );
- Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
- Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
- Md_Free = fdvec;
+
+ Assert(Md_Free < 0 || Md_fdvec[Md_Free].mdfd_flags == MDFD_FREE);
+ Md_fdvec[fdvec].mdfd_nextFree = Md_Free;
+ Md_fdvec[fdvec].mdfd_flags = MDFD_FREE;
+ Md_Free = fdvec;
}
static MdfdVec *
_mdfd_openseg(Relation reln, int segno, int oflags)
{
- MemoryContext oldcxt;
- MdfdVec *v;
- int fd;
- bool dofree;
- char *path, *fullpath;
-
- /* be sure we have enough space for the '.segno', if any */
- path = relpath(RelationGetRelationName(reln)->data);
-
- dofree = false;
- if (segno > 0) {
- dofree = true;
- fullpath = (char *) palloc(strlen(path) + 12);
- sprintf(fullpath, "%s.%d", path, segno);
- } else
- fullpath = path;
-
- /* open the file */
- fd = PathNameOpenFile(fullpath, O_RDWR|oflags, 0600);
-
- if (dofree)
- pfree(fullpath);
-
- if (fd < 0)
- return ((MdfdVec *) NULL);
-
- /* allocate an mdfdvec entry for it */
- oldcxt = MemoryContextSwitchTo(MdCxt);
- v = (MdfdVec *) palloc(sizeof(MdfdVec));
- MemoryContextSwitchTo(oldcxt);
-
- /* fill the entry */
- v->mdfd_vfd = fd;
- v->mdfd_flags = (uint16) 0;
- v->mdfd_chain = (MdfdVec *) NULL;
- v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
+ MemoryContext oldcxt;
+ MdfdVec *v;
+ int fd;
+ bool dofree;
+ char *path,
+ *fullpath;
+
+ /* be sure we have enough space for the '.segno', if any */
+ path = relpath(RelationGetRelationName(reln)->data);
+
+ dofree = false;
+ if (segno > 0)
+ {
+ dofree = true;
+ fullpath = (char *) palloc(strlen(path) + 12);
+ sprintf(fullpath, "%s.%d", path, segno);
+ }
+ else
+ fullpath = path;
+
+ /* open the file */
+ fd = PathNameOpenFile(fullpath, O_RDWR | oflags, 0600);
+
+ if (dofree)
+ pfree(fullpath);
+
+ if (fd < 0)
+ return ((MdfdVec *) NULL);
+
+ /* allocate an mdfdvec entry for it */
+ oldcxt = MemoryContextSwitchTo(MdCxt);
+ v = (MdfdVec *) palloc(sizeof(MdfdVec));
+ MemoryContextSwitchTo(oldcxt);
+
+ /* fill the entry */
+ v->mdfd_vfd = fd;
+ v->mdfd_flags = (uint16) 0;
+ v->mdfd_chain = (MdfdVec *) NULL;
+ v->mdfd_lstbcnt = _mdnblocks(fd, BLCKSZ);
#ifdef DIAGNOSTIC
- if (v->mdfd_lstbcnt > RELSEG_SIZE)
- elog(FATAL, "segment too big on open!");
+ if (v->mdfd_lstbcnt > RELSEG_SIZE)
+ elog(FATAL, "segment too big on open!");
#endif
- /* all done */
- return (v);
+ /* all done */
+ return (v);
}
static MdfdVec *
_mdfd_getseg(Relation reln, int blkno, int oflag)
{
- MdfdVec *v;
- int segno;
- int fd;
- int i;
-
- fd = RelationGetFile(reln);
- if (fd < 0) {
- if ((fd = mdopen(reln)) < 0)
- elog(WARN, "cannot open relation %.16s",
- RelationGetRelationName(reln));
- reln->rd_fd = fd;
- }
-
- for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
- segno > 0;
- i++, segno--) {
-
- if (v->mdfd_chain == (MdfdVec *) NULL) {
- v->mdfd_chain = _mdfd_openseg(reln, i, oflag);
-
- if (v->mdfd_chain == (MdfdVec *) NULL)
- elog(WARN, "cannot open segment %d of relation %.16s",
- i, RelationGetRelationName(reln));
+ MdfdVec *v;
+ int segno;
+ int fd;
+ int i;
+
+ fd = RelationGetFile(reln);
+ if (fd < 0)
+ {
+ if ((fd = mdopen(reln)) < 0)
+ elog(WARN, "cannot open relation %.16s",
+ RelationGetRelationName(reln));
+ reln->rd_fd = fd;
+ }
+
+ for (v = &Md_fdvec[fd], segno = blkno / RELSEG_SIZE, i = 1;
+ segno > 0;
+ i++, segno--)
+ {
+
+ if (v->mdfd_chain == (MdfdVec *) NULL)
+ {
+ v->mdfd_chain = _mdfd_openseg(reln, i, oflag);
+
+ if (v->mdfd_chain == (MdfdVec *) NULL)
+ elog(WARN, "cannot open segment %d of relation %.16s",
+ i, RelationGetRelationName(reln));
+ }
+ v = v->mdfd_chain;
}
- v = v->mdfd_chain;
- }
- return (v);
+ return (v);
}
-static BlockNumber
+static BlockNumber
_mdnblocks(File file, Size blcksz)
{
- long len;
-
- len = FileSeek(file, 0L, SEEK_END) - 1;
- return((BlockNumber)((len < 0) ? 0 : 1 + len / blcksz));
+ long len;
+
+ len = FileSeek(file, 0L, SEEK_END) - 1;
+ return ((BlockNumber) ((len < 0) ? 0 : 1 + len / blcksz));
}