Diffstat (limited to 'src/backend/storage/buffer/buf_init.c')
-rw-r--r--  src/backend/storage/buffer/buf_init.c  407
1 file changed, 210 insertions, 197 deletions
diff --git a/src/backend/storage/buffer/buf_init.c b/src/backend/storage/buffer/buf_init.c
index 20f8195d1e9..4ce064d6713 100644
--- a/src/backend/storage/buffer/buf_init.c
+++ b/src/backend/storage/buffer/buf_init.c
@@ -1,13 +1,13 @@
/*-------------------------------------------------------------------------
*
* buf_init.c--
- * buffer manager initialization routines
+ * buffer manager initialization routines
*
* Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.10 1997/07/28 00:54:33 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/buf_init.c,v 1.11 1997/09/07 04:48:15 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -35,98 +35,103 @@
#include "utils/dynahash.h"
#include "utils/hsearch.h"
#include "utils/memutils.h"
-#include "executor/execdebug.h" /* for NDirectFileRead */
+#include "executor/execdebug.h" /* for NDirectFileRead */
#include "catalog/catalog.h"
/*
- * if BMTRACE is defined, we trace the last 200 buffer allocations and
- * deallocations in a circular buffer in shared memory.
+ * if BMTRACE is defined, we trace the last 200 buffer allocations and
+ * deallocations in a circular buffer in shared memory.
*/
#ifdef BMTRACE
-bmtrace *TraceBuf;
-long *CurTraceBuf;
-#define BMT_LIMIT 200
-#endif /* BMTRACE */
-int ShowPinTrace = 0;
-
-int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
-int Data_Descriptors;
-int Free_List_Descriptor;
-int Lookup_List_Descriptor;
-int Num_Descriptors;
-
-BufferDesc *BufferDescriptors;
-BufferBlock BufferBlocks;
+bmtrace *TraceBuf;
+long *CurTraceBuf;
+
+#define BMT_LIMIT 200
+#endif /* BMTRACE */
+int ShowPinTrace = 0;
+
+int NBuffers = NDBUFS; /* NDBUFS defined in miscadmin.h */
+int Data_Descriptors;
+int Free_List_Descriptor;
+int Lookup_List_Descriptor;
+int Num_Descriptors;
+
+BufferDesc *BufferDescriptors;
+BufferBlock BufferBlocks;
+
#ifndef HAS_TEST_AND_SET
-long *NWaitIOBackendP;
+long *NWaitIOBackendP;
+
#endif
-extern IpcSemaphoreId WaitIOSemId;
+extern IpcSemaphoreId WaitIOSemId;
+
-long *PrivateRefCount; /* also used in freelist.c */
-long *LastRefCount; /* refcounts of last ExecMain level */
-long *CommitInfoNeedsSave; /* to write buffers where we have filled in */
- /* t_tmin (or t_tmax) */
+long *PrivateRefCount;/* also used in freelist.c */
+long *LastRefCount; /* refcounts of last ExecMain level */
+long *CommitInfoNeedsSave; /* to write buffers where we have
+ * filled in */
+ /* t_tmin (or t_tmax) */
/*
* Data Structures:
- * buffers live in a freelist and a lookup data structure.
- *
+ * buffers live in a freelist and a lookup data structure.
+ *
*
* Buffer Lookup:
- * Two important notes. First, the buffer has to be
- * available for lookup BEFORE an IO begins. Otherwise
- * a second process trying to read the buffer will
- * allocate its own copy and the buffer pool will
- * become inconsistent.
+ * Two important notes. First, the buffer has to be
+ * available for lookup BEFORE an IO begins. Otherwise
+ * a second process trying to read the buffer will
+ * allocate its own copy and the buffer pool will
+ * become inconsistent.
*
* Buffer Replacement:
- * see freelist.c. A buffer cannot be replaced while in
- * use either by data manager or during IO.
+ * see freelist.c. A buffer cannot be replaced while in
+ * use either by data manager or during IO.
*
* WriteBufferBack:
- * currently, a buffer is only written back at the time
- * it is selected for replacement. It should
- * be done sooner if possible to reduce latency of
- * BufferAlloc(). Maybe there should be a daemon process.
+ * currently, a buffer is only written back at the time
+ * it is selected for replacement. It should
+ * be done sooner if possible to reduce latency of
+ * BufferAlloc(). Maybe there should be a daemon process.
*
* Synchronization/Locking:
*
- * BufMgrLock lock -- must be acquired before manipulating the
- * buffer queues (lookup/freelist). Must be released
- * before exit and before doing any IO.
+ * BufMgrLock lock -- must be acquired before manipulating the
+ * buffer queues (lookup/freelist). Must be released
+ * before exit and before doing any IO.
*
* IO_IN_PROGRESS -- this is a flag in the buffer descriptor.
- * It must be set when an IO is initiated and cleared at
- * the end of the IO. It is there to make sure that one
- * process doesn't start to use a buffer while another is
- * faulting it in. see IOWait/IOSignal.
+ * It must be set when an IO is initiated and cleared at
+ * the end of the IO. It is there to make sure that one
+ * process doesn't start to use a buffer while another is
+ * faulting it in. see IOWait/IOSignal.
*
- * refcount -- A buffer is pinned during IO and immediately
- * after a BufferAlloc(). A buffer is always either pinned
- * or on the freelist but never both. The buffer must be
- * released, written, or flushed before the end of
- * transaction.
+ * refcount -- A buffer is pinned during IO and immediately
+ * after a BufferAlloc(). A buffer is always either pinned
+ * or on the freelist but never both. The buffer must be
+ * released, written, or flushed before the end of
+ * transaction.
*
 * PrivateRefCount -- Each buffer also has a private refcount that keeps
- * track of the number of times the buffer is pinned in the current
- * process. This is used for two purposes: first, if we pin a
- * buffer more than once, we only need to change the shared refcount
- * once, thus only lock the buffer pool once; second, when a transaction
- * aborts, it should only unpin the buffers exactly the number of times it
- * has pinned them, so that it will not blow away buffers of another
- * backend.
+ * track of the number of times the buffer is pinned in the current
+ * process. This is used for two purposes: first, if we pin a
+ * buffer more than once, we only need to change the shared refcount
+ * once, thus only lock the buffer pool once; second, when a transaction
+ * aborts, it should only unpin the buffers exactly the number of times it
+ * has pinned them, so that it will not blow away buffers of another
+ * backend.
*
*/
-SPINLOCK BufMgrLock;
+SPINLOCK BufMgrLock;
-long int ReadBufferCount;
-long int ReadLocalBufferCount;
-long int BufferHitCount;
-long int LocalBufferHitCount;
-long int BufferFlushCount;
-long int LocalBufferFlushCount;
+long int ReadBufferCount;
+long int ReadLocalBufferCount;
+long int BufferHitCount;
+long int LocalBufferHitCount;
+long int BufferFlushCount;
+long int LocalBufferFlushCount;
/*
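
The pinning rules described in the comment block above reduce to a small
amount of per-backend bookkeeping.  The sketch below is a minimal
illustration, not the PostgreSQL source: the names SharedRefCount,
PinBuffer, and UnpinBuffer are invented here, and the BufMgrLock
acquire/release calls are shown only as comments.  The point it
demonstrates is that only the first pin and the last unpin by a backend
touch shared state; intermediate pins update just the backend-local
PrivateRefCount, so the buffer pool is locked at most once per pin level.

    /* Minimal sketch of two-level pin bookkeeping (hypothetical names). */
    #include <assert.h>

    #define NBUFFERS 64

    static int SharedRefCount[NBUFFERS];  /* shared, lock-protected count */
    static int PrivateRefCount[NBUFFERS]; /* pins held by this backend only */

    static void
    PinBuffer(int buf_id)
    {
        assert(buf_id >= 0 && buf_id < NBUFFERS);
        if (PrivateRefCount[buf_id] == 0)
        {
            /* first pin by this backend: bump shared count under the lock */
            /* SpinAcquire(BufMgrLock); */
            SharedRefCount[buf_id]++;
            /* SpinRelease(BufMgrLock); */
        }
        PrivateRefCount[buf_id]++;        /* later pins touch local state only */
    }

    static void
    UnpinBuffer(int buf_id)
    {
        assert(PrivateRefCount[buf_id] > 0);
        if (--PrivateRefCount[buf_id] == 0)
        {
            /* last local pin released: update the shared count under the lock */
            /* SpinAcquire(BufMgrLock); */
            SharedRefCount[buf_id]--;
            /* SpinRelease(BufMgrLock); */
        }
    }

    int
    main(void)
    {
        PinBuffer(3);
        PinBuffer(3);               /* second pin: no shared update needed */
        UnpinBuffer(3);
        UnpinBuffer(3);             /* last unpin: shared count drops to 0 */
        return SharedRefCount[3];   /* 0 on success */
    }

This is also why an aborting transaction can safely unpin only as many
times as it pinned: PrivateRefCount records exactly this backend's share,
so other backends' pins in SharedRefCount are never blown away.
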
@@ -138,111 +143,121 @@ long int LocalBufferFlushCount;
void
InitBufferPool(IPCKey key)
{
- bool foundBufs,foundDescs;
- int i;
-
- /* check padding of BufferDesc and BufferHdr */
- /* we need both checks because a sbufdesc_padded > PADDED_SBUFDESC_SIZE
- will shrink sbufdesc to the required size, which is bad */
- if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE ||
- sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE)
- elog(WARN,"Internal error: sbufdesc does not have the proper size, "
- "contact the Postgres developers");
- if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE/2)
- elog(WARN,"Internal error: sbufdesc is greatly over-sized, "
- "contact the Postgres developers");
-
- Data_Descriptors = NBuffers;
- Free_List_Descriptor = Data_Descriptors;
- Lookup_List_Descriptor = Data_Descriptors + 1;
- Num_Descriptors = Data_Descriptors + 1;
-
- SpinAcquire(BufMgrLock);
-
+ bool foundBufs,
+ foundDescs;
+ int i;
+
+ /* check padding of BufferDesc and BufferHdr */
+
+ /*
+ * we need both checks because a sbufdesc_padded >
+ * PADDED_SBUFDESC_SIZE will shrink sbufdesc to the required size,
+ * which is bad
+ */
+ if (sizeof(struct sbufdesc) != PADDED_SBUFDESC_SIZE ||
+ sizeof(struct sbufdesc_unpadded) > PADDED_SBUFDESC_SIZE)
+ elog(WARN, "Internal error: sbufdesc does not have the proper size, "
+ "contact the Postgres developers");
+ if (sizeof(struct sbufdesc_unpadded) <= PADDED_SBUFDESC_SIZE / 2)
+ elog(WARN, "Internal error: sbufdesc is greatly over-sized, "
+ "contact the Postgres developers");
+
+ Data_Descriptors = NBuffers;
+ Free_List_Descriptor = Data_Descriptors;
+ Lookup_List_Descriptor = Data_Descriptors + 1;
+ Num_Descriptors = Data_Descriptors + 1;
+
+ SpinAcquire(BufMgrLock);
+
#ifdef BMTRACE
- CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
- (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
- &foundDescs);
- if (!foundDescs)
- memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));
-
- TraceBuf = (bmtrace *) &(CurTraceBuf[1]);
+ CurTraceBuf = (long *) ShmemInitStruct("Buffer trace",
+ (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long),
+ &foundDescs);
+ if (!foundDescs)
+ memset(CurTraceBuf, 0, (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long));
+
+ TraceBuf = (bmtrace *) & (CurTraceBuf[1]);
#endif
-
- BufferDescriptors = (BufferDesc *)
- ShmemInitStruct("Buffer Descriptors",
- Num_Descriptors*sizeof(BufferDesc),&foundDescs);
-
- BufferBlocks = (BufferBlock)
- ShmemInitStruct("Buffer Blocks",
- NBuffers*BLCKSZ,&foundBufs);
-
+
+ BufferDescriptors = (BufferDesc *)
+ ShmemInitStruct("Buffer Descriptors",
+ Num_Descriptors * sizeof(BufferDesc), &foundDescs);
+
+ BufferBlocks = (BufferBlock)
+ ShmemInitStruct("Buffer Blocks",
+ NBuffers * BLCKSZ, &foundBufs);
+
#ifndef HAS_TEST_AND_SET
- {
- bool foundNWaitIO;
-
- NWaitIOBackendP = (long *)ShmemInitStruct("#Backends Waiting IO",
- sizeof(long),
- &foundNWaitIO);
- if (!foundNWaitIO)
- *NWaitIOBackendP = 0;
- }
+ {
+ bool foundNWaitIO;
+
+ NWaitIOBackendP = (long *) ShmemInitStruct("#Backends Waiting IO",
+ sizeof(long),
+ &foundNWaitIO);
+ if (!foundNWaitIO)
+ *NWaitIOBackendP = 0;
+ }
#endif
-
- if (foundDescs || foundBufs) {
-
- /* both should be present or neither */
- Assert(foundDescs && foundBufs);
-
- } else {
- BufferDesc *buf;
- unsigned long block;
-
- buf = BufferDescriptors;
- block = (unsigned long) BufferBlocks;
-
- /*
- * link the buffers into a circular, doubly-linked list to
- * initialize free list. Still don't know anything about
- * replacement strategy in this file.
- */
- for (i = 0; i < Data_Descriptors; block+=BLCKSZ,buf++,i++) {
- Assert(ShmemIsValid((unsigned long)block));
-
- buf->freeNext = i+1;
- buf->freePrev = i-1;
-
- CLEAR_BUFFERTAG(&(buf->tag));
- buf->data = MAKE_OFFSET(block);
- buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
- buf->refcount = 0;
- buf->buf_id = i;
+
+ if (foundDescs || foundBufs)
+ {
+
+ /* both should be present or neither */
+ Assert(foundDescs && foundBufs);
+
+ }
+ else
+ {
+ BufferDesc *buf;
+ unsigned long block;
+
+ buf = BufferDescriptors;
+ block = (unsigned long) BufferBlocks;
+
+ /*
+ * link the buffers into a circular, doubly-linked list to
+ * initialize free list. Still don't know anything about
+ * replacement strategy in this file.
+ */
+ for (i = 0; i < Data_Descriptors; block += BLCKSZ, buf++, i++)
+ {
+ Assert(ShmemIsValid((unsigned long) block));
+
+ buf->freeNext = i + 1;
+ buf->freePrev = i - 1;
+
+ CLEAR_BUFFERTAG(&(buf->tag));
+ buf->data = MAKE_OFFSET(block);
+ buf->flags = (BM_DELETED | BM_FREE | BM_VALID);
+ buf->refcount = 0;
+ buf->buf_id = i;
#ifdef HAS_TEST_AND_SET
- S_INIT_LOCK(&(buf->io_in_progress_lock));
+ S_INIT_LOCK(&(buf->io_in_progress_lock));
#endif
+ }
+
+ /* close the circular queue */
+ BufferDescriptors[0].freePrev = Data_Descriptors - 1;
+ BufferDescriptors[Data_Descriptors - 1].freeNext = 0;
}
-
- /* close the circular queue */
- BufferDescriptors[0].freePrev = Data_Descriptors-1;
- BufferDescriptors[Data_Descriptors-1].freeNext = 0;
- }
-
- /* Init the rest of the module */
- InitBufTable();
- InitFreeList(!foundDescs);
-
- SpinRelease(BufMgrLock);
-
+
+ /* Init the rest of the module */
+ InitBufTable();
+ InitFreeList(!foundDescs);
+
+ SpinRelease(BufMgrLock);
+
#ifndef HAS_TEST_AND_SET
- {
- int status;
- WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
- 1, IPCProtection, 0, 1, &status);
- }
+ {
+ int status;
+
+ WaitIOSemId = IpcSemaphoreCreate(IPCKeyGetWaitIOSemaphoreKey(key),
+ 1, IPCProtection, 0, 1, &status);
+ }
#endif
- PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
- LastRefCount = (long *) calloc(NBuffers, sizeof(long));
- CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
+ PrivateRefCount = (long *) calloc(NBuffers, sizeof(long));
+ LastRefCount = (long *) calloc(NBuffers, sizeof(long));
+ CommitInfoNeedsSave = (long *) calloc(NBuffers, sizeof(long));
}
/* -----------------------------------------------------
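
The initialization loop in the hunk above wires the descriptors into a
circular, doubly-linked free list by array index and then closes the ring
outside the loop.  The following standalone sketch, with simplified types
invented for illustration (a bare Desc struct instead of BufferDesc),
shows the same linking pattern and walks the ring once to verify it is
closed:

    #include <stdio.h>

    #define NDESC 8

    typedef struct
    {
        int freeNext;   /* index of next free descriptor */
        int freePrev;   /* index of previous free descriptor */
    } Desc;

    int
    main(void)
    {
        Desc list[NDESC];
        int  i;

        /* chain neighbors by index, as the loop over Data_Descriptors does;
         * the two ends are temporarily out of range (-1 and NDESC) */
        for (i = 0; i < NDESC; i++)
        {
            list[i].freeNext = i + 1;
            list[i].freePrev = i - 1;
        }

        /* close the circular queue, exactly as the code above does */
        list[0].freePrev = NDESC - 1;
        list[NDESC - 1].freeNext = 0;

        /* walk the ring from element 0 back around to 0 */
        for (i = list[0].freeNext; i != 0; i = list[i].freeNext)
            printf("%d ", i);
        printf("\n");               /* prints: 1 2 3 4 5 6 7 */
        return 0;
    }
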
@@ -255,43 +270,41 @@ InitBufferPool(IPCKey key)
int
BufferShmemSize()
{
- int size = 0;
- int nbuckets;
- int nsegs;
- int tmp;
-
- nbuckets = 1 << (int)my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
- nsegs = 1 << (int)my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
-
- /* size of shmem binding table */
- size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */
- size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
- size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
- size += BUCKET_ALLOC_INCR *
- (MAXALIGN(sizeof(BUCKET_INDEX)) +
- MAXALIGN(BTABLE_KEYSIZE) +
- MAXALIGN(BTABLE_DATASIZE));
-
- /* size of buffer descriptors */
- size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));
-
- /* size of data pages */
- size += NBuffers * MAXALIGN(BLCKSZ);
-
- /* size of buffer hash table */
- size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */
- size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
- size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
- tmp = (int)ceil((double)NBuffers/BUCKET_ALLOC_INCR);
- size += tmp * BUCKET_ALLOC_INCR *
- (MAXALIGN(sizeof(BUCKET_INDEX)) +
- MAXALIGN(sizeof(BufferTag)) +
- MAXALIGN(sizeof(Buffer)));
-
+ int size = 0;
+ int nbuckets;
+ int nsegs;
+ int tmp;
+
+ nbuckets = 1 << (int) my_log2((NBuffers - 1) / DEF_FFACTOR + 1);
+ nsegs = 1 << (int) my_log2((nbuckets - 1) / DEF_SEGSIZE + 1);
+
+ /* size of shmem binding table */
+ size += MAXALIGN(my_log2(BTABLE_SIZE) * sizeof(void *)); /* HTAB->dir */
+ size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
+ size += MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
+ size += BUCKET_ALLOC_INCR *
+ (MAXALIGN(sizeof(BUCKET_INDEX)) +
+ MAXALIGN(BTABLE_KEYSIZE) +
+ MAXALIGN(BTABLE_DATASIZE));
+
+ /* size of buffer descriptors */
+ size += MAXALIGN((NBuffers + 1) * sizeof(BufferDesc));
+
+ /* size of data pages */
+ size += NBuffers * MAXALIGN(BLCKSZ);
+
+ /* size of buffer hash table */
+ size += MAXALIGN(my_log2(NBuffers) * sizeof(void *)); /* HTAB->dir */
+ size += MAXALIGN(sizeof(HHDR)); /* HTAB->hctl */
+ size += nsegs * MAXALIGN(DEF_SEGSIZE * sizeof(SEGMENT));
+ tmp = (int) ceil((double) NBuffers / BUCKET_ALLOC_INCR);
+ size += tmp * BUCKET_ALLOC_INCR *
+ (MAXALIGN(sizeof(BUCKET_INDEX)) +
+ MAXALIGN(sizeof(BufferTag)) +
+ MAXALIGN(sizeof(Buffer)));
+
#ifdef BMTRACE
- size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
+ size += (BMT_LIMIT * sizeof(bmtrace)) + sizeof(long);
#endif
- return size;
+ return size;
}
-
-
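
BufferShmemSize() in the last hunk adds up the aligned sizes of every
buffer-manager structure (binding table, descriptors, data pages, hash
table) so the postmaster can reserve one shared-memory region up front.
A simplified model of that accounting pattern follows; the MYALIGN macro
and the per-component sizes here are made up for illustration and stand
in for the real MAXALIGN and the actual structure catalog:

    #include <stdio.h>
    #include <stddef.h>

    #define ALIGNOF      8
    #define MYALIGN(LEN) (((LEN) + (ALIGNOF - 1)) & ~((size_t) (ALIGNOF - 1)))

    #define NBUFFERS 64
    #define BLCKSZ   8192

    int
    main(void)
    {
        size_t size = 0;

        /* descriptors: one spare, 64 bytes apiece assumed for the sketch */
        size += MYALIGN((NBUFFERS + 1) * 64);

        /* data pages: each block rounded up to the alignment boundary */
        size += (size_t) NBUFFERS * MYALIGN(BLCKSZ);

        /* hash directory: one pointer per buffer, assumed for the sketch */
        size += MYALIGN(NBUFFERS * sizeof(void *));

        printf("reserve %zu bytes of shared memory\n", size);
        return 0;
    }

Summing every component through the same alignment macro is what lets the
caller hand the total to the shared-memory allocator once, instead of
growing the segment piecemeal as each structure is created.
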