aboutsummaryrefslogtreecommitdiff
path: root/src/backend/storage/buffer/freelist.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/storage/buffer/freelist.c')
-rw-r--r--src/backend/storage/buffer/freelist.c710
1 files changed, 325 insertions, 385 deletions
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c
index 595e4905a80..c14d4464974 100644
--- a/src/backend/storage/buffer/freelist.c
+++ b/src/backend/storage/buffer/freelist.c
@@ -3,210 +3,208 @@
* freelist.c
* routines for manipulating the buffer pool's replacement strategy.
*
+ * Note: all routines in this file assume that the BufMgrLock is held
+ * by the caller, so no synchronization is needed.
+ *
+ *
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.41 2004/02/12 15:06:56 wieck Exp $
+ * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.42 2004/04/19 23:27:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-/*
- * OLD COMMENTS
- *
- * Data Structures:
- * SharedFreeList is a circular queue. Notice that this
- * is a shared memory queue so the next/prev "ptrs" are
- * buffer ids, not addresses.
- *
- * Sync: all routines in this file assume that the buffer
- * semaphore has been acquired by the caller.
- */
-
#include "postgres.h"
+#include "access/xact.h"
#include "storage/buf_internals.h"
#include "storage/bufmgr.h"
-#include "storage/ipc.h"
-#include "storage/proc.h"
-#include "access/xact.h"
-#include "miscadmin.h"
-#ifndef MAX
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#endif
-#ifndef MIN
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-#endif
+/* GUC variable: time in seconds between statistics reports */
+int DebugSharedBuffers = 0;
+
+/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;
static BufferStrategyCDB *StrategyCDB = NULL;
-static int strategy_cdb_found;
-static int strategy_cdb_replace;
-static int strategy_get_from;
-
-int DebugSharedBuffers = 0;
-
-static bool strategy_hint_vacuum;
+/* Backend-local state about whether currently vacuuming */
+static bool strategy_hint_vacuum = false;
static TransactionId strategy_vacuum_xid;
-#define T1_TARGET StrategyControl->target_T1_size
-#define B1_LENGTH StrategyControl->listSize[STRAT_LIST_B1]
-#define T1_LENGTH StrategyControl->listSize[STRAT_LIST_T1]
-#define T2_LENGTH StrategyControl->listSize[STRAT_LIST_T2]
-#define B2_LENGTH StrategyControl->listSize[STRAT_LIST_B2]
+#define T1_TARGET (StrategyControl->target_T1_size)
+#define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1])
+#define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1])
+#define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2])
+#define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2])
/*
* Macro to remove a CDB from whichever list it currently is on
*/
#define STRAT_LIST_REMOVE(cdb) \
-{ \
- AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \
- if ((cdb)->prev < 0) \
- StrategyControl->listHead[(cdb)->list] = (cdb)->next; \
- else \
- StrategyCDB[(cdb)->prev].next = (cdb)->next; \
- if ((cdb)->next < 0) \
- StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \
- else \
- StrategyCDB[(cdb)->next].prev = (cdb)->prev; \
- StrategyControl->listSize[(cdb)->list]--; \
- (cdb)->list = STRAT_LIST_UNUSED; \
-}
+do { \
+ Assert((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \
+ if ((cdb)->prev < 0) \
+ StrategyControl->listHead[(cdb)->list] = (cdb)->next; \
+ else \
+ StrategyCDB[(cdb)->prev].next = (cdb)->next; \
+ if ((cdb)->next < 0) \
+ StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \
+ else \
+ StrategyCDB[(cdb)->next].prev = (cdb)->prev; \
+ StrategyControl->listSize[(cdb)->list]--; \
+ (cdb)->list = STRAT_LIST_UNUSED; \
+} while(0)
/*
* Macro to add a CDB to the tail of a list (MRU position)
*/
#define STRAT_MRU_INSERT(cdb,l) \
-{ \
- AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \
- if (StrategyControl->listTail[(l)] < 0) \
- { \
- (cdb)->prev = (cdb)->next = -1; \
- StrategyControl->listHead[(l)] = \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- else \
- { \
- (cdb)->next = -1; \
- (cdb)->prev = StrategyControl->listTail[(l)]; \
- StrategyCDB[StrategyControl->listTail[(l)]].next = \
- ((cdb) - StrategyCDB); \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- StrategyControl->listSize[(l)]++; \
- (cdb)->list = (l); \
-}
+do { \
+ Assert((cdb)->list == STRAT_LIST_UNUSED); \
+ if (StrategyControl->listTail[(l)] < 0) \
+ { \
+ (cdb)->prev = (cdb)->next = -1; \
+ StrategyControl->listHead[(l)] = \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ else \
+ { \
+ (cdb)->next = -1; \
+ (cdb)->prev = StrategyControl->listTail[(l)]; \
+ StrategyCDB[StrategyControl->listTail[(l)]].next = \
+ ((cdb) - StrategyCDB); \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ StrategyControl->listSize[(l)]++; \
+ (cdb)->list = (l); \
+} while(0)
/*
* Macro to add a CDB to the head of a list (LRU position)
*/
#define STRAT_LRU_INSERT(cdb,l) \
-{ \
- AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \
- if (StrategyControl->listHead[(l)] < 0) \
- { \
- (cdb)->prev = (cdb)->next = -1; \
- StrategyControl->listHead[(l)] = \
- StrategyControl->listTail[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- else \
- { \
- (cdb)->prev = -1; \
- (cdb)->next = StrategyControl->listHead[(l)]; \
- StrategyCDB[StrategyControl->listHead[(l)]].prev = \
- ((cdb) - StrategyCDB); \
- StrategyControl->listHead[(l)] = \
- ((cdb) - StrategyCDB); \
- } \
- StrategyControl->listSize[(l)]++; \
- (cdb)->list = (l); \
-}
+do { \
+ Assert((cdb)->list == STRAT_LIST_UNUSED); \
+ if (StrategyControl->listHead[(l)] < 0) \
+ { \
+ (cdb)->prev = (cdb)->next = -1; \
+ StrategyControl->listHead[(l)] = \
+ StrategyControl->listTail[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ else \
+ { \
+ (cdb)->prev = -1; \
+ (cdb)->next = StrategyControl->listHead[(l)]; \
+ StrategyCDB[StrategyControl->listHead[(l)]].prev = \
+ ((cdb) - StrategyCDB); \
+ StrategyControl->listHead[(l)] = \
+ ((cdb) - StrategyCDB); \
+ } \
+ StrategyControl->listSize[(l)]++; \
+ (cdb)->list = (l); \
+} while(0)
+
+
+/*
+ * Printout for use when DebugSharedBuffers is enabled
+ */
+static void
+StrategyStatsDump(void)
+{
+ time_t now = time(NULL);
+
+ if (StrategyControl->stat_report + DebugSharedBuffers < now)
+ {
+ long all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
+ int id, t1_clean, t2_clean;
+ ErrorContextCallback *errcxtold;
+ id = StrategyControl->listHead[STRAT_LIST_T1];
+ t1_clean = 0;
+ while (id >= 0)
+ {
+ if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
+ break;
+ t1_clean++;
+ id = StrategyCDB[id].next;
+ }
+ id = StrategyControl->listHead[STRAT_LIST_T2];
+ t2_clean = 0;
+ while (id >= 0)
+ {
+ if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
+ break;
+ t2_clean++;
+ id = StrategyCDB[id].next;
+ }
+
+ if (StrategyControl->num_lookup == 0)
+ {
+ all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
+ }
+ else
+ {
+ b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
+ StrategyControl->num_lookup);
+ t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
+ StrategyControl->num_lookup);
+ t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
+ StrategyControl->num_lookup);
+ b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
+ StrategyControl->num_lookup);
+ all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
+ }
+
+ errcxtold = error_context_stack;
+ error_context_stack = NULL;
+ elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
+ T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
+ elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
+ all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
+ elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d",
+ t1_clean, t2_clean);
+ error_context_stack = errcxtold;
+
+ StrategyControl->num_lookup = 0;
+ StrategyControl->num_hit[STRAT_LIST_B1] = 0;
+ StrategyControl->num_hit[STRAT_LIST_T1] = 0;
+ StrategyControl->num_hit[STRAT_LIST_T2] = 0;
+ StrategyControl->num_hit[STRAT_LIST_B2] = 0;
+ StrategyControl->stat_report = now;
+ }
+}
/*
* StrategyBufferLookup
*
* Lookup a page request in the cache directory. A buffer is only
- * returned for a T1 or T2 cache hit. B1 and B2 hits are only
- * remembered here to later affect the behaviour.
+ * returned for a T1 or T2 cache hit. B1 and B2 hits are just
+ * remembered here, to possibly affect the behaviour later.
+ *
+ * recheck indicates we are rechecking after I/O wait; do not change
+ * internal status in this case.
+ *
+ * *cdb_found_index is set to the index of the found CDB, or -1 if none.
+ * This is not intended to be used by the caller, except to pass to
+ * StrategyReplaceBuffer().
*/
BufferDesc *
-StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
+StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
+ int *cdb_found_index)
{
BufferStrategyCDB *cdb;
- time_t now;
+ /* Optional stats printout */
if (DebugSharedBuffers > 0)
- {
- time(&now);
- if (StrategyControl->stat_report + DebugSharedBuffers < now)
- {
- long all_hit, b1_hit, t1_hit, t2_hit, b2_hit;
- int id, t1_clean, t2_clean;
- ErrorContextCallback *errcxtold;
-
- id = StrategyControl->listHead[STRAT_LIST_T1];
- t1_clean = 0;
- while (id >= 0)
- {
- if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
- break;
- t1_clean++;
- id = StrategyCDB[id].next;
- }
- id = StrategyControl->listHead[STRAT_LIST_T2];
- t2_clean = 0;
- while (id >= 0)
- {
- if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY)
- break;
- t2_clean++;
- id = StrategyCDB[id].next;
- }
-
- if (StrategyControl->num_lookup == 0)
- {
- all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0;
- }
- else
- {
- b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 /
- StrategyControl->num_lookup);
- t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 /
- StrategyControl->num_lookup);
- t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 /
- StrategyControl->num_lookup);
- b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 /
- StrategyControl->num_lookup);
- all_hit = b1_hit + t1_hit + t2_hit + b2_hit;
- }
-
- errcxtold = error_context_stack;
- error_context_stack = NULL;
- elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d",
- T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH);
- elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%",
- all_hit, b1_hit, t1_hit, t2_hit, b2_hit);
- elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d",
- t1_clean, t2_clean);
- error_context_stack = errcxtold;
-
- StrategyControl->num_lookup = 0;
- StrategyControl->num_hit[STRAT_LIST_B1] = 0;
- StrategyControl->num_hit[STRAT_LIST_T1] = 0;
- StrategyControl->num_hit[STRAT_LIST_T2] = 0;
- StrategyControl->num_hit[STRAT_LIST_B2] = 0;
- StrategyControl->stat_report = now;
- }
- }
+ StrategyStatsDump();
/*
* Count lookups
@@ -216,75 +214,75 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
/*
* Lookup the block in the shared hash table
*/
- strategy_cdb_found = BufTableLookup(tagPtr);
+ *cdb_found_index = BufTableLookup(tagPtr);
/*
- * Handle CDB lookup miss
+ * Done if complete CDB lookup miss
*/
- if (strategy_cdb_found < 0)
- {
- if (!recheck)
- {
- /*
- * This is an initial lookup and we have a complete
- * cache miss (block found nowhere). This means we
- * remember according to the current T1 size and the
- * target T1 size from where we take a block if we
- * need one later.
- */
- if (T1_LENGTH >= MAX(1, T1_TARGET))
- strategy_get_from = STRAT_LIST_T1;
- else
- strategy_get_from = STRAT_LIST_T2;
- }
-
- /*
- * Do the cost accounting for vacuum
- */
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageMiss;
-
- /* report cache miss */
+ if (*cdb_found_index < 0)
return NULL;
- }
/*
* We found a CDB
*/
- cdb = &StrategyCDB[strategy_cdb_found];
+ cdb = &StrategyCDB[*cdb_found_index];
/*
* Count hits
*/
StrategyControl->num_hit[cdb->list]++;
- if (VacuumCostActive)
- VacuumCostBalance += VacuumCostPageHit;
/*
* If this is a T2 hit, we simply move the CDB to the
* T2 MRU position and return the found buffer.
+ *
+ * A CDB in T2 cannot have t1_vacuum set, so we needn't check. However,
+ * if the current process is VACUUM then it doesn't promote to MRU.
*/
if (cdb->list == STRAT_LIST_T2)
{
- STRAT_LIST_REMOVE(cdb);
- STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ if (!strategy_hint_vacuum)
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ }
return &BufferDescriptors[cdb->buf_id];
}
/*
- * If this is a T1 hit, we move the buffer to the T2 MRU
- * only if another transaction had read it into T1. This is
- * required because any UPDATE or DELETE in PostgreSQL does
- * multiple ReadBuffer(), first during the scan, later during
- * the heap_update() or heap_delete().
+ * If this is a T1 hit, we move the buffer to the T2 MRU only if another
+ * transaction had read it into T1, *and* neither transaction is a VACUUM.
+ * This is required because any UPDATE or DELETE in PostgreSQL does
+ * multiple ReadBuffer(), first during the scan, later during the
+ * heap_update() or heap_delete(). Otherwise move to T1 MRU. VACUUM
+ * doesn't even get to make that happen.
*/
if (cdb->list == STRAT_LIST_T1)
{
- if (!TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+ if (!strategy_hint_vacuum)
{
- STRAT_LIST_REMOVE(cdb);
- STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ if (!cdb->t1_vacuum &&
+ !TransactionIdIsCurrentTransactionId(cdb->t1_xid))
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T2);
+ }
+ else
+ {
+ STRAT_LIST_REMOVE(cdb);
+ STRAT_MRU_INSERT(cdb, STRAT_LIST_T1);
+ /*
+ * If a non-VACUUM process references a page recently loaded
+ * by VACUUM, clear the stigma; the state will now be the
+ * same as if this process loaded it originally.
+ */
+ if (cdb->t1_vacuum)
+ {
+ cdb->t1_xid = GetCurrentTransactionId();
+ cdb->t1_vacuum = false;
+ }
+ }
}
return &BufferDescriptors[cdb->buf_id];
@@ -292,17 +290,19 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
/*
* In the case of a recheck we don't care about B1 or B2 hits here.
- * The bufmgr does this call only to make sure noone faulted in the
- * block while we where busy flushing another. Now for this really
- * to end up as a B1 or B2 cache hit, we must have been flushing for
- * quite some time as the block not only must have been read, but
- * also traveled through the queue and evicted from the T cache again
- * already.
+ * The bufmgr does this call only to make sure no-one faulted in the
+ * block while we where busy flushing another; we don't want to doubly
+ * adjust the T1target.
+ *
+ * Now for this really to end up as a B1 or B2 cache hit, we must have
+ * been flushing for quite some time as the block not only must have been
+ * read, but also traveled through the queue and evicted from the T cache
+ * again already.
+ *
+ * VACUUM re-reads shouldn't adjust the target either.
*/
- if (recheck)
- {
+ if (recheck || strategy_hint_vacuum)
return NULL;
- }
/*
* Adjust the target size of the T1 cache depending on if this is
@@ -316,8 +316,8 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* small. Adjust the T1 target size and continue
* below.
*/
- T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1),
- Data_Descriptors);
+ T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1),
+ NBuffers);
break;
case STRAT_LIST_B2:
@@ -325,26 +325,17 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* B2 hit means that the T2 cache is probably too
* small. Adjust the T1 target size and continue
* below.
- */
- T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0);
+ */
+ T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0);
break;
default:
- elog(ERROR, "Buffer hash table corrupted - CDB on list %d found",
- cdb->list);
+ elog(ERROR, "buffer hash table corrupted: CDB->list = %d",
+ cdb->list);
}
/*
- * Decide where to take from if we will be out of
- * free blocks later in StrategyGetBuffer().
- */
- if (T1_LENGTH >= MAX(1, T1_TARGET))
- strategy_get_from = STRAT_LIST_T1;
- else
- strategy_get_from = STRAT_LIST_T2;
-
- /*
- * Even if we had seen the block in the past, it's data is
+ * Even though we had seen the block in the past, its data is
* not currently in memory ... cache miss to the bufmgr.
*/
return NULL;
@@ -357,18 +348,25 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck)
* Called by the bufmgr to get the next candidate buffer to use in
* BufferAlloc(). The only hard requirement BufferAlloc() has is that
* this buffer must not currently be pinned.
+ *
+ * *cdb_replace_index is set to the index of the candidate CDB, or -1 if
+ * none (meaning we are using a previously free buffer). This is not
+ * intended to be used by the caller, except to pass to
+ * StrategyReplaceBuffer().
*/
BufferDesc *
-StrategyGetBuffer(void)
+StrategyGetBuffer(int *cdb_replace_index)
{
int cdb_id;
BufferDesc *buf;
if (StrategyControl->listFreeBuffers < 0)
{
- /* We don't have a free buffer, must take one from T1 or T2 */
-
- if (strategy_get_from == STRAT_LIST_T1)
+ /*
+ * We don't have a free buffer, must take one from T1 or T2.
+ * Choose based on trying to converge T1len to T1target.
+ */
+ if (T1_LENGTH >= Max(1, T1_TARGET))
{
/*
* We should take the first unpinned buffer from T1.
@@ -379,7 +377,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
return buf;
}
@@ -387,7 +385,7 @@ StrategyGetBuffer(void)
}
/*
- * No unpinned T1 buffer found - pardon T2 cache.
+ * No unpinned T1 buffer found - try T2 cache.
*/
cdb_id = StrategyControl->listHead[STRAT_LIST_T2];
while (cdb_id >= 0)
@@ -395,7 +393,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
return buf;
}
@@ -405,7 +403,7 @@ StrategyGetBuffer(void)
/*
* No unpinned buffers at all!!!
*/
- elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
+ elog(ERROR, "no unpinned buffers available");
}
else
{
@@ -418,7 +416,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2);
return buf;
}
@@ -426,7 +424,7 @@ StrategyGetBuffer(void)
}
/*
- * No unpinned T2 buffer found - pardon T1 cache.
+ * No unpinned T2 buffer found - try T1 cache.
*/
cdb_id = StrategyControl->listHead[STRAT_LIST_T1];
while (cdb_id >= 0)
@@ -434,7 +432,7 @@ StrategyGetBuffer(void)
buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id];
if (buf->refcount == 0)
{
- strategy_cdb_replace = cdb_id;
+ *cdb_replace_index = cdb_id;
Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1);
return buf;
}
@@ -444,7 +442,7 @@ StrategyGetBuffer(void)
/*
* No unpinned buffers at all!!!
*/
- elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers");
+ elog(ERROR, "no unpinned buffers available");
}
}
else
@@ -459,13 +457,13 @@ StrategyGetBuffer(void)
* that there will never be any reason to recheck. Otherwise
* we would leak shared buffers here!
*/
- strategy_cdb_replace = -1;
+ *cdb_replace_index = -1;
buf = &BufferDescriptors[StrategyControl->listFreeBuffers];
StrategyControl->listFreeBuffers = buf->bufNext;
buf->bufNext = -1;
- /* Buffer of freelist cannot be pinned */
+ /* Buffer in freelist cannot be pinned */
Assert(buf->refcount == 0);
Assert(!(buf->flags & BM_DIRTY));
@@ -480,54 +478,59 @@ StrategyGetBuffer(void)
/*
* StrategyReplaceBuffer
*
- * Called by the buffer manager to inform us that he possibly flushed
- * a buffer and is now about to replace the content. Prior to this call,
+ * Called by the buffer manager to inform us that he flushed a buffer
+ * and is now about to replace the content. Prior to this call,
* the cache algorithm still reports the buffer as in the cache. After
* this call we report the new block, even if IO might still need to
- * start.
+ * be done to bring in the new content.
+ *
+ * cdb_found_index and cdb_replace_index must be the auxiliary values
+ * returned by previous calls to StrategyBufferLookup and StrategyGetBuffer.
*/
void
-StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
+StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag,
+ int cdb_found_index, int cdb_replace_index)
{
BufferStrategyCDB *cdb_found;
BufferStrategyCDB *cdb_replace;
- if (strategy_cdb_found >= 0)
+ if (cdb_found_index >= 0)
{
- /* This was a ghost buffer cache hit (B1 or B2) */
- cdb_found = &StrategyCDB[strategy_cdb_found];
+ /* This must have been a ghost buffer cache hit (B1 or B2) */
+ cdb_found = &StrategyCDB[cdb_found_index];
/* Assert that the buffer remembered in cdb_found is the one */
/* the buffer manager is currently faulting in */
- Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum));
+ Assert(BUFFERTAGS_EQUAL(&(cdb_found->buf_tag), newTag));
- if (strategy_cdb_replace >= 0)
+ if (cdb_replace_index >= 0)
{
/* We are satisfying it with an evicted T buffer */
- cdb_replace = &StrategyCDB[strategy_cdb_replace];
+ cdb_replace = &StrategyCDB[cdb_replace_index];
/* Assert that the buffer remembered in cdb_replace is */
/* the one the buffer manager has just evicted */
Assert(cdb_replace->list == STRAT_LIST_T1 ||
- cdb_replace->list == STRAT_LIST_T2);
+ cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
- /* If this was a T1 buffer faulted in by vacuum, just */
- /* do not cause the CDB end up in the B1 list, so that */
- /* the vacuum scan does not affect T1_target adjusting */
- if (strategy_hint_vacuum)
+ /*
+ * Under normal circumstances we move the evicted T list entry to
+ * the corresponding B list. However, T1 entries that exist only
+ * because of VACUUM are just thrown into the unused list instead.
+ * We don't expect them to be touched again by the VACUUM, and if
+ * we put them into B1 then VACUUM would skew T1_target adjusting.
+ */
+ if (cdb_replace->t1_vacuum)
{
BufTableDelete(&(cdb_replace->buf_tag));
STRAT_LIST_REMOVE(cdb_replace);
- cdb_replace->buf_id = -1;
cdb_replace->next = StrategyControl->listUnusedCDB;
- StrategyControl->listUnusedCDB = strategy_cdb_replace;
+ StrategyControl->listUnusedCDB = cdb_replace_index;
}
else
{
- /* Under normal circumstances move the evicted */
- /* T list entry to it's corresponding B list */
if (cdb_replace->list == STRAT_LIST_T1)
{
STRAT_LIST_REMOVE(cdb_replace);
@@ -539,25 +542,26 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
}
}
- /* And clear it's block reference */
+ /* And clear its block reference */
cdb_replace->buf_id = -1;
}
else
{
- /* or we satisfy it with an unused buffer */
+ /* We are satisfying it with an unused buffer */
}
- /* Now the found B CDB get's the buffer and is moved to T2 */
+ /* Now the found B CDB gets the buffer and is moved to T2 */
cdb_found->buf_id = buf->buf_id;
STRAT_LIST_REMOVE(cdb_found);
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2);
}
else
{
- /* This was a complete cache miss, so we need to create */
- /* a new CDB. The goal is to keep T1len+B1len <= c */
-
- if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= Data_Descriptors)
+ /*
+ * This was a complete cache miss, so we need to create
+ * a new CDB. The goal is to keep T1len+B1len <= c.
+ */
+ if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers)
{
/* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */
cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]];
@@ -587,18 +591,20 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
}
}
- /* Set the CDB's buf_tag and insert the hash key */
- INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum);
+ /* Set the CDB's buf_tag and insert it into the hash table */
+ cdb_found->buf_tag = *newTag;
BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB));
- if (strategy_cdb_replace >= 0)
+ if (cdb_replace_index >= 0)
{
- /* The buffer was formerly in a T list, move it's CDB
- * to the corresponding B list */
- cdb_replace = &StrategyCDB[strategy_cdb_replace];
+ /*
+ * The buffer was formerly in a T list, move its CDB
+ * to the corresponding B list
+ */
+ cdb_replace = &StrategyCDB[cdb_replace_index];
Assert(cdb_replace->list == STRAT_LIST_T1 ||
- cdb_replace->list == STRAT_LIST_T2);
+ cdb_replace->list == STRAT_LIST_T2);
Assert(cdb_replace->buf_id == buf->buf_id);
Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag)));
@@ -612,32 +618,32 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
STRAT_LIST_REMOVE(cdb_replace);
STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2);
}
- /* And clear it's block reference */
+ /* And clear its block reference */
cdb_replace->buf_id = -1;
}
else
{
- /* or we satisfy it with an unused buffer */
+ /* We are satisfying it with an unused buffer */
}
/* Assign the buffer id to the new CDB */
cdb_found->buf_id = buf->buf_id;
/*
- * Specialized VACUUM optimization. If this "complete cache miss"
- * happened because vacuum needed the page, we want it later on
- * to be placed at the LRU instead of the MRU position of T1.
+ * Specialized VACUUM optimization. If this complete cache miss
+ * happened because vacuum needed the page, we place it at the LRU
+ * position of T1; normally it goes at the MRU position.
*/
if (strategy_hint_vacuum)
{
- if (strategy_vacuum_xid != GetCurrentTransactionId())
+ if (TransactionIdIsCurrentTransactionId(strategy_vacuum_xid))
+ STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
+ else
{
+ /* VACUUM must have been aborted by error, reset flag */
strategy_hint_vacuum = false;
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
}
- else
- STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1);
-
}
else
STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1);
@@ -645,8 +651,10 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum)
/*
* Remember the Xid when this buffer went onto T1 to avoid
* a single UPDATE promoting a newcomer straight into T2.
+ * Also remember if it was loaded for VACUUM.
*/
cdb_found->t1_xid = GetCurrentTransactionId();
+ cdb_found->t1_vacuum = strategy_hint_vacuum;
}
}
@@ -673,8 +681,7 @@ StrategyInvalidateBuffer(BufferDesc *buf)
*/
cdb_id = BufTableLookup(&(buf->tag));
if (cdb_id < 0)
- elog(ERROR, "StrategyInvalidateBuffer() buffer %d not in directory",
- buf->buf_id);
+ elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id);
cdb = &StrategyCDB[cdb_id];
/*
@@ -694,7 +701,7 @@ StrategyInvalidateBuffer(BufferDesc *buf)
StrategyControl->listUnusedCDB = cdb_id;
/*
- * Clear out the buffers tag and add it to the list of
+ * Clear out the buffer's tag and add it to the list of
* currently unused buffers.
*/
CLEAR_BUFFERTAG(&(buf->tag));
@@ -702,7 +709,9 @@ StrategyInvalidateBuffer(BufferDesc *buf)
StrategyControl->listFreeBuffers = buf->buf_id;
}
-
+/*
+ * StrategyHintVacuum -- tell us whether VACUUM is active
+ */
void
StrategyHintVacuum(bool vacuum_active)
{
@@ -710,9 +719,24 @@ StrategyHintVacuum(bool vacuum_active)
strategy_vacuum_xid = GetCurrentTransactionId();
}
-
+/*
+ * StrategyDirtyBufferList
+ *
+ * Returns a list of dirty buffers, in priority order for writing.
+ * Note that the caller may choose not to write them all.
+ *
+ * The caller must beware of the possibility that a buffer is no longer dirty,
+ * or even contains a different page, by the time he reaches it. If it no
+ * longer contains the same page it need not be written, even if it is (again)
+ * dirty.
+ *
+ * Buffer pointers are stored into buffers[], and corresponding tags into
+ * buftags[], both of size max_buffers. The function returns the number of
+ * buffer IDs stored.
+ */
int
-StrategyDirtyBufferList(int *buffer_list, int max_buffers)
+StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
+ int max_buffers)
{
int num_buffer_dirty = 0;
int cdb_id_t1;
@@ -724,13 +748,13 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
* Traverse the T1 and T2 list LRU to MRU in "parallel"
* and add all dirty buffers found in that order to the list.
* The ARC strategy keeps all used buffers including pinned ones
- * in the T1 or T2 list. So we cannot loose any dirty buffers.
+ * in the T1 or T2 list. So we cannot miss any dirty buffers.
*/
cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1];
cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2];
while ((cdb_id_t1 >= 0 || cdb_id_t2 >= 0) &&
- num_buffer_dirty < max_buffers)
+ num_buffer_dirty < max_buffers)
{
if (cdb_id_t1 >= 0)
{
@@ -741,7 +765,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
{
if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
{
- buffer_list[num_buffer_dirty++] = buf_id;
+ buffers[num_buffer_dirty] = buf;
+ buftags[num_buffer_dirty] = buf->tag;
+ num_buffer_dirty++;
}
}
@@ -757,7 +783,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers)
{
if ((buf->flags & BM_DIRTY) || (buf->cntxDirty))
{
- buffer_list[num_buffer_dirty++] = buf_id;
+ buffers[num_buffer_dirty] = buf;
+ buftags[num_buffer_dirty] = buf->tag;
+ num_buffer_dirty++;
}
}
@@ -785,16 +813,16 @@ StrategyInitialize(bool init)
/*
* Initialize the shared CDB lookup hashtable
*/
- InitBufTable(Data_Descriptors * 2);
+ InitBufTable(NBuffers * 2);
/*
* Get or create the shared strategy control block and the CDB's
*/
StrategyControl = (BufferStrategyControl *)
- ShmemInitStruct("Buffer Strategy Status",
- sizeof(BufferStrategyControl) +
- sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1),
- &found);
+ ShmemInitStruct("Buffer Strategy Status",
+ sizeof(BufferStrategyControl) +
+ sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1),
+ &found);
StrategyCDB = &(StrategyControl->cdb[0]);
if (!found)
@@ -805,8 +833,8 @@ StrategyInitialize(bool init)
Assert(init);
/*
- * Grab the whole linked list of free buffers for our
- * strategy
+ * Grab the whole linked list of free buffers for our strategy.
+ * We assume it was previously set up by InitBufferPool().
*/
StrategyControl->listFreeBuffers = 0;
@@ -814,7 +842,7 @@ StrategyInitialize(bool init)
* We start off with a target T1 list size of
* half the available cache blocks.
*/
- StrategyControl->target_T1_size = Data_Descriptors / 2;
+ StrategyControl->target_T1_size = NBuffers / 2;
/*
* Initialize B1, T1, T2 and B2 lists to be empty
@@ -832,14 +860,14 @@ StrategyInitialize(bool init)
/*
* All CDB's are linked as the listUnusedCDB
*/
- for (i = 0; i < Data_Descriptors * 2; i++)
+ for (i = 0; i < NBuffers * 2; i++)
{
StrategyCDB[i].next = i + 1;
StrategyCDB[i].list = STRAT_LIST_UNUSED;
CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag));
StrategyCDB[i].buf_id = -1;
}
- StrategyCDB[Data_Descriptors * 2 - 1].next = -1;
+ StrategyCDB[NBuffers * 2 - 1].next = -1;
StrategyControl->listUnusedCDB = 0;
}
else
@@ -847,91 +875,3 @@ StrategyInitialize(bool init)
Assert(!init);
}
}
-
-
-#undef PinBuffer
-
-/*
- * PinBuffer -- make buffer unavailable for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- */
-void
-PinBuffer(BufferDesc *buf)
-{
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- if (PrivateRefCount[b] == 0)
- buf->refcount++;
- PrivateRefCount[b]++;
- Assert(PrivateRefCount[b] > 0);
-}
-
-#ifdef NOT_USED
-void
-PinBuffer_Debug(char *file, int line, BufferDesc *buf)
-{
- PinBuffer(buf);
- if (ShowPinTrace)
- {
- Buffer buffer = BufferDescriptorGetBuffer(buf);
-
- fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \
-refcount = %ld, file: %s, line: %d\n",
- buffer, buf->blind.relname, buf->tag.blockNum,
- PrivateRefCount[buffer - 1], file, line);
- }
-}
-#endif
-
-#undef UnpinBuffer
-
-/*
- * UnpinBuffer -- make buffer available for replacement.
- *
- * This should be applied only to shared buffers, never local ones.
- * Bufmgr lock must be held by caller.
- */
-void
-UnpinBuffer(BufferDesc *buf)
-{
- int b = BufferDescriptorGetBuffer(buf) - 1;
-
- Assert(buf->refcount > 0);
- Assert(PrivateRefCount[b] > 0);
- PrivateRefCount[b]--;
- if (PrivateRefCount[b] == 0)
- buf->refcount--;
-
- if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 &&
- buf->refcount == 1)
- {
- /* we just released the last pin other than the waiter's */
- buf->flags &= ~BM_PIN_COUNT_WAITER;
- ProcSendSignal(buf->wait_backend_id);
- }
- else
- {
- /* do nothing */
- }
-}
-
-#ifdef NOT_USED
-void
-UnpinBuffer_Debug(char *file, int line, BufferDesc *buf)
-{
- UnpinBuffer(buf);
- if (ShowPinTrace)
- {
- Buffer buffer = BufferDescriptorGetBuffer(buf);
-
- fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \
-refcount = %ld, file: %s, line: %d\n",
- buffer, buf->blind.relname, buf->tag.blockNum,
- PrivateRefCount[buffer - 1], file, line);
- }
-}
-#endif
-
-