diff options
Diffstat (limited to 'src/backend/storage/buffer/freelist.c')
-rw-r--r-- | src/backend/storage/buffer/freelist.c | 710 |
1 files changed, 325 insertions, 385 deletions
diff --git a/src/backend/storage/buffer/freelist.c b/src/backend/storage/buffer/freelist.c index 595e4905a80..c14d4464974 100644 --- a/src/backend/storage/buffer/freelist.c +++ b/src/backend/storage/buffer/freelist.c @@ -3,210 +3,208 @@ * freelist.c * routines for manipulating the buffer pool's replacement strategy. * + * Note: all routines in this file assume that the BufMgrLock is held + * by the caller, so no synchronization is needed. + * + * * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.41 2004/02/12 15:06:56 wieck Exp $ + * $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.42 2004/04/19 23:27:17 tgl Exp $ * *------------------------------------------------------------------------- */ -/* - * OLD COMMENTS - * - * Data Structures: - * SharedFreeList is a circular queue. Notice that this - * is a shared memory queue so the next/prev "ptrs" are - * buffer ids, not addresses. - * - * Sync: all routines in this file assume that the buffer - * semaphore has been acquired by the caller. - */ - #include "postgres.h" +#include "access/xact.h" #include "storage/buf_internals.h" #include "storage/bufmgr.h" -#include "storage/ipc.h" -#include "storage/proc.h" -#include "access/xact.h" -#include "miscadmin.h" -#ifndef MAX -#define MAX(a,b) (((a) > (b)) ? (a) : (b)) -#endif -#ifndef MIN -#define MIN(a,b) (((a) < (b)) ? (a) : (b)) -#endif +/* GUC variable: time in seconds between statistics reports */ +int DebugSharedBuffers = 0; + +/* Pointers to shared state */ static BufferStrategyControl *StrategyControl = NULL; static BufferStrategyCDB *StrategyCDB = NULL; -static int strategy_cdb_found; -static int strategy_cdb_replace; -static int strategy_get_from; - -int DebugSharedBuffers = 0; - -static bool strategy_hint_vacuum; +/* Backend-local state about whether currently vacuuming */ +static bool strategy_hint_vacuum = false; static TransactionId strategy_vacuum_xid; -#define T1_TARGET StrategyControl->target_T1_size -#define B1_LENGTH StrategyControl->listSize[STRAT_LIST_B1] -#define T1_LENGTH StrategyControl->listSize[STRAT_LIST_T1] -#define T2_LENGTH StrategyControl->listSize[STRAT_LIST_T2] -#define B2_LENGTH StrategyControl->listSize[STRAT_LIST_B2] +#define T1_TARGET (StrategyControl->target_T1_size) +#define B1_LENGTH (StrategyControl->listSize[STRAT_LIST_B1]) +#define T1_LENGTH (StrategyControl->listSize[STRAT_LIST_T1]) +#define T2_LENGTH (StrategyControl->listSize[STRAT_LIST_T2]) +#define B2_LENGTH (StrategyControl->listSize[STRAT_LIST_B2]) /* * Macro to remove a CDB from whichever list it currently is on */ #define STRAT_LIST_REMOVE(cdb) \ -{ \ - AssertMacro((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \ - if ((cdb)->prev < 0) \ - StrategyControl->listHead[(cdb)->list] = (cdb)->next; \ - else \ - StrategyCDB[(cdb)->prev].next = (cdb)->next; \ - if ((cdb)->next < 0) \ - StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \ - else \ - StrategyCDB[(cdb)->next].prev = (cdb)->prev; \ - StrategyControl->listSize[(cdb)->list]--; \ - (cdb)->list = STRAT_LIST_UNUSED; \ -} +do { \ + Assert((cdb)->list >= 0 && (cdb)->list < STRAT_NUM_LISTS); \ + if ((cdb)->prev < 0) \ + StrategyControl->listHead[(cdb)->list] = (cdb)->next; \ + else \ + StrategyCDB[(cdb)->prev].next = (cdb)->next; \ + if ((cdb)->next < 0) \ + StrategyControl->listTail[(cdb)->list] = (cdb)->prev; \ + else \ + StrategyCDB[(cdb)->next].prev = (cdb)->prev; \ + StrategyControl->listSize[(cdb)->list]--; \ + (cdb)->list = STRAT_LIST_UNUSED; \ +} while(0) /* * Macro to add a CDB to the tail of a list (MRU position) */ #define STRAT_MRU_INSERT(cdb,l) \ -{ \ - AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \ - if (StrategyControl->listTail[(l)] < 0) \ - { \ - (cdb)->prev = (cdb)->next = -1; \ - StrategyControl->listHead[(l)] = \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - else \ - { \ - (cdb)->next = -1; \ - (cdb)->prev = StrategyControl->listTail[(l)]; \ - StrategyCDB[StrategyControl->listTail[(l)]].next = \ - ((cdb) - StrategyCDB); \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - StrategyControl->listSize[(l)]++; \ - (cdb)->list = (l); \ -} +do { \ + Assert((cdb)->list == STRAT_LIST_UNUSED); \ + if (StrategyControl->listTail[(l)] < 0) \ + { \ + (cdb)->prev = (cdb)->next = -1; \ + StrategyControl->listHead[(l)] = \ + StrategyControl->listTail[(l)] = \ + ((cdb) - StrategyCDB); \ + } \ + else \ + { \ + (cdb)->next = -1; \ + (cdb)->prev = StrategyControl->listTail[(l)]; \ + StrategyCDB[StrategyControl->listTail[(l)]].next = \ + ((cdb) - StrategyCDB); \ + StrategyControl->listTail[(l)] = \ + ((cdb) - StrategyCDB); \ + } \ + StrategyControl->listSize[(l)]++; \ + (cdb)->list = (l); \ +} while(0) /* * Macro to add a CDB to the head of a list (LRU position) */ #define STRAT_LRU_INSERT(cdb,l) \ -{ \ - AssertMacro((cdb)->list == STRAT_LIST_UNUSED); \ - if (StrategyControl->listHead[(l)] < 0) \ - { \ - (cdb)->prev = (cdb)->next = -1; \ - StrategyControl->listHead[(l)] = \ - StrategyControl->listTail[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - else \ - { \ - (cdb)->prev = -1; \ - (cdb)->next = StrategyControl->listHead[(l)]; \ - StrategyCDB[StrategyControl->listHead[(l)]].prev = \ - ((cdb) - StrategyCDB); \ - StrategyControl->listHead[(l)] = \ - ((cdb) - StrategyCDB); \ - } \ - StrategyControl->listSize[(l)]++; \ - (cdb)->list = (l); \ -} +do { \ + Assert((cdb)->list == STRAT_LIST_UNUSED); \ + if (StrategyControl->listHead[(l)] < 0) \ + { \ + (cdb)->prev = (cdb)->next = -1; \ + StrategyControl->listHead[(l)] = \ + StrategyControl->listTail[(l)] = \ + ((cdb) - StrategyCDB); \ + } \ + else \ + { \ + (cdb)->prev = -1; \ + (cdb)->next = StrategyControl->listHead[(l)]; \ + StrategyCDB[StrategyControl->listHead[(l)]].prev = \ + ((cdb) - StrategyCDB); \ + StrategyControl->listHead[(l)] = \ + ((cdb) - StrategyCDB); \ + } \ + StrategyControl->listSize[(l)]++; \ + (cdb)->list = (l); \ +} while(0) + + +/* + * Printout for use when DebugSharedBuffers is enabled + */ +static void +StrategyStatsDump(void) +{ + time_t now = time(NULL); + + if (StrategyControl->stat_report + DebugSharedBuffers < now) + { + long all_hit, b1_hit, t1_hit, t2_hit, b2_hit; + int id, t1_clean, t2_clean; + ErrorContextCallback *errcxtold; + id = StrategyControl->listHead[STRAT_LIST_T1]; + t1_clean = 0; + while (id >= 0) + { + if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) + break; + t1_clean++; + id = StrategyCDB[id].next; + } + id = StrategyControl->listHead[STRAT_LIST_T2]; + t2_clean = 0; + while (id >= 0) + { + if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) + break; + t2_clean++; + id = StrategyCDB[id].next; + } + + if (StrategyControl->num_lookup == 0) + { + all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0; + } + else + { + b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 / + StrategyControl->num_lookup); + t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 / + StrategyControl->num_lookup); + t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 / + StrategyControl->num_lookup); + b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 / + StrategyControl->num_lookup); + all_hit = b1_hit + t1_hit + t2_hit + b2_hit; + } + + errcxtold = error_context_stack; + error_context_stack = NULL; + elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d", + T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH); + elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%", + all_hit, b1_hit, t1_hit, t2_hit, b2_hit); + elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d", + t1_clean, t2_clean); + error_context_stack = errcxtold; + + StrategyControl->num_lookup = 0; + StrategyControl->num_hit[STRAT_LIST_B1] = 0; + StrategyControl->num_hit[STRAT_LIST_T1] = 0; + StrategyControl->num_hit[STRAT_LIST_T2] = 0; + StrategyControl->num_hit[STRAT_LIST_B2] = 0; + StrategyControl->stat_report = now; + } +} /* * StrategyBufferLookup * * Lookup a page request in the cache directory. A buffer is only - * returned for a T1 or T2 cache hit. B1 and B2 hits are only - * remembered here to later affect the behaviour. + * returned for a T1 or T2 cache hit. B1 and B2 hits are just + * remembered here, to possibly affect the behaviour later. + * + * recheck indicates we are rechecking after I/O wait; do not change + * internal status in this case. + * + * *cdb_found_index is set to the index of the found CDB, or -1 if none. + * This is not intended to be used by the caller, except to pass to + * StrategyReplaceBuffer(). */ BufferDesc * -StrategyBufferLookup(BufferTag *tagPtr, bool recheck) +StrategyBufferLookup(BufferTag *tagPtr, bool recheck, + int *cdb_found_index) { BufferStrategyCDB *cdb; - time_t now; + /* Optional stats printout */ if (DebugSharedBuffers > 0) - { - time(&now); - if (StrategyControl->stat_report + DebugSharedBuffers < now) - { - long all_hit, b1_hit, t1_hit, t2_hit, b2_hit; - int id, t1_clean, t2_clean; - ErrorContextCallback *errcxtold; - - id = StrategyControl->listHead[STRAT_LIST_T1]; - t1_clean = 0; - while (id >= 0) - { - if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) - break; - t1_clean++; - id = StrategyCDB[id].next; - } - id = StrategyControl->listHead[STRAT_LIST_T2]; - t2_clean = 0; - while (id >= 0) - { - if (BufferDescriptors[StrategyCDB[id].buf_id].flags & BM_DIRTY) - break; - t2_clean++; - id = StrategyCDB[id].next; - } - - if (StrategyControl->num_lookup == 0) - { - all_hit = b1_hit = t1_hit = t2_hit = b2_hit = 0; - } - else - { - b1_hit = (StrategyControl->num_hit[STRAT_LIST_B1] * 100 / - StrategyControl->num_lookup); - t1_hit = (StrategyControl->num_hit[STRAT_LIST_T1] * 100 / - StrategyControl->num_lookup); - t2_hit = (StrategyControl->num_hit[STRAT_LIST_T2] * 100 / - StrategyControl->num_lookup); - b2_hit = (StrategyControl->num_hit[STRAT_LIST_B2] * 100 / - StrategyControl->num_lookup); - all_hit = b1_hit + t1_hit + t2_hit + b2_hit; - } - - errcxtold = error_context_stack; - error_context_stack = NULL; - elog(DEBUG1, "ARC T1target=%5d B1len=%5d T1len=%5d T2len=%5d B2len=%5d", - T1_TARGET, B1_LENGTH, T1_LENGTH, T2_LENGTH, B2_LENGTH); - elog(DEBUG1, "ARC total =%4ld%% B1hit=%4ld%% T1hit=%4ld%% T2hit=%4ld%% B2hit=%4ld%%", - all_hit, b1_hit, t1_hit, t2_hit, b2_hit); - elog(DEBUG1, "ARC clean buffers at LRU T1= %5d T2= %5d", - t1_clean, t2_clean); - error_context_stack = errcxtold; - - StrategyControl->num_lookup = 0; - StrategyControl->num_hit[STRAT_LIST_B1] = 0; - StrategyControl->num_hit[STRAT_LIST_T1] = 0; - StrategyControl->num_hit[STRAT_LIST_T2] = 0; - StrategyControl->num_hit[STRAT_LIST_B2] = 0; - StrategyControl->stat_report = now; - } - } + StrategyStatsDump(); /* * Count lookups @@ -216,75 +214,75 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck) /* * Lookup the block in the shared hash table */ - strategy_cdb_found = BufTableLookup(tagPtr); + *cdb_found_index = BufTableLookup(tagPtr); /* - * Handle CDB lookup miss + * Done if complete CDB lookup miss */ - if (strategy_cdb_found < 0) - { - if (!recheck) - { - /* - * This is an initial lookup and we have a complete - * cache miss (block found nowhere). This means we - * remember according to the current T1 size and the - * target T1 size from where we take a block if we - * need one later. - */ - if (T1_LENGTH >= MAX(1, T1_TARGET)) - strategy_get_from = STRAT_LIST_T1; - else - strategy_get_from = STRAT_LIST_T2; - } - - /* - * Do the cost accounting for vacuum - */ - if (VacuumCostActive) - VacuumCostBalance += VacuumCostPageMiss; - - /* report cache miss */ + if (*cdb_found_index < 0) return NULL; - } /* * We found a CDB */ - cdb = &StrategyCDB[strategy_cdb_found]; + cdb = &StrategyCDB[*cdb_found_index]; /* * Count hits */ StrategyControl->num_hit[cdb->list]++; - if (VacuumCostActive) - VacuumCostBalance += VacuumCostPageHit; /* * If this is a T2 hit, we simply move the CDB to the * T2 MRU position and return the found buffer. + * + * A CDB in T2 cannot have t1_vacuum set, so we needn't check. However, + * if the current process is VACUUM then it doesn't promote to MRU. */ if (cdb->list == STRAT_LIST_T2) { - STRAT_LIST_REMOVE(cdb); - STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); + if (!strategy_hint_vacuum) + { + STRAT_LIST_REMOVE(cdb); + STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); + } return &BufferDescriptors[cdb->buf_id]; } /* - * If this is a T1 hit, we move the buffer to the T2 MRU - * only if another transaction had read it into T1. This is - * required because any UPDATE or DELETE in PostgreSQL does - * multiple ReadBuffer(), first during the scan, later during - * the heap_update() or heap_delete(). + * If this is a T1 hit, we move the buffer to the T2 MRU only if another + * transaction had read it into T1, *and* neither transaction is a VACUUM. + * This is required because any UPDATE or DELETE in PostgreSQL does + * multiple ReadBuffer(), first during the scan, later during the + * heap_update() or heap_delete(). Otherwise move to T1 MRU. VACUUM + * doesn't even get to make that happen. */ if (cdb->list == STRAT_LIST_T1) { - if (!TransactionIdIsCurrentTransactionId(cdb->t1_xid)) + if (!strategy_hint_vacuum) { - STRAT_LIST_REMOVE(cdb); - STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); + if (!cdb->t1_vacuum && + !TransactionIdIsCurrentTransactionId(cdb->t1_xid)) + { + STRAT_LIST_REMOVE(cdb); + STRAT_MRU_INSERT(cdb, STRAT_LIST_T2); + } + else + { + STRAT_LIST_REMOVE(cdb); + STRAT_MRU_INSERT(cdb, STRAT_LIST_T1); + /* + * If a non-VACUUM process references a page recently loaded + * by VACUUM, clear the stigma; the state will now be the + * same as if this process loaded it originally. + */ + if (cdb->t1_vacuum) + { + cdb->t1_xid = GetCurrentTransactionId(); + cdb->t1_vacuum = false; + } + } } return &BufferDescriptors[cdb->buf_id]; @@ -292,17 +290,19 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck) /* * In the case of a recheck we don't care about B1 or B2 hits here. - * The bufmgr does this call only to make sure noone faulted in the - * block while we where busy flushing another. Now for this really - * to end up as a B1 or B2 cache hit, we must have been flushing for - * quite some time as the block not only must have been read, but - * also traveled through the queue and evicted from the T cache again - * already. + * The bufmgr does this call only to make sure no-one faulted in the + * block while we where busy flushing another; we don't want to doubly + * adjust the T1target. + * + * Now for this really to end up as a B1 or B2 cache hit, we must have + * been flushing for quite some time as the block not only must have been + * read, but also traveled through the queue and evicted from the T cache + * again already. + * + * VACUUM re-reads shouldn't adjust the target either. */ - if (recheck) - { + if (recheck || strategy_hint_vacuum) return NULL; - } /* * Adjust the target size of the T1 cache depending on if this is @@ -316,8 +316,8 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck) * small. Adjust the T1 target size and continue * below. */ - T1_TARGET = MIN(T1_TARGET + MAX(B2_LENGTH / B1_LENGTH, 1), - Data_Descriptors); + T1_TARGET = Min(T1_TARGET + Max(B2_LENGTH / B1_LENGTH, 1), + NBuffers); break; case STRAT_LIST_B2: @@ -325,26 +325,17 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck) * B2 hit means that the T2 cache is probably too * small. Adjust the T1 target size and continue * below. - */ - T1_TARGET = MAX(T1_TARGET - MAX(B1_LENGTH / B2_LENGTH, 1), 0); + */ + T1_TARGET = Max(T1_TARGET - Max(B1_LENGTH / B2_LENGTH, 1), 0); break; default: - elog(ERROR, "Buffer hash table corrupted - CDB on list %d found", - cdb->list); + elog(ERROR, "buffer hash table corrupted: CDB->list = %d", + cdb->list); } /* - * Decide where to take from if we will be out of - * free blocks later in StrategyGetBuffer(). - */ - if (T1_LENGTH >= MAX(1, T1_TARGET)) - strategy_get_from = STRAT_LIST_T1; - else - strategy_get_from = STRAT_LIST_T2; - - /* - * Even if we had seen the block in the past, it's data is + * Even though we had seen the block in the past, its data is * not currently in memory ... cache miss to the bufmgr. */ return NULL; @@ -357,18 +348,25 @@ StrategyBufferLookup(BufferTag *tagPtr, bool recheck) * Called by the bufmgr to get the next candidate buffer to use in * BufferAlloc(). The only hard requirement BufferAlloc() has is that * this buffer must not currently be pinned. + * + * *cdb_replace_index is set to the index of the candidate CDB, or -1 if + * none (meaning we are using a previously free buffer). This is not + * intended to be used by the caller, except to pass to + * StrategyReplaceBuffer(). */ BufferDesc * -StrategyGetBuffer(void) +StrategyGetBuffer(int *cdb_replace_index) { int cdb_id; BufferDesc *buf; if (StrategyControl->listFreeBuffers < 0) { - /* We don't have a free buffer, must take one from T1 or T2 */ - - if (strategy_get_from == STRAT_LIST_T1) + /* + * We don't have a free buffer, must take one from T1 or T2. + * Choose based on trying to converge T1len to T1target. + */ + if (T1_LENGTH >= Max(1, T1_TARGET)) { /* * We should take the first unpinned buffer from T1. @@ -379,7 +377,7 @@ StrategyGetBuffer(void) buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; if (buf->refcount == 0) { - strategy_cdb_replace = cdb_id; + *cdb_replace_index = cdb_id; Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); return buf; } @@ -387,7 +385,7 @@ StrategyGetBuffer(void) } /* - * No unpinned T1 buffer found - pardon T2 cache. + * No unpinned T1 buffer found - try T2 cache. */ cdb_id = StrategyControl->listHead[STRAT_LIST_T2]; while (cdb_id >= 0) @@ -395,7 +393,7 @@ StrategyGetBuffer(void) buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; if (buf->refcount == 0) { - strategy_cdb_replace = cdb_id; + *cdb_replace_index = cdb_id; Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); return buf; } @@ -405,7 +403,7 @@ StrategyGetBuffer(void) /* * No unpinned buffers at all!!! */ - elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers"); + elog(ERROR, "no unpinned buffers available"); } else { @@ -418,7 +416,7 @@ StrategyGetBuffer(void) buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; if (buf->refcount == 0) { - strategy_cdb_replace = cdb_id; + *cdb_replace_index = cdb_id; Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T2); return buf; } @@ -426,7 +424,7 @@ StrategyGetBuffer(void) } /* - * No unpinned T2 buffer found - pardon T1 cache. + * No unpinned T2 buffer found - try T1 cache. */ cdb_id = StrategyControl->listHead[STRAT_LIST_T1]; while (cdb_id >= 0) @@ -434,7 +432,7 @@ StrategyGetBuffer(void) buf = &BufferDescriptors[StrategyCDB[cdb_id].buf_id]; if (buf->refcount == 0) { - strategy_cdb_replace = cdb_id; + *cdb_replace_index = cdb_id; Assert(StrategyCDB[cdb_id].list == STRAT_LIST_T1); return buf; } @@ -444,7 +442,7 @@ StrategyGetBuffer(void) /* * No unpinned buffers at all!!! */ - elog(ERROR, "StrategyGetBuffer(): Out of unpinned buffers"); + elog(ERROR, "no unpinned buffers available"); } } else @@ -459,13 +457,13 @@ StrategyGetBuffer(void) * that there will never be any reason to recheck. Otherwise * we would leak shared buffers here! */ - strategy_cdb_replace = -1; + *cdb_replace_index = -1; buf = &BufferDescriptors[StrategyControl->listFreeBuffers]; StrategyControl->listFreeBuffers = buf->bufNext; buf->bufNext = -1; - /* Buffer of freelist cannot be pinned */ + /* Buffer in freelist cannot be pinned */ Assert(buf->refcount == 0); Assert(!(buf->flags & BM_DIRTY)); @@ -480,54 +478,59 @@ StrategyGetBuffer(void) /* * StrategyReplaceBuffer * - * Called by the buffer manager to inform us that he possibly flushed - * a buffer and is now about to replace the content. Prior to this call, + * Called by the buffer manager to inform us that he flushed a buffer + * and is now about to replace the content. Prior to this call, * the cache algorithm still reports the buffer as in the cache. After * this call we report the new block, even if IO might still need to - * start. + * be done to bring in the new content. + * + * cdb_found_index and cdb_replace_index must be the auxiliary values + * returned by previous calls to StrategyBufferLookup and StrategyGetBuffer. */ void -StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) +StrategyReplaceBuffer(BufferDesc *buf, BufferTag *newTag, + int cdb_found_index, int cdb_replace_index) { BufferStrategyCDB *cdb_found; BufferStrategyCDB *cdb_replace; - if (strategy_cdb_found >= 0) + if (cdb_found_index >= 0) { - /* This was a ghost buffer cache hit (B1 or B2) */ - cdb_found = &StrategyCDB[strategy_cdb_found]; + /* This must have been a ghost buffer cache hit (B1 or B2) */ + cdb_found = &StrategyCDB[cdb_found_index]; /* Assert that the buffer remembered in cdb_found is the one */ /* the buffer manager is currently faulting in */ - Assert(BUFFERTAG_EQUALS(&(cdb_found->buf_tag), rnode, blockNum)); + Assert(BUFFERTAGS_EQUAL(&(cdb_found->buf_tag), newTag)); - if (strategy_cdb_replace >= 0) + if (cdb_replace_index >= 0) { /* We are satisfying it with an evicted T buffer */ - cdb_replace = &StrategyCDB[strategy_cdb_replace]; + cdb_replace = &StrategyCDB[cdb_replace_index]; /* Assert that the buffer remembered in cdb_replace is */ /* the one the buffer manager has just evicted */ Assert(cdb_replace->list == STRAT_LIST_T1 || - cdb_replace->list == STRAT_LIST_T2); + cdb_replace->list == STRAT_LIST_T2); Assert(cdb_replace->buf_id == buf->buf_id); Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag))); - /* If this was a T1 buffer faulted in by vacuum, just */ - /* do not cause the CDB end up in the B1 list, so that */ - /* the vacuum scan does not affect T1_target adjusting */ - if (strategy_hint_vacuum) + /* + * Under normal circumstances we move the evicted T list entry to + * the corresponding B list. However, T1 entries that exist only + * because of VACUUM are just thrown into the unused list instead. + * We don't expect them to be touched again by the VACUUM, and if + * we put them into B1 then VACUUM would skew T1_target adjusting. + */ + if (cdb_replace->t1_vacuum) { BufTableDelete(&(cdb_replace->buf_tag)); STRAT_LIST_REMOVE(cdb_replace); - cdb_replace->buf_id = -1; cdb_replace->next = StrategyControl->listUnusedCDB; - StrategyControl->listUnusedCDB = strategy_cdb_replace; + StrategyControl->listUnusedCDB = cdb_replace_index; } else { - /* Under normal circumstances move the evicted */ - /* T list entry to it's corresponding B list */ if (cdb_replace->list == STRAT_LIST_T1) { STRAT_LIST_REMOVE(cdb_replace); @@ -539,25 +542,26 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); } } - /* And clear it's block reference */ + /* And clear its block reference */ cdb_replace->buf_id = -1; } else { - /* or we satisfy it with an unused buffer */ + /* We are satisfying it with an unused buffer */ } - /* Now the found B CDB get's the buffer and is moved to T2 */ + /* Now the found B CDB gets the buffer and is moved to T2 */ cdb_found->buf_id = buf->buf_id; STRAT_LIST_REMOVE(cdb_found); STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T2); } else { - /* This was a complete cache miss, so we need to create */ - /* a new CDB. The goal is to keep T1len+B1len <= c */ - - if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= Data_Descriptors) + /* + * This was a complete cache miss, so we need to create + * a new CDB. The goal is to keep T1len+B1len <= c. + */ + if (B1_LENGTH > 0 && (T1_LENGTH + B1_LENGTH) >= NBuffers) { /* So if B1 isn't empty and T1len+B1len >= c we take B1-LRU */ cdb_found = &StrategyCDB[StrategyControl->listHead[STRAT_LIST_B1]]; @@ -587,18 +591,20 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) } } - /* Set the CDB's buf_tag and insert the hash key */ - INIT_BUFFERTAG(&(cdb_found->buf_tag), rnode, blockNum); + /* Set the CDB's buf_tag and insert it into the hash table */ + cdb_found->buf_tag = *newTag; BufTableInsert(&(cdb_found->buf_tag), (cdb_found - StrategyCDB)); - if (strategy_cdb_replace >= 0) + if (cdb_replace_index >= 0) { - /* The buffer was formerly in a T list, move it's CDB - * to the corresponding B list */ - cdb_replace = &StrategyCDB[strategy_cdb_replace]; + /* + * The buffer was formerly in a T list, move its CDB + * to the corresponding B list + */ + cdb_replace = &StrategyCDB[cdb_replace_index]; Assert(cdb_replace->list == STRAT_LIST_T1 || - cdb_replace->list == STRAT_LIST_T2); + cdb_replace->list == STRAT_LIST_T2); Assert(cdb_replace->buf_id == buf->buf_id); Assert(BUFFERTAGS_EQUAL(&(cdb_replace->buf_tag), &(buf->tag))); @@ -612,32 +618,32 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) STRAT_LIST_REMOVE(cdb_replace); STRAT_MRU_INSERT(cdb_replace, STRAT_LIST_B2); } - /* And clear it's block reference */ + /* And clear its block reference */ cdb_replace->buf_id = -1; } else { - /* or we satisfy it with an unused buffer */ + /* We are satisfying it with an unused buffer */ } /* Assign the buffer id to the new CDB */ cdb_found->buf_id = buf->buf_id; /* - * Specialized VACUUM optimization. If this "complete cache miss" - * happened because vacuum needed the page, we want it later on - * to be placed at the LRU instead of the MRU position of T1. + * Specialized VACUUM optimization. If this complete cache miss + * happened because vacuum needed the page, we place it at the LRU + * position of T1; normally it goes at the MRU position. */ if (strategy_hint_vacuum) { - if (strategy_vacuum_xid != GetCurrentTransactionId()) + if (TransactionIdIsCurrentTransactionId(strategy_vacuum_xid)) + STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1); + else { + /* VACUUM must have been aborted by error, reset flag */ strategy_hint_vacuum = false; STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); } - else - STRAT_LRU_INSERT(cdb_found, STRAT_LIST_T1); - } else STRAT_MRU_INSERT(cdb_found, STRAT_LIST_T1); @@ -645,8 +651,10 @@ StrategyReplaceBuffer(BufferDesc *buf, Relation rnode, BlockNumber blockNum) /* * Remember the Xid when this buffer went onto T1 to avoid * a single UPDATE promoting a newcomer straight into T2. + * Also remember if it was loaded for VACUUM. */ cdb_found->t1_xid = GetCurrentTransactionId(); + cdb_found->t1_vacuum = strategy_hint_vacuum; } } @@ -673,8 +681,7 @@ StrategyInvalidateBuffer(BufferDesc *buf) */ cdb_id = BufTableLookup(&(buf->tag)); if (cdb_id < 0) - elog(ERROR, "StrategyInvalidateBuffer() buffer %d not in directory", - buf->buf_id); + elog(ERROR, "buffer %d not in buffer hash table", buf->buf_id); cdb = &StrategyCDB[cdb_id]; /* @@ -694,7 +701,7 @@ StrategyInvalidateBuffer(BufferDesc *buf) StrategyControl->listUnusedCDB = cdb_id; /* - * Clear out the buffers tag and add it to the list of + * Clear out the buffer's tag and add it to the list of * currently unused buffers. */ CLEAR_BUFFERTAG(&(buf->tag)); @@ -702,7 +709,9 @@ StrategyInvalidateBuffer(BufferDesc *buf) StrategyControl->listFreeBuffers = buf->buf_id; } - +/* + * StrategyHintVacuum -- tell us whether VACUUM is active + */ void StrategyHintVacuum(bool vacuum_active) { @@ -710,9 +719,24 @@ StrategyHintVacuum(bool vacuum_active) strategy_vacuum_xid = GetCurrentTransactionId(); } - +/* + * StrategyDirtyBufferList + * + * Returns a list of dirty buffers, in priority order for writing. + * Note that the caller may choose not to write them all. + * + * The caller must beware of the possibility that a buffer is no longer dirty, + * or even contains a different page, by the time he reaches it. If it no + * longer contains the same page it need not be written, even if it is (again) + * dirty. + * + * Buffer pointers are stored into buffers[], and corresponding tags into + * buftags[], both of size max_buffers. The function returns the number of + * buffer IDs stored. + */ int -StrategyDirtyBufferList(int *buffer_list, int max_buffers) +StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags, + int max_buffers) { int num_buffer_dirty = 0; int cdb_id_t1; @@ -724,13 +748,13 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers) * Traverse the T1 and T2 list LRU to MRU in "parallel" * and add all dirty buffers found in that order to the list. * The ARC strategy keeps all used buffers including pinned ones - * in the T1 or T2 list. So we cannot loose any dirty buffers. + * in the T1 or T2 list. So we cannot miss any dirty buffers. */ cdb_id_t1 = StrategyControl->listHead[STRAT_LIST_T1]; cdb_id_t2 = StrategyControl->listHead[STRAT_LIST_T2]; while ((cdb_id_t1 >= 0 || cdb_id_t2 >= 0) && - num_buffer_dirty < max_buffers) + num_buffer_dirty < max_buffers) { if (cdb_id_t1 >= 0) { @@ -741,7 +765,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers) { if ((buf->flags & BM_DIRTY) || (buf->cntxDirty)) { - buffer_list[num_buffer_dirty++] = buf_id; + buffers[num_buffer_dirty] = buf; + buftags[num_buffer_dirty] = buf->tag; + num_buffer_dirty++; } } @@ -757,7 +783,9 @@ StrategyDirtyBufferList(int *buffer_list, int max_buffers) { if ((buf->flags & BM_DIRTY) || (buf->cntxDirty)) { - buffer_list[num_buffer_dirty++] = buf_id; + buffers[num_buffer_dirty] = buf; + buftags[num_buffer_dirty] = buf->tag; + num_buffer_dirty++; } } @@ -785,16 +813,16 @@ StrategyInitialize(bool init) /* * Initialize the shared CDB lookup hashtable */ - InitBufTable(Data_Descriptors * 2); + InitBufTable(NBuffers * 2); /* * Get or create the shared strategy control block and the CDB's */ StrategyControl = (BufferStrategyControl *) - ShmemInitStruct("Buffer Strategy Status", - sizeof(BufferStrategyControl) + - sizeof(BufferStrategyCDB) * (Data_Descriptors * 2 - 1), - &found); + ShmemInitStruct("Buffer Strategy Status", + sizeof(BufferStrategyControl) + + sizeof(BufferStrategyCDB) * (NBuffers * 2 - 1), + &found); StrategyCDB = &(StrategyControl->cdb[0]); if (!found) @@ -805,8 +833,8 @@ StrategyInitialize(bool init) Assert(init); /* - * Grab the whole linked list of free buffers for our - * strategy + * Grab the whole linked list of free buffers for our strategy. + * We assume it was previously set up by InitBufferPool(). */ StrategyControl->listFreeBuffers = 0; @@ -814,7 +842,7 @@ StrategyInitialize(bool init) * We start off with a target T1 list size of * half the available cache blocks. */ - StrategyControl->target_T1_size = Data_Descriptors / 2; + StrategyControl->target_T1_size = NBuffers / 2; /* * Initialize B1, T1, T2 and B2 lists to be empty @@ -832,14 +860,14 @@ StrategyInitialize(bool init) /* * All CDB's are linked as the listUnusedCDB */ - for (i = 0; i < Data_Descriptors * 2; i++) + for (i = 0; i < NBuffers * 2; i++) { StrategyCDB[i].next = i + 1; StrategyCDB[i].list = STRAT_LIST_UNUSED; CLEAR_BUFFERTAG(&(StrategyCDB[i].buf_tag)); StrategyCDB[i].buf_id = -1; } - StrategyCDB[Data_Descriptors * 2 - 1].next = -1; + StrategyCDB[NBuffers * 2 - 1].next = -1; StrategyControl->listUnusedCDB = 0; } else @@ -847,91 +875,3 @@ StrategyInitialize(bool init) Assert(!init); } } - - -#undef PinBuffer - -/* - * PinBuffer -- make buffer unavailable for replacement. - * - * This should be applied only to shared buffers, never local ones. - * Bufmgr lock must be held by caller. - */ -void -PinBuffer(BufferDesc *buf) -{ - int b = BufferDescriptorGetBuffer(buf) - 1; - - if (PrivateRefCount[b] == 0) - buf->refcount++; - PrivateRefCount[b]++; - Assert(PrivateRefCount[b] > 0); -} - -#ifdef NOT_USED -void -PinBuffer_Debug(char *file, int line, BufferDesc *buf) -{ - PinBuffer(buf); - if (ShowPinTrace) - { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "PIN(Pin) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } -} -#endif - -#undef UnpinBuffer - -/* - * UnpinBuffer -- make buffer available for replacement. - * - * This should be applied only to shared buffers, never local ones. - * Bufmgr lock must be held by caller. - */ -void -UnpinBuffer(BufferDesc *buf) -{ - int b = BufferDescriptorGetBuffer(buf) - 1; - - Assert(buf->refcount > 0); - Assert(PrivateRefCount[b] > 0); - PrivateRefCount[b]--; - if (PrivateRefCount[b] == 0) - buf->refcount--; - - if ((buf->flags & BM_PIN_COUNT_WAITER) != 0 && - buf->refcount == 1) - { - /* we just released the last pin other than the waiter's */ - buf->flags &= ~BM_PIN_COUNT_WAITER; - ProcSendSignal(buf->wait_backend_id); - } - else - { - /* do nothing */ - } -} - -#ifdef NOT_USED -void -UnpinBuffer_Debug(char *file, int line, BufferDesc *buf) -{ - UnpinBuffer(buf); - if (ShowPinTrace) - { - Buffer buffer = BufferDescriptorGetBuffer(buf); - - fprintf(stderr, "UNPIN(Unpin) %ld relname = %s, blockNum = %d, \ -refcount = %ld, file: %s, line: %d\n", - buffer, buf->blind.relname, buf->tag.blockNum, - PrivateRefCount[buffer - 1], file, line); - } -} -#endif - - |