diff options
author | drh <drh@noemail.net> | 2011-08-23 12:50:09 +0000 |
---|---|---|
committer | drh <drh@noemail.net> | 2011-08-23 12:50:09 +0000 |
commit | 2dd9b60c3589a86355d2d414e2a6bee6312ce5fb (patch) | |
tree | 9b33160b8fd8f3ee252e8c6f4195cdf12e0f8263 /src | |
parent | 06199d84e850c13851c3bc7de9b0847e871c9e22 (diff) | |
parent | da04ea4f88214821c197a17b800d1f29e49843d5 (diff) | |
download | sqlite-2dd9b60c3589a86355d2d414e2a6bee6312ce5fb.tar.gz sqlite-2dd9b60c3589a86355d2d414e2a6bee6312ce5fb.zip |
Merge the PAGECACHE_BLOCKALLOC changes into trunk.
FossilOrigin-Name: 768c1846d48a555054f07edeabdae8817a2c0a8e
Diffstat (limited to 'src')
-rw-r--r-- | src/ctime.c | 3 | ||||
-rw-r--r-- | src/os_unix.c | 6 | ||||
-rw-r--r-- | src/pcache1.c | 247 | ||||
-rw-r--r-- | src/test_config.c | 6 |
4 files changed, 257 insertions, 5 deletions
diff --git a/src/ctime.c b/src/ctime.c index 9d31596bb..77174d0da 100644 --- a/src/ctime.c +++ b/src/ctime.c @@ -326,6 +326,9 @@ static const char * const azCompileOpt[] = { #ifdef SQLITE_OMIT_XFER_OPT "OMIT_XFER_OPT", #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + "PAGECACHE_BLOCKALLOC", +#endif #ifdef SQLITE_PERFORMANCE_TRACE "PERFORMANCE_TRACE", #endif diff --git a/src/os_unix.c b/src/os_unix.c index 83bf7b142..2f068dc58 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -3510,7 +3510,11 @@ static int unixFileControl(sqlite3_file *id, int op, void *pArg){ return SQLITE_OK; } case SQLITE_FCNTL_SIZE_HINT: { - return fcntlSizeHint(pFile, *(i64 *)pArg); + int rc; + SimulateIOErrorBenign(1); + rc = fcntlSizeHint(pFile, *(i64 *)pArg); + SimulateIOErrorBenign(0); + return rc; } case SQLITE_FCNTL_PERSIST_WAL: { int bPersist = *(int*)pArg; diff --git a/src/pcache1.c b/src/pcache1.c index e47265a22..b39e453dd 100644 --- a/src/pcache1.c +++ b/src/pcache1.c @@ -24,6 +24,9 @@ typedef struct PgHdr1 PgHdr1; typedef struct PgFreeslot PgFreeslot; typedef struct PGroup PGroup; +typedef struct PGroupBlock PGroupBlock; +typedef struct PGroupBlockList PGroupBlockList; + /* Each page cache (or PCache) belongs to a PGroup. A PGroup is a set ** of one or more PCaches that are able to recycle each others unpinned ** pages when they are under memory pressure. A PGroup is an instance of @@ -53,8 +56,63 @@ struct PGroup { int mxPinned; /* nMaxpage + 10 - nMinPage */ int nCurrentPage; /* Number of purgeable pages allocated */ PgHdr1 *pLruHead, *pLruTail; /* LRU list of unpinned pages */ + PGroupBlockList *pBlockList; /* List of block-lists for this group */ +}; + +/* +** If SQLITE_PAGECACHE_BLOCKALLOC is defined when the library is built, +** each PGroup structure has a linked list of the the following starting +** at PGroup.pBlockList. There is one entry for each distinct page-size +** currently used by members of the PGroup (i.e. 1024 bytes, 4096 bytes +** etc.). Variable PGroupBlockList.nByte is set to the actual allocation +** size requested by each pcache, which is the database page-size plus +** the various header structures used by the pcache, pager and btree layers. +** Usually around (pgsz+200) bytes. +** +** This size (pgsz+200) bytes is not allocated efficiently by some +** implementations of malloc. In particular, some implementations are only +** able to allocate blocks of memory chunks of 2^N bytes, where N is some +** integer value. Since the page-size is a power of 2, this means we +** end up wasting (pgsz-200) bytes in each allocation. +** +** If SQLITE_PAGECACHE_BLOCKALLOC is defined, the (pgsz+200) byte blocks +** are not allocated directly. Instead, blocks of roughly M*(pgsz+200) bytes +** are requested from malloc allocator. After a block is returned, +** sqlite3MallocSize() is used to determine how many (pgsz+200) byte +** allocations can fit in the space returned by malloc(). This value may +** be more than M. +** +** The blocks are stored in a doubly-linked list. Variable PGroupBlock.nEntry +** contains the number of allocations that will fit in the aData[] space. +** nEntry is limited to the number of bits in bitmask mUsed. If a slot +** within aData is in use, the corresponding bit in mUsed is set. Thus +** when (mUsed+1==(1 << nEntry)) the block is completely full. +** +** Each time a slot within a block is freed, the block is moved to the start +** of the linked-list. And if a block becomes completely full, then it is +** moved to the end of the list. As a result, when searching for a free +** slot, only the first block in the list need be examined. If it is full, +** then it is guaranteed that all blocks are full. +*/ +struct PGroupBlockList { + int nByte; /* Size of each allocation in bytes */ + PGroupBlock *pFirst; /* First PGroupBlock in list */ + PGroupBlock *pLast; /* Last PGroupBlock in list */ + PGroupBlockList *pNext; /* Next block-list attached to group */ }; +struct PGroupBlock { + Bitmask mUsed; /* Mask of used slots */ + int nEntry; /* Maximum number of allocations in aData[] */ + u8 *aData; /* Pointer to data block */ + PGroupBlock *pNext; /* Next PGroupBlock in list */ + PGroupBlock *pPrev; /* Previous PGroupBlock in list */ + PGroupBlockList *pList; /* Owner list */ +}; + +/* Minimum value for PGroupBlock.nEntry */ +#define PAGECACHE_BLOCKALLOC_MINENTRY 15 + /* Each page cache is an instance of the following object. Every ** open database file (including each in-memory database and each ** temporary or transient database) has a single page cache which @@ -159,6 +217,17 @@ static SQLITE_WSD struct PCacheGlobal { #define PAGE_TO_PGHDR1(c, p) (PgHdr1*)(((char*)p) + c->szPage) /* +** Blocks used by the SQLITE_PAGECACHE_BLOCKALLOC blocks to store/retrieve +** a PGroupBlock pointer based on a pointer to a page buffer. +*/ +#define PAGE_SET_BLOCKPTR(pCache, pPg, pBlock) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) = pBlock ) + +#define PAGE_GET_BLOCKPTR(pCache, pPg) \ + ( *(PGroupBlock **)&(((u8*)pPg)[sizeof(PgHdr1) + pCache->szPage]) ) + + +/* ** Macros to enter and leave the PCache LRU mutex. */ #define pcache1EnterMutex(X) sqlite3_mutex_enter((X)->mutex) @@ -284,12 +353,142 @@ static int pcache1MemSize(void *p){ #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */ /* +** The block pBlock belongs to list pList but is not currently linked in. +** Insert it into the start of the list. +*/ +static void addBlockToList(PGroupBlockList *pList, PGroupBlock *pBlock){ + pBlock->pPrev = 0; + pBlock->pNext = pList->pFirst; + pList->pFirst = pBlock; + if( pBlock->pNext ){ + pBlock->pNext->pPrev = pBlock; + }else{ + assert( pList->pLast==0 ); + pList->pLast = pBlock; + } +} + +/* +** If there are no blocks in the list headed by pList, remove pList +** from the pGroup->pBlockList list and free it with sqlite3_free(). +*/ +static void freeListIfEmpty(PGroup *pGroup, PGroupBlockList *pList){ + assert( sqlite3_mutex_held(pGroup->mutex) ); + if( pList->pFirst==0 ){ + PGroupBlockList **pp; + for(pp=&pGroup->pBlockList; *pp!=pList; pp=&(*pp)->pNext); + *pp = (*pp)->pNext; + sqlite3_free(pList); + } +} + +/* ** Allocate a new page object initially associated with cache pCache. */ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ int nByte = sizeof(PgHdr1) + pCache->szPage; - void *pPg = pcache1Alloc(nByte); + void *pPg = 0; PgHdr1 *p; + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroup *pGroup = pCache->pGroup; + PGroupBlockList *pList; + PGroupBlock *pBlock; + int i; + + nByte += sizeof(PGroupBlockList *); + nByte = ROUND8(nByte); + + do{ + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + if( pList->nByte==nByte ) break; + } + if( pList==0 ){ + PGroupBlockList *pNew; + pcache1LeaveMutex(pCache->pGroup); + pNew = (PGroupBlockList *)sqlite3MallocZero(sizeof(PGroupBlockList)); + pcache1EnterMutex(pCache->pGroup); + if( pNew==0 ){ + /* malloc() failure. Return early. */ + return 0; + } + for(pList=pGroup->pBlockList; pList; pList=pList->pNext){ + if( pList->nByte==nByte ) break; + } + if( pList ){ + sqlite3_free(pNew); + }else{ + pNew->nByte = nByte; + pNew->pNext = pGroup->pBlockList; + pGroup->pBlockList = pNew; + pList = pNew; + } + } + }while( pList==0 ); + + pBlock = pList->pFirst; + if( pBlock==0 || pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) ){ + int sz; + + /* Allocate a new block. Try to allocate enough space for the PGroupBlock + ** structure and MINENTRY allocations of nByte bytes each. If the + ** allocator returns more memory than requested, then more than MINENTRY + ** allocations may fit in it. */ + pcache1LeaveMutex(pCache->pGroup); + sz = sizeof(PGroupBlock) + PAGECACHE_BLOCKALLOC_MINENTRY * nByte; + pBlock = (PGroupBlock *)sqlite3Malloc(sz); + pcache1EnterMutex(pCache->pGroup); + + if( !pBlock ){ + freeListIfEmpty(pGroup, pList); + return 0; + } + pBlock->nEntry = (sqlite3MallocSize(pBlock) - sizeof(PGroupBlock)) / nByte; + if( pBlock->nEntry>=BMS ){ + pBlock->nEntry = BMS-1; + } + pBlock->pList = pList; + pBlock->mUsed = 0; + pBlock->aData = (u8 *)&pBlock[1]; + addBlockToList(pList, pBlock); + + sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, sz); + sqlite3_mutex_leave(pcache1.mutex); + } + + for(i=0; pPg==0 && ALWAYS(i<pBlock->nEntry); i++){ + if( 0==(pBlock->mUsed & ((Bitmask)1<<i)) ){ + pBlock->mUsed |= ((Bitmask)1<<i); + pPg = (void *)&pBlock->aData[pList->nByte * i]; + } + } + assert( pPg ); + PAGE_SET_BLOCKPTR(pCache, pPg, pBlock); + + /* If the block is now full, shift it to the end of the list */ + if( pBlock->mUsed==(((Bitmask)1<<pBlock->nEntry)-1) && pList->pLast!=pBlock ){ + assert( pList->pFirst==pBlock ); + assert( pBlock->pPrev==0 ); + assert( pList->pLast->pNext==0 ); + pList->pFirst = pBlock->pNext; + pList->pFirst->pPrev = 0; + pBlock->pPrev = pList->pLast; + pBlock->pNext = 0; + pList->pLast->pNext = pBlock; + pList->pLast = pBlock; + } +#else + /* The group mutex must be released before pcache1Alloc() is called. This + ** is because it may call sqlite3_release_memory(), which assumes that + ** this mutex is not held. */ + assert( sqlite3_mutex_held(pCache->pGroup->mutex) ); + pcache1LeaveMutex(pCache->pGroup); + pPg = pcache1Alloc(nByte); + pcache1EnterMutex(pCache->pGroup); +#endif + if( pPg ){ p = PAGE_TO_PGHDR1(pCache, pPg); if( pCache->bPurgeable ){ @@ -311,10 +510,52 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){ static void pcache1FreePage(PgHdr1 *p){ if( ALWAYS(p) ){ PCache1 *pCache = p->pCache; + void *pPg = PGHDR1_TO_PAGE(p); + +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + PGroupBlock *pBlock = PAGE_GET_BLOCKPTR(pCache, pPg); + PGroupBlockList *pList = pBlock->pList; + int i = ((u8 *)pPg - pBlock->aData) / pList->nByte; + + assert( pPg==(void *)&pBlock->aData[i*pList->nByte] ); + assert( pBlock->mUsed & ((Bitmask)1<<i) ); + pBlock->mUsed &= ~((Bitmask)1<<i); + + /* Remove the block from the list. If it is completely empty, free it. + ** Or if it is not completely empty, re-insert it at the start of the + ** list. */ + if( pList->pFirst==pBlock ){ + pList->pFirst = pBlock->pNext; + if( pList->pFirst ) pList->pFirst->pPrev = 0; + }else{ + pBlock->pPrev->pNext = pBlock->pNext; + } + if( pList->pLast==pBlock ){ + pList->pLast = pBlock->pPrev; + if( pList->pLast ) pList->pLast->pNext = 0; + }else{ + pBlock->pNext->pPrev = pBlock->pPrev; + } + + if( pBlock->mUsed==0 ){ + PGroup *pGroup = p->pCache->pGroup; + + int sz = sqlite3MallocSize(pBlock); + sqlite3_mutex_enter(pcache1.mutex); + sqlite3StatusAdd(SQLITE_STATUS_PAGECACHE_OVERFLOW, -sz); + sqlite3_mutex_leave(pcache1.mutex); + freeListIfEmpty(pGroup, pList); + sqlite3_free(pBlock); + }else{ + addBlockToList(pList, pBlock); + } +#else + assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) ); + pcache1Free(pPg); +#endif if( pCache->bPurgeable ){ pCache->pGroup->nCurrentPage--; } - pcache1Free(PGHDR1_TO_PAGE(p)); } } @@ -752,9 +993,7 @@ static void *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag){ */ if( !pPage ){ if( createFlag==1 ) sqlite3BeginBenignMalloc(); - pcache1LeaveMutex(pGroup); pPage = pcache1AllocPage(pCache); - pcache1EnterMutex(pGroup); if( createFlag==1 ) sqlite3EndBenignMalloc(); } diff --git a/src/test_config.c b/src/test_config.c index 5b0ffaa69..5af60bafd 100644 --- a/src/test_config.c +++ b/src/test_config.c @@ -555,6 +555,12 @@ Tcl_SetVar2(interp, "sqlite_options", "long_double", Tcl_SetVar2(interp, "sqlite_options", "yytrackmaxstackdepth", "0", TCL_GLOBAL_ONLY); #endif +#ifdef SQLITE_PAGECACHE_BLOCKALLOC + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "1", TCL_GLOBAL_ONLY); +#else + Tcl_SetVar2(interp, "sqlite_options", "blockalloc", "0", TCL_GLOBAL_ONLY); +#endif + #define LINKVAR(x) { \ static const int cv_ ## x = SQLITE_ ## x; \ Tcl_LinkVar(interp, "SQLITE_" #x, (char *)&(cv_ ## x), \ |