diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2005-06-20 10:29:37 +0000 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2005-06-20 10:29:37 +0000 |
commit | d544ec8bbd70d50c1b50a00437b8061cabeeb5f2 (patch) | |
tree | 2eed78c3728c26840f525aa2aa8e2a4e6092f69e /src/backend/access/gist/gistvacuum.c | |
parent | 0b62bbe086261a12cc6779244e979c54233da055 (diff) | |
download | postgresql-d544ec8bbd70d50c1b50a00437b8061cabeeb5f2.tar.gz postgresql-d544ec8bbd70d50c1b50a00437b8061cabeeb5f2.zip |
1. full functional WAL for GiST
2. improve vacuum for gist
- use FSM
- full vacuum:
- reforms parent tuple if it's needed
( tuples was deleted on child page or parent tuple remains invalid
after crash recovery )
- truncate index file if possible
3. fixes bugs and mistakes
Diffstat (limited to 'src/backend/access/gist/gistvacuum.c')
-rw-r--r-- | src/backend/access/gist/gistvacuum.c | 519 |
1 files changed, 519 insertions, 0 deletions
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c new file mode 100644 index 00000000000..8f8e7f75070 --- /dev/null +++ b/src/backend/access/gist/gistvacuum.c @@ -0,0 +1,519 @@ +/*------------------------------------------------------------------------- + * + * gistvacuum.c + * interface routines for the postgres GiST index access method. + * + * + * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.1 2005/06/20 10:29:36 teodor Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/genam.h" +#include "access/gist_private.h" +#include "access/gistscan.h" +#include "access/heapam.h" +#include "catalog/index.h" +#include "commands/vacuum.h" +#include "miscadmin.h" +#include "utils/memutils.h" +#include "storage/freespace.h" +#include "storage/smgr.h" + +/* filled by gistbulkdelete, cleared by gistvacuumpcleanup */ +static bool needFullVacuum = false; + + +typedef struct { + GISTSTATE giststate; + Relation index; + MemoryContext opCtx; + IndexBulkDeleteResult *result; + + /* path to root */ + BlockNumber *path; + int pathlen; + int curpathlen; +} GistVacuum; + +static void +shiftPath(GistVacuum *gv, BlockNumber blkno) { + if ( gv->pathlen == 0 ) { + gv->pathlen = 8; + gv->path = (BlockNumber*) palloc( MAXALIGN(sizeof(BlockNumber)*gv->pathlen) ); + } else if ( gv->pathlen == gv->curpathlen ) { + gv->pathlen *= 2; + gv->path = (BlockNumber*) repalloc( gv->path, MAXALIGN(sizeof(BlockNumber)*gv->pathlen) ); + } + + if ( gv->curpathlen ) + memmove( gv->path+1, gv->path, sizeof(BlockNumber)*gv->curpathlen ); + gv->curpathlen++; + gv->path[0] = blkno; +} + +static void +unshiftPath(GistVacuum *gv) { + gv->curpathlen--; + if ( gv->curpathlen ) + memmove( gv->path, gv->path+1, sizeof(BlockNumber)*gv->curpathlen ); +} + +typedef struct { + IndexTuple *itup; + int ituplen; + bool emptypage; +} ArrayTuple; + + +static ArrayTuple +gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) { + ArrayTuple res = {NULL, 0, false}; + Buffer buffer; + Page page; + OffsetNumber i, maxoff; + ItemId iid; + int lenaddon=4, curlenaddon=0, ntodelete=0; + IndexTuple idxtuple, *addon=NULL; + bool needwrite=false; + OffsetNumber *todelete=NULL; + ItemPointerData *completed=NULL; + int ncompleted=0, lencompleted=16; + + buffer = ReadBuffer(gv->index, blkno); + page = (Page) BufferGetPage(buffer); + maxoff = PageGetMaxOffsetNumber(page); + + + if ( GistPageIsLeaf(page) ) { + if ( GistTuplesDeleted(page) ) { + needunion = needwrite = true; + GistClearTuplesDeleted(page); + } + } else { + todelete = (OffsetNumber*)palloc( MAXALIGN(sizeof(OffsetNumber)*(maxoff+1)) ); + completed = (ItemPointerData*)palloc( sizeof(ItemPointerData)*lencompleted ); + addon=(IndexTuple*)palloc(sizeof(IndexTuple)*lenaddon); + + shiftPath(gv, blkno); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + ArrayTuple chldtuple; + bool needchildunion; + + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); + needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false; + + if ( needchildunion ) + elog(DEBUG2,"gistVacuumUpdate: Need union for block %u", ItemPointerGetBlockNumber(&(idxtuple->t_tid))); + + chldtuple = gistVacuumUpdate( gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)), + needchildunion ); + if ( chldtuple.ituplen || chldtuple.emptypage ) { + /* adjust any scans that will be affected by this deletion */ + gistadjscans(gv->index, GISTOP_DEL, blkno, i); + PageIndexTupleDelete(page, i); + todelete[ ntodelete++ ] = i; + i--; maxoff--; + needwrite=needunion=true; + + if ( chldtuple.ituplen ) { + while( curlenaddon + chldtuple.ituplen >= lenaddon ) { + lenaddon*=2; + addon=(IndexTuple*)repalloc( addon, sizeof(IndexTuple)*lenaddon ); + } + + memcpy( addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple) ); + + curlenaddon += chldtuple.ituplen; + + if ( chldtuple.ituplen > 1 ) { + /* child was splitted, so we need mark completion insert(split) */ + int j; + + while( ncompleted + chldtuple.ituplen > lencompleted ) { + lencompleted*=2; + completed = (ItemPointerData*)repalloc(completed, sizeof(ItemPointerData) * lencompleted); + } + for(j=0;j<chldtuple.ituplen;j++) { + ItemPointerCopy( &(chldtuple.itup[j]->t_tid), completed + ncompleted ); + ncompleted++; + } + } + pfree( chldtuple.itup ); + } + } + } + + if ( curlenaddon ) { + /* insert updated tuples */ + if (gistnospace(page, addon, curlenaddon)) { + /* there is no space on page to insert tuples */ + IndexTuple *vec; + SplitedPageLayout *dist=NULL,*ptr; + int i; + MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx); + + vec = gistextractbuffer(buffer, &(res.ituplen)); + vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon); + res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate)); + MemoryContextSwitchTo(oldCtx); + + vec = (IndexTuple*)palloc( sizeof(IndexTuple) * res.ituplen ); + for(i=0;i<res.ituplen;i++) { + vec[i] = (IndexTuple)palloc( IndexTupleSize(res.itup[i]) ); + memcpy( vec[i], res.itup[i], IndexTupleSize(res.itup[i]) ); + } + res.itup = vec; + + if ( !gv->index->rd_istemp ) { + XLogRecPtr recptr; + XLogRecData *rdata; + ItemPointerData key; /* set key for incomplete insert */ + + ItemPointerSet(&key, blkno, TUPLE_IS_VALID); + + oldCtx = MemoryContextSwitchTo(gv->opCtx); + + /* path is need to recovery because there is new pages, in a case of + crash it's needed to add inner tuple pointers on parent page */ + rdata = formSplitRdata(gv->index->rd_node, blkno, + todelete, ntodelete, addon, curlenaddon, + &key, gv->path, gv->curpathlen, dist); + + MemoryContextSwitchTo(oldCtx); + + START_CRIT_SECTION(); + + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata); + ptr = dist; + while(ptr) { + PageSetLSN(BufferGetPage(ptr->buffer), recptr); + PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID); + ptr=ptr->next; + } + + END_CRIT_SECTION(); + + } + + ptr = dist; + while(ptr) { + WriteBuffer(ptr->buffer); + ptr=ptr->next; + } + + if ( blkno == GIST_ROOT_BLKNO ) { + ItemPointerData key; /* set key for incomplete insert */ + + ItemPointerSet(&key, blkno, TUPLE_IS_VALID); + + oldCtx = MemoryContextSwitchTo(gv->opCtx); + gistnewroot(gv->index, res.itup, res.ituplen, &key); + MemoryContextSwitchTo(oldCtx); + } + + needwrite=false; + + MemoryContextReset(gv->opCtx); + + needunion = false; /* gistSplit already forms unions */ + } else { + OffsetNumber off = (PageIsEmpty(page)) ? + FirstOffsetNumber + : + OffsetNumberNext(PageGetMaxOffsetNumber(page)); + + /* enough free space */ + gistfillbuffer(gv->index, page, addon, curlenaddon, off); + } + } + unshiftPath(gv); + } + + if ( needunion ) { + /* forms union for page or check empty*/ + if ( PageIsEmpty(page) ) { + if ( blkno == GIST_ROOT_BLKNO ) { + needwrite=true; + GistPageSetLeaf( page ); + } else { + needwrite=true; + res.emptypage=true; + GistPageSetDeleted( page ); + gv->result->pages_deleted++; + } + } else { + IndexTuple *vec, tmp; + int veclen=0; + MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx); + + vec = gistextractbuffer(buffer, &veclen); + tmp = gistunion(gv->index, vec, veclen, &(gv->giststate)); + MemoryContextSwitchTo(oldCtx); + + res.itup=(IndexTuple*)palloc( sizeof(IndexTuple) ); + res.ituplen = 1; + res.itup[0] = (IndexTuple)palloc( IndexTupleSize(tmp) ); + memcpy( res.itup[0], tmp, IndexTupleSize(tmp) ); + + ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno); + GistTupleSetValid( res.itup[0] ); + + MemoryContextReset(gv->opCtx); + } + } + + if ( needwrite ) { + if ( !gv->index->rd_istemp ) { + XLogRecData *rdata; + XLogRecPtr recptr; + MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx); + + /* In a vacuum, it's not need to push path, because + there is no new inserted keys */ + rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete, + res.emptypage, addon, curlenaddon, NULL, NULL, 0); + MemoryContextSwitchTo(oldCtx); + + + START_CRIT_SECTION(); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + END_CRIT_SECTION(); + MemoryContextReset(gv->opCtx); + } + WriteBuffer( buffer ); + } else + ReleaseBuffer( buffer ); + + if ( ncompleted && !gv->index->rd_istemp ) + gistxlogInsertCompletion( gv->index->rd_node, completed, ncompleted ); + + for(i=0;i<curlenaddon;i++) + pfree( addon[i] ); + if (addon) pfree(addon); + if (todelete) pfree(todelete); + if (completed) pfree(completed); + return res; +} + +/* + * For usial vacuum just update FSM, for full vacuum + * reforms parent tuples if some of childs was deleted or changed, + * update invalid tuples (they can exsist from last crash recovery only), + * tries to get smaller index + */ + +Datum +gistvacuumcleanup(PG_FUNCTION_ARGS) { + Relation rel = (Relation) PG_GETARG_POINTER(0); + IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1); + IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2); + BlockNumber npages, blkno; + BlockNumber nFreePages, *freePages, maxFreePages; + BlockNumber lastBlock = GIST_ROOT_BLKNO, lastFilledBlock = GIST_ROOT_BLKNO; + + /* LockRelation(rel, AccessExclusiveLock); */ + + /* gistVacuumUpdate may cause hard work */ + if ( info->vacuum_full ) { + GistVacuum gv; + ArrayTuple res; + + gv.index = rel; + initGISTstate(&(gv.giststate), rel); + gv.opCtx = createTempGistContext(); + gv.result = stats; + + gv.path=NULL; + gv.pathlen = gv.curpathlen = 0; + + /* walk through the entire index for update tuples */ + res = gistVacuumUpdate( &gv, GIST_ROOT_BLKNO, false ); + /* cleanup */ + if (res.itup) { + int i; + for(i=0;i<res.ituplen;i++) + pfree( res.itup[i] ); + pfree( res.itup ); + } + if ( gv.path ) + pfree( gv.path ); + freeGISTstate(&(gv.giststate)); + MemoryContextDelete(gv.opCtx); + } else if (needFullVacuum) { + elog(NOTICE,"It's desirable to vacuum full or reindex GiST index '%s' due to crash recovery", + RelationGetRelationName(rel)); + } + + needFullVacuum = false; + + /* try to find deleted pages */ + npages = RelationGetNumberOfBlocks(rel); + maxFreePages = RelationGetNumberOfBlocks(rel); + if ( maxFreePages > MaxFSMPages ) + maxFreePages = MaxFSMPages; + nFreePages = 0; + freePages = (BlockNumber*) palloc (sizeof(BlockNumber) * maxFreePages); + for(blkno=GIST_ROOT_BLKNO+1;blkno<npages;blkno++) { + Buffer buffer = ReadBuffer(rel, blkno); + Page page=(Page)BufferGetPage(buffer); + + if ( GistPageIsDeleted(page) ) { + if (nFreePages < maxFreePages) { + freePages[ nFreePages ] = blkno; + nFreePages++; + } + } else + lastFilledBlock = blkno; + ReleaseBuffer(buffer); + } + lastBlock = npages-1; + + if ( nFreePages > 0 ) { + if ( info->vacuum_full ) { /* try to truncate index */ + int i; + for(i=0;i<nFreePages;i++) + if ( freePages[i] >= lastFilledBlock ) { + nFreePages = i; + break; + } + + if ( lastBlock > lastFilledBlock ) + RelationTruncate( rel, lastFilledBlock+1 ); + stats->pages_removed = lastBlock - lastFilledBlock; + } + + if ( nFreePages > 0 ) + RecordIndexFreeSpace( &rel->rd_node, nFreePages, freePages ); + } + pfree( freePages ); + + /* return statistics */ + stats->pages_free = nFreePages; + stats->num_pages = RelationGetNumberOfBlocks(rel); + + /* UnlockRelation(rel, AccessExclusiveLock); */ + + PG_RETURN_POINTER(stats); +} + +typedef struct GistBDItem { + BlockNumber blkno; + struct GistBDItem *next; +} GistBDItem; + +/* + * Bulk deletion of all index entries pointing to a set of heap tuples and + * update invalid tuples after crash recovery. + * The set of target tuples is specified via a callback routine that tells + * whether any given heap tuple (identified by ItemPointer) is being deleted. + * + * Result: a palloc'd struct containing statistical info for VACUUM displays. + */ +Datum +gistbulkdelete(PG_FUNCTION_ARGS) { + Relation rel = (Relation) PG_GETARG_POINTER(0); + IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1); + void* callback_state = (void *) PG_GETARG_POINTER(2); + IndexBulkDeleteResult *result = (IndexBulkDeleteResult*)palloc0(sizeof(IndexBulkDeleteResult)); + GistBDItem *stack, *ptr; + MemoryContext opCtx = createTempGistContext(); + + stack = (GistBDItem*) palloc(sizeof(GistBDItem)); + + stack->blkno = GIST_ROOT_BLKNO; + stack->next = NULL; + needFullVacuum = false; + + while( stack ) { + Buffer buffer = ReadBuffer(rel, stack->blkno); + Page page = (Page) BufferGetPage(buffer); + OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page); + IndexTuple idxtuple; + ItemId iid; + OffsetNumber *todelete = NULL; + int ntodelete = 0; + + if ( GistPageIsLeaf(page) ) { + ItemPointerData heapptr; + + todelete = (OffsetNumber*)palloc( MAXALIGN(sizeof(OffsetNumber)*maxoff) ); + + for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) { + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); + heapptr = idxtuple->t_tid; + + if ( callback(&heapptr, callback_state) ) { + gistadjscans(rel, GISTOP_DEL, stack->blkno, i); + PageIndexTupleDelete(page, i); + todelete[ ntodelete++ ] = i; + i--; maxoff--; + result->tuples_removed += 1; + } else + result->num_index_tuples += 1; + } + } else { + for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) { + iid = PageGetItemId(page, i); + idxtuple = (IndexTuple) PageGetItem(page, iid); + + ptr = (GistBDItem*) palloc(sizeof(GistBDItem)); + ptr->blkno = ItemPointerGetBlockNumber( &(idxtuple->t_tid) ); + ptr->next = stack->next; + stack->next = ptr; + + if ( GistTupleIsInvalid(idxtuple) ) + needFullVacuum = true; + } + } + + if ( ntodelete && todelete ) { + GistMarkTuplesDeleted(page); + + if (!rel->rd_istemp ) { + XLogRecData *rdata; + XLogRecPtr recptr; + MemoryContext oldCtx = MemoryContextSwitchTo(opCtx); + + rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete, + false, NULL, 0, NULL, NULL, 0); + MemoryContextSwitchTo(oldCtx); + + START_CRIT_SECTION(); + recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata); + PageSetLSN(page, recptr); + PageSetTLI(page, ThisTimeLineID); + END_CRIT_SECTION(); + + MemoryContextReset(opCtx); + } + + WriteBuffer( buffer ); + } else + ReleaseBuffer( buffer ); + + if ( todelete ) + pfree( todelete ); + + ptr = stack->next; + pfree( stack ); + stack = ptr; + } + + MemoryContextDelete( opCtx ); + + result->num_pages = RelationGetNumberOfBlocks(rel); + + + PG_RETURN_POINTER( result ); +} + |