aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gist/gistvacuum.c
diff options
context:
space:
mode:
authorTeodor Sigaev <teodor@sigaev.ru>2005-06-20 10:29:37 +0000
committerTeodor Sigaev <teodor@sigaev.ru>2005-06-20 10:29:37 +0000
commitd544ec8bbd70d50c1b50a00437b8061cabeeb5f2 (patch)
tree2eed78c3728c26840f525aa2aa8e2a4e6092f69e /src/backend/access/gist/gistvacuum.c
parent0b62bbe086261a12cc6779244e979c54233da055 (diff)
downloadpostgresql-d544ec8bbd70d50c1b50a00437b8061cabeeb5f2.tar.gz
postgresql-d544ec8bbd70d50c1b50a00437b8061cabeeb5f2.zip
1. full functional WAL for GiST
2. improve vacuum for gist - use FSM - full vacuum: - reforms parent tuple if it's needed ( tuples was deleted on child page or parent tuple remains invalid after crash recovery ) - truncate index file if possible 3. fixes bugs and mistakes
Diffstat (limited to 'src/backend/access/gist/gistvacuum.c')
-rw-r--r--src/backend/access/gist/gistvacuum.c519
1 files changed, 519 insertions, 0 deletions
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
new file mode 100644
index 00000000000..8f8e7f75070
--- /dev/null
+++ b/src/backend/access/gist/gistvacuum.c
@@ -0,0 +1,519 @@
+/*-------------------------------------------------------------------------
+ *
+ * gistvacuum.c
+ * interface routines for the postgres GiST index access method.
+ *
+ *
+ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.1 2005/06/20 10:29:36 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gist_private.h"
+#include "access/gistscan.h"
+#include "access/heapam.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "utils/memutils.h"
+#include "storage/freespace.h"
+#include "storage/smgr.h"
+
+/* filled by gistbulkdelete, cleared by gistvacuumpcleanup */
+static bool needFullVacuum = false;
+
+
+typedef struct {
+ GISTSTATE giststate;
+ Relation index;
+ MemoryContext opCtx;
+ IndexBulkDeleteResult *result;
+
+ /* path to root */
+ BlockNumber *path;
+ int pathlen;
+ int curpathlen;
+} GistVacuum;
+
+static void
+shiftPath(GistVacuum *gv, BlockNumber blkno) {
+ if ( gv->pathlen == 0 ) {
+ gv->pathlen = 8;
+ gv->path = (BlockNumber*) palloc( MAXALIGN(sizeof(BlockNumber)*gv->pathlen) );
+ } else if ( gv->pathlen == gv->curpathlen ) {
+ gv->pathlen *= 2;
+ gv->path = (BlockNumber*) repalloc( gv->path, MAXALIGN(sizeof(BlockNumber)*gv->pathlen) );
+ }
+
+ if ( gv->curpathlen )
+ memmove( gv->path+1, gv->path, sizeof(BlockNumber)*gv->curpathlen );
+ gv->curpathlen++;
+ gv->path[0] = blkno;
+}
+
+static void
+unshiftPath(GistVacuum *gv) {
+ gv->curpathlen--;
+ if ( gv->curpathlen )
+ memmove( gv->path, gv->path+1, sizeof(BlockNumber)*gv->curpathlen );
+}
+
+typedef struct {
+ IndexTuple *itup;
+ int ituplen;
+ bool emptypage;
+} ArrayTuple;
+
+
+static ArrayTuple
+gistVacuumUpdate( GistVacuum *gv, BlockNumber blkno, bool needunion ) {
+ ArrayTuple res = {NULL, 0, false};
+ Buffer buffer;
+ Page page;
+ OffsetNumber i, maxoff;
+ ItemId iid;
+ int lenaddon=4, curlenaddon=0, ntodelete=0;
+ IndexTuple idxtuple, *addon=NULL;
+ bool needwrite=false;
+ OffsetNumber *todelete=NULL;
+ ItemPointerData *completed=NULL;
+ int ncompleted=0, lencompleted=16;
+
+ buffer = ReadBuffer(gv->index, blkno);
+ page = (Page) BufferGetPage(buffer);
+ maxoff = PageGetMaxOffsetNumber(page);
+
+
+ if ( GistPageIsLeaf(page) ) {
+ if ( GistTuplesDeleted(page) ) {
+ needunion = needwrite = true;
+ GistClearTuplesDeleted(page);
+ }
+ } else {
+ todelete = (OffsetNumber*)palloc( MAXALIGN(sizeof(OffsetNumber)*(maxoff+1)) );
+ completed = (ItemPointerData*)palloc( sizeof(ItemPointerData)*lencompleted );
+ addon=(IndexTuple*)palloc(sizeof(IndexTuple)*lenaddon);
+
+ shiftPath(gv, blkno);
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) {
+ ArrayTuple chldtuple;
+ bool needchildunion;
+
+ iid = PageGetItemId(page, i);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+ needchildunion = (GistTupleIsInvalid(idxtuple)) ? true : false;
+
+ if ( needchildunion )
+ elog(DEBUG2,"gistVacuumUpdate: Need union for block %u", ItemPointerGetBlockNumber(&(idxtuple->t_tid)));
+
+ chldtuple = gistVacuumUpdate( gv, ItemPointerGetBlockNumber(&(idxtuple->t_tid)),
+ needchildunion );
+ if ( chldtuple.ituplen || chldtuple.emptypage ) {
+ /* adjust any scans that will be affected by this deletion */
+ gistadjscans(gv->index, GISTOP_DEL, blkno, i);
+ PageIndexTupleDelete(page, i);
+ todelete[ ntodelete++ ] = i;
+ i--; maxoff--;
+ needwrite=needunion=true;
+
+ if ( chldtuple.ituplen ) {
+ while( curlenaddon + chldtuple.ituplen >= lenaddon ) {
+ lenaddon*=2;
+ addon=(IndexTuple*)repalloc( addon, sizeof(IndexTuple)*lenaddon );
+ }
+
+ memcpy( addon + curlenaddon, chldtuple.itup, chldtuple.ituplen * sizeof(IndexTuple) );
+
+ curlenaddon += chldtuple.ituplen;
+
+ if ( chldtuple.ituplen > 1 ) {
+ /* child was splitted, so we need mark completion insert(split) */
+ int j;
+
+ while( ncompleted + chldtuple.ituplen > lencompleted ) {
+ lencompleted*=2;
+ completed = (ItemPointerData*)repalloc(completed, sizeof(ItemPointerData) * lencompleted);
+ }
+ for(j=0;j<chldtuple.ituplen;j++) {
+ ItemPointerCopy( &(chldtuple.itup[j]->t_tid), completed + ncompleted );
+ ncompleted++;
+ }
+ }
+ pfree( chldtuple.itup );
+ }
+ }
+ }
+
+ if ( curlenaddon ) {
+ /* insert updated tuples */
+ if (gistnospace(page, addon, curlenaddon)) {
+ /* there is no space on page to insert tuples */
+ IndexTuple *vec;
+ SplitedPageLayout *dist=NULL,*ptr;
+ int i;
+ MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
+
+ vec = gistextractbuffer(buffer, &(res.ituplen));
+ vec = gistjoinvector(vec, &(res.ituplen), addon, curlenaddon);
+ res.itup = gistSplit(gv->index, buffer, vec, &(res.ituplen), &dist, &(gv->giststate));
+ MemoryContextSwitchTo(oldCtx);
+
+ vec = (IndexTuple*)palloc( sizeof(IndexTuple) * res.ituplen );
+ for(i=0;i<res.ituplen;i++) {
+ vec[i] = (IndexTuple)palloc( IndexTupleSize(res.itup[i]) );
+ memcpy( vec[i], res.itup[i], IndexTupleSize(res.itup[i]) );
+ }
+ res.itup = vec;
+
+ if ( !gv->index->rd_istemp ) {
+ XLogRecPtr recptr;
+ XLogRecData *rdata;
+ ItemPointerData key; /* set key for incomplete insert */
+
+ ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
+
+ oldCtx = MemoryContextSwitchTo(gv->opCtx);
+
+ /* path is need to recovery because there is new pages, in a case of
+ crash it's needed to add inner tuple pointers on parent page */
+ rdata = formSplitRdata(gv->index->rd_node, blkno,
+ todelete, ntodelete, addon, curlenaddon,
+ &key, gv->path, gv->curpathlen, dist);
+
+ MemoryContextSwitchTo(oldCtx);
+
+ START_CRIT_SECTION();
+
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
+ ptr = dist;
+ while(ptr) {
+ PageSetLSN(BufferGetPage(ptr->buffer), recptr);
+ PageSetTLI(BufferGetPage(ptr->buffer), ThisTimeLineID);
+ ptr=ptr->next;
+ }
+
+ END_CRIT_SECTION();
+
+ }
+
+ ptr = dist;
+ while(ptr) {
+ WriteBuffer(ptr->buffer);
+ ptr=ptr->next;
+ }
+
+ if ( blkno == GIST_ROOT_BLKNO ) {
+ ItemPointerData key; /* set key for incomplete insert */
+
+ ItemPointerSet(&key, blkno, TUPLE_IS_VALID);
+
+ oldCtx = MemoryContextSwitchTo(gv->opCtx);
+ gistnewroot(gv->index, res.itup, res.ituplen, &key);
+ MemoryContextSwitchTo(oldCtx);
+ }
+
+ needwrite=false;
+
+ MemoryContextReset(gv->opCtx);
+
+ needunion = false; /* gistSplit already forms unions */
+ } else {
+ OffsetNumber off = (PageIsEmpty(page)) ?
+ FirstOffsetNumber
+ :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+ /* enough free space */
+ gistfillbuffer(gv->index, page, addon, curlenaddon, off);
+ }
+ }
+ unshiftPath(gv);
+ }
+
+ if ( needunion ) {
+ /* forms union for page or check empty*/
+ if ( PageIsEmpty(page) ) {
+ if ( blkno == GIST_ROOT_BLKNO ) {
+ needwrite=true;
+ GistPageSetLeaf( page );
+ } else {
+ needwrite=true;
+ res.emptypage=true;
+ GistPageSetDeleted( page );
+ gv->result->pages_deleted++;
+ }
+ } else {
+ IndexTuple *vec, tmp;
+ int veclen=0;
+ MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
+
+ vec = gistextractbuffer(buffer, &veclen);
+ tmp = gistunion(gv->index, vec, veclen, &(gv->giststate));
+ MemoryContextSwitchTo(oldCtx);
+
+ res.itup=(IndexTuple*)palloc( sizeof(IndexTuple) );
+ res.ituplen = 1;
+ res.itup[0] = (IndexTuple)palloc( IndexTupleSize(tmp) );
+ memcpy( res.itup[0], tmp, IndexTupleSize(tmp) );
+
+ ItemPointerSetBlockNumber(&(res.itup[0]->t_tid), blkno);
+ GistTupleSetValid( res.itup[0] );
+
+ MemoryContextReset(gv->opCtx);
+ }
+ }
+
+ if ( needwrite ) {
+ if ( !gv->index->rd_istemp ) {
+ XLogRecData *rdata;
+ XLogRecPtr recptr;
+ MemoryContext oldCtx = MemoryContextSwitchTo(gv->opCtx);
+
+ /* In a vacuum, it's not need to push path, because
+ there is no new inserted keys */
+ rdata = formUpdateRdata(gv->index->rd_node, blkno, todelete, ntodelete,
+ res.emptypage, addon, curlenaddon, NULL, NULL, 0);
+ MemoryContextSwitchTo(oldCtx);
+
+
+ START_CRIT_SECTION();
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ END_CRIT_SECTION();
+ MemoryContextReset(gv->opCtx);
+ }
+ WriteBuffer( buffer );
+ } else
+ ReleaseBuffer( buffer );
+
+ if ( ncompleted && !gv->index->rd_istemp )
+ gistxlogInsertCompletion( gv->index->rd_node, completed, ncompleted );
+
+ for(i=0;i<curlenaddon;i++)
+ pfree( addon[i] );
+ if (addon) pfree(addon);
+ if (todelete) pfree(todelete);
+ if (completed) pfree(completed);
+ return res;
+}
+
+/*
+ * For usial vacuum just update FSM, for full vacuum
+ * reforms parent tuples if some of childs was deleted or changed,
+ * update invalid tuples (they can exsist from last crash recovery only),
+ * tries to get smaller index
+ */
+
+Datum
+gistvacuumcleanup(PG_FUNCTION_ARGS) {
+ Relation rel = (Relation) PG_GETARG_POINTER(0);
+ IndexVacuumCleanupInfo *info = (IndexVacuumCleanupInfo *) PG_GETARG_POINTER(1);
+ IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(2);
+ BlockNumber npages, blkno;
+ BlockNumber nFreePages, *freePages, maxFreePages;
+ BlockNumber lastBlock = GIST_ROOT_BLKNO, lastFilledBlock = GIST_ROOT_BLKNO;
+
+ /* LockRelation(rel, AccessExclusiveLock); */
+
+ /* gistVacuumUpdate may cause hard work */
+ if ( info->vacuum_full ) {
+ GistVacuum gv;
+ ArrayTuple res;
+
+ gv.index = rel;
+ initGISTstate(&(gv.giststate), rel);
+ gv.opCtx = createTempGistContext();
+ gv.result = stats;
+
+ gv.path=NULL;
+ gv.pathlen = gv.curpathlen = 0;
+
+ /* walk through the entire index for update tuples */
+ res = gistVacuumUpdate( &gv, GIST_ROOT_BLKNO, false );
+ /* cleanup */
+ if (res.itup) {
+ int i;
+ for(i=0;i<res.ituplen;i++)
+ pfree( res.itup[i] );
+ pfree( res.itup );
+ }
+ if ( gv.path )
+ pfree( gv.path );
+ freeGISTstate(&(gv.giststate));
+ MemoryContextDelete(gv.opCtx);
+ } else if (needFullVacuum) {
+ elog(NOTICE,"It's desirable to vacuum full or reindex GiST index '%s' due to crash recovery",
+ RelationGetRelationName(rel));
+ }
+
+ needFullVacuum = false;
+
+ /* try to find deleted pages */
+ npages = RelationGetNumberOfBlocks(rel);
+ maxFreePages = RelationGetNumberOfBlocks(rel);
+ if ( maxFreePages > MaxFSMPages )
+ maxFreePages = MaxFSMPages;
+ nFreePages = 0;
+ freePages = (BlockNumber*) palloc (sizeof(BlockNumber) * maxFreePages);
+ for(blkno=GIST_ROOT_BLKNO+1;blkno<npages;blkno++) {
+ Buffer buffer = ReadBuffer(rel, blkno);
+ Page page=(Page)BufferGetPage(buffer);
+
+ if ( GistPageIsDeleted(page) ) {
+ if (nFreePages < maxFreePages) {
+ freePages[ nFreePages ] = blkno;
+ nFreePages++;
+ }
+ } else
+ lastFilledBlock = blkno;
+ ReleaseBuffer(buffer);
+ }
+ lastBlock = npages-1;
+
+ if ( nFreePages > 0 ) {
+ if ( info->vacuum_full ) { /* try to truncate index */
+ int i;
+ for(i=0;i<nFreePages;i++)
+ if ( freePages[i] >= lastFilledBlock ) {
+ nFreePages = i;
+ break;
+ }
+
+ if ( lastBlock > lastFilledBlock )
+ RelationTruncate( rel, lastFilledBlock+1 );
+ stats->pages_removed = lastBlock - lastFilledBlock;
+ }
+
+ if ( nFreePages > 0 )
+ RecordIndexFreeSpace( &rel->rd_node, nFreePages, freePages );
+ }
+ pfree( freePages );
+
+ /* return statistics */
+ stats->pages_free = nFreePages;
+ stats->num_pages = RelationGetNumberOfBlocks(rel);
+
+ /* UnlockRelation(rel, AccessExclusiveLock); */
+
+ PG_RETURN_POINTER(stats);
+}
+
+typedef struct GistBDItem {
+ BlockNumber blkno;
+ struct GistBDItem *next;
+} GistBDItem;
+
+/*
+ * Bulk deletion of all index entries pointing to a set of heap tuples and
+ * update invalid tuples after crash recovery.
+ * The set of target tuples is specified via a callback routine that tells
+ * whether any given heap tuple (identified by ItemPointer) is being deleted.
+ *
+ * Result: a palloc'd struct containing statistical info for VACUUM displays.
+ */
+Datum
+gistbulkdelete(PG_FUNCTION_ARGS) {
+ Relation rel = (Relation) PG_GETARG_POINTER(0);
+ IndexBulkDeleteCallback callback = (IndexBulkDeleteCallback) PG_GETARG_POINTER(1);
+ void* callback_state = (void *) PG_GETARG_POINTER(2);
+ IndexBulkDeleteResult *result = (IndexBulkDeleteResult*)palloc0(sizeof(IndexBulkDeleteResult));
+ GistBDItem *stack, *ptr;
+ MemoryContext opCtx = createTempGistContext();
+
+ stack = (GistBDItem*) palloc(sizeof(GistBDItem));
+
+ stack->blkno = GIST_ROOT_BLKNO;
+ stack->next = NULL;
+ needFullVacuum = false;
+
+ while( stack ) {
+ Buffer buffer = ReadBuffer(rel, stack->blkno);
+ Page page = (Page) BufferGetPage(buffer);
+ OffsetNumber i, maxoff = PageGetMaxOffsetNumber(page);
+ IndexTuple idxtuple;
+ ItemId iid;
+ OffsetNumber *todelete = NULL;
+ int ntodelete = 0;
+
+ if ( GistPageIsLeaf(page) ) {
+ ItemPointerData heapptr;
+
+ todelete = (OffsetNumber*)palloc( MAXALIGN(sizeof(OffsetNumber)*maxoff) );
+
+ for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) {
+ iid = PageGetItemId(page, i);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+ heapptr = idxtuple->t_tid;
+
+ if ( callback(&heapptr, callback_state) ) {
+ gistadjscans(rel, GISTOP_DEL, stack->blkno, i);
+ PageIndexTupleDelete(page, i);
+ todelete[ ntodelete++ ] = i;
+ i--; maxoff--;
+ result->tuples_removed += 1;
+ } else
+ result->num_index_tuples += 1;
+ }
+ } else {
+ for(i=FirstOffsetNumber;i<=maxoff;i=OffsetNumberNext(i)) {
+ iid = PageGetItemId(page, i);
+ idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+ ptr = (GistBDItem*) palloc(sizeof(GistBDItem));
+ ptr->blkno = ItemPointerGetBlockNumber( &(idxtuple->t_tid) );
+ ptr->next = stack->next;
+ stack->next = ptr;
+
+ if ( GistTupleIsInvalid(idxtuple) )
+ needFullVacuum = true;
+ }
+ }
+
+ if ( ntodelete && todelete ) {
+ GistMarkTuplesDeleted(page);
+
+ if (!rel->rd_istemp ) {
+ XLogRecData *rdata;
+ XLogRecPtr recptr;
+ MemoryContext oldCtx = MemoryContextSwitchTo(opCtx);
+
+ rdata = formUpdateRdata(rel->rd_node, stack->blkno, todelete, ntodelete,
+ false, NULL, 0, NULL, NULL, 0);
+ MemoryContextSwitchTo(oldCtx);
+
+ START_CRIT_SECTION();
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_ENTRY_UPDATE, rdata);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ END_CRIT_SECTION();
+
+ MemoryContextReset(opCtx);
+ }
+
+ WriteBuffer( buffer );
+ } else
+ ReleaseBuffer( buffer );
+
+ if ( todelete )
+ pfree( todelete );
+
+ ptr = stack->next;
+ pfree( stack );
+ stack = ptr;
+ }
+
+ MemoryContextDelete( opCtx );
+
+ result->num_pages = RelationGetNumberOfBlocks(rel);
+
+
+ PG_RETURN_POINTER( result );
+}
+