diff options
Diffstat (limited to 'src/backend/access/gin/gindatapage.c')
-rw-r--r-- | src/backend/access/gin/gindatapage.c | 569 |
1 files changed, 569 insertions, 0 deletions
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c new file mode 100644 index 00000000000..1069b19b92c --- /dev/null +++ b/src/backend/access/gin/gindatapage.c @@ -0,0 +1,569 @@ +/*------------------------------------------------------------------------- + * + * gindatapage.c + * page utilities routines for the postgres inverted index access method. + * + * + * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.1 2006/05/02 11:28:54 teodor Exp $ + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "access/genam.h" +#include "access/gin.h" +#include "access/heapam.h" +#include "catalog/index.h" +#include "miscadmin.h" +#include "storage/freespace.h" + +int +compareItemPointers( ItemPointer a, ItemPointer b ) { + if ( GinItemPointerGetBlockNumber(a) == GinItemPointerGetBlockNumber(b) ) { + if ( GinItemPointerGetOffsetNumber(a) == GinItemPointerGetOffsetNumber(b) ) + return 0; + return ( GinItemPointerGetOffsetNumber(a) > GinItemPointerGetOffsetNumber(b) ) ? 1 : -1; + } + + return ( GinItemPointerGetBlockNumber(a) > GinItemPointerGetBlockNumber(b) ) ? 1 : -1; +} + +/* + * Merge two ordered array of itempointer + */ +void +MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) { + ItemPointerData *dptr = dst; + ItemPointerData *aptr = a, *bptr = b; + + while( aptr - a < na && bptr - b < nb ) { + if ( compareItemPointers(aptr, bptr) > 0 ) + *dptr++ = *bptr++; + else + *dptr++ = *aptr++; + } + + while( aptr - a < na ) + *dptr++ = *aptr++; + + while( bptr - b < nb ) + *dptr++ = *bptr++; +} + +/* + * Checks, should we move to right link... + * Compares inserting itemp pointer with right bound of current page + */ +static bool +dataIsMoveRight(GinBtree btree, Page page) { + ItemPointer iptr = GinDataPageGetRightBound(page); + + if ( GinPageRightMost(page) ) + return FALSE; + + return ( compareItemPointers( btree->items + btree->curitem, iptr ) > 0 ) ? TRUE : FALSE; +} + +/* + * Find correct PostingItem in non-leaf page. It supposed that + * page correctly choosen and searching value SHOULD be on page + */ +static BlockNumber +dataLocateItem(GinBtree btree, GinBtreeStack *stack) { + OffsetNumber low, high, maxoff; + PostingItem *pitem=NULL; + int result; + Page page = BufferGetPage( stack->buffer ); + + Assert( !GinPageIsLeaf(page) ); + Assert( GinPageIsData(page) ); + + if ( btree->fullScan ) { + stack->off = FirstOffsetNumber; + stack->predictNumber *= GinPageGetOpaque(page)->maxoff; + return btree->getLeftMostPage(btree, page); + } + + low = FirstOffsetNumber; + maxoff = high = GinPageGetOpaque(page)->maxoff; + Assert( high >= low ); + + high++; + + while (high > low) { + OffsetNumber mid = low + ((high - low) / 2); + pitem = (PostingItem*)GinDataPageGetItem(page,mid); + + if ( mid == maxoff ) + /* Right infinity, page already correctly choosen + with a help of dataIsMoveRight */ + result = -1; + else { + pitem = (PostingItem*)GinDataPageGetItem(page,mid); + result = compareItemPointers( btree->items + btree->curitem, &( pitem->key ) ); + } + + if ( result == 0 ) { + stack->off = mid; + return PostingItemGetBlockNumber(pitem); + } else if ( result > 0 ) + low = mid + 1; + else + high = mid; + } + + Assert( high>=FirstOffsetNumber && high <= maxoff ); + + stack->off = high; + pitem = (PostingItem*)GinDataPageGetItem(page,high); + return PostingItemGetBlockNumber(pitem); +} + +/* + * Searches correct position for value on leaf page. + * Page should be corrrectly choosen. + * Returns true if value found on page. + */ +static bool +dataLocateLeafItem(GinBtree btree, GinBtreeStack *stack) { + Page page = BufferGetPage( stack->buffer ); + OffsetNumber low, high; + int result; + + Assert( GinPageIsLeaf(page) ); + Assert( GinPageIsData(page) ); + + if ( btree->fullScan ) { + stack->off = FirstOffsetNumber; + return TRUE; + } + + low=FirstOffsetNumber; + high = GinPageGetOpaque(page)->maxoff; + + if ( high < low ) { + stack->off = FirstOffsetNumber; + return false; + } + + high++; + + while (high > low) { + OffsetNumber mid = low + ((high - low) / 2); + + result = compareItemPointers( btree->items + btree->curitem, (ItemPointer)GinDataPageGetItem(page,mid) ); + + if ( result == 0 ) { + stack->off = mid; + return true; + } else if ( result > 0 ) + low = mid + 1; + else + high = mid; + } + + stack->off = high; + return false; +} + +/* + * Finds links to blkno on non-leaf page, retuns + * offset of PostingItem + */ +static OffsetNumber +dataFindChildPtr(GinBtree btree, Page page, BlockNumber blkno, OffsetNumber storedOff) { + OffsetNumber i, maxoff = GinPageGetOpaque(page)->maxoff; + PostingItem *pitem; + + Assert( !GinPageIsLeaf(page) ); + Assert( GinPageIsData(page) ); + + /* if page isn't changed, we returns storedOff */ + if ( storedOff>= FirstOffsetNumber && storedOff<=maxoff) { + pitem = (PostingItem*)GinDataPageGetItem(page, storedOff); + if ( PostingItemGetBlockNumber(pitem) == blkno ) + return storedOff; + + /* we hope, that needed pointer goes to right. It's true + if there wasn't a deletion */ + for( i=storedOff+1 ; i <= maxoff ; i++ ) { + pitem = (PostingItem*)GinDataPageGetItem(page, i); + if ( PostingItemGetBlockNumber(pitem) == blkno ) + return i; + } + + maxoff = storedOff-1; + } + + /* last chance */ + for( i=FirstOffsetNumber; i <= maxoff ; i++ ) { + pitem = (PostingItem*)GinDataPageGetItem(page, i); + if ( PostingItemGetBlockNumber(pitem) == blkno ) + return i; + } + + return InvalidOffsetNumber; +} + +/* + * retunrs blkno of lefmost child + */ +static BlockNumber +dataGetLeftMostPage(GinBtree btree, Page page) { + PostingItem *pitem; + + Assert( !GinPageIsLeaf(page) ); + Assert( GinPageIsData(page) ); + Assert( GinPageGetOpaque(page)->maxoff >= FirstOffsetNumber ); + + pitem = (PostingItem*)GinDataPageGetItem(page, FirstOffsetNumber); + return PostingItemGetBlockNumber(pitem); +} + +/* + * add ItemPointer or PostingItem to page. data should points to + * correct value! depending on leaf or non-leaf page + */ +void +GinDataPageAddItem( Page page, void *data, OffsetNumber offset ) { + OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; + char *ptr; + + if ( offset == InvalidOffsetNumber ) { + ptr = GinDataPageGetItem(page,maxoff+1); + } else { + ptr = GinDataPageGetItem(page,offset); + if ( maxoff+1-offset != 0 ) + memmove( ptr+GinSizeOfItem(page), ptr, (maxoff-offset+1) * GinSizeOfItem(page) ); + } + memcpy( ptr, data, GinSizeOfItem(page) ); + + GinPageGetOpaque(page)->maxoff++; +} + +/* + * Deletes posting item from non-leaf page + */ +void +PageDeletePostingItem(Page page, OffsetNumber offset) { + OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff; + + Assert( !GinPageIsLeaf(page) ); + Assert( offset>=FirstOffsetNumber && offset <= maxoff ); + + if ( offset != maxoff ) + memmove( GinDataPageGetItem(page,offset), GinDataPageGetItem(page,offset+1), + sizeof(PostingItem) * (maxoff-offset) ); + + GinPageGetOpaque(page)->maxoff--; +} + +/* + * checks space to install new value, + * item pointer never deletes! + */ +static bool +dataIsEnoughSpace( GinBtree btree, Buffer buf, OffsetNumber off ) { + Page page = BufferGetPage(buf); + + Assert( GinPageIsData(page) ); + Assert( !btree->isDelete ); + + if ( GinPageIsLeaf(page) ) { + if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) { + if ( (btree->nitem - btree->curitem) * sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) ) + return true; + } else if ( sizeof(ItemPointerData) <= GinDataPageGetFreeSpace(page) ) + return true; + } else if ( sizeof(PostingItem) <= GinDataPageGetFreeSpace(page) ) + return true; + + return false; +} + +/* + * In case of previous split update old child blkno to + * new right page + * item pointer never deletes! + */ +static BlockNumber +dataPrepareData( GinBtree btree, Page page, OffsetNumber off) { + BlockNumber ret = InvalidBlockNumber; + + Assert( GinPageIsData(page) ); + + if ( !GinPageIsLeaf(page) && btree->rightblkno != InvalidBlockNumber ) { + PostingItem *pitem = (PostingItem*)GinDataPageGetItem(page,off); + PostingItemSetBlockNumber( pitem, btree->rightblkno ); + ret = btree->rightblkno; + } + + btree->rightblkno = InvalidBlockNumber; + + return ret; +} + +/* + * Places keys to page and fills WAL record. In case leaf page and + * build mode puts all ItemPointers to page. + */ +static void +dataPlaceToPage(GinBtree btree, Buffer buf, OffsetNumber off, XLogRecData **prdata) { + Page page = BufferGetPage(buf); + static XLogRecData rdata[3]; + int sizeofitem = GinSizeOfItem(page); + static ginxlogInsert data; + + *prdata = rdata; + Assert( GinPageIsData(page) ); + + data.updateBlkno = dataPrepareData( btree, page, off ); + + data.node = btree->index->rd_node; + data.blkno = BufferGetBlockNumber( buf ); + data.offset = off; + data.nitem = 1; + data.isDelete = FALSE; + data.isData = TRUE; + data.isLeaf = GinPageIsLeaf(page) ? TRUE : FALSE; + + rdata[0].buffer = buf; + rdata[0].buffer_std = FALSE; + rdata[0].data = NULL; + rdata[0].len = 0; + rdata[0].next = &rdata[1]; + + rdata[1].buffer = InvalidBuffer; + rdata[1].data = (char *) &data; + rdata[1].len = sizeof(ginxlogInsert); + rdata[1].next = &rdata[2]; + + rdata[2].buffer = InvalidBuffer; + rdata[2].data = (GinPageIsLeaf(page)) ? ((char*)(btree->items+btree->curitem)) : ((char*)&(btree->pitem)); + rdata[2].len = sizeofitem; + rdata[2].next = NULL; + + if ( GinPageIsLeaf(page) ) { + if ( GinPageRightMost(page) && off > GinPageGetOpaque(page)->maxoff ) { + /* usually, create index... */ + uint32 savedPos = btree->curitem; + + while( btree->curitem < btree->nitem ) { + GinDataPageAddItem(page, btree->items+btree->curitem, off); + off++; + btree->curitem++; + } + data.nitem = btree->curitem-savedPos; + rdata[2].len = sizeofitem * data.nitem; + } else { + GinDataPageAddItem(page, btree->items+btree->curitem, off); + btree->curitem++; + } + } else + GinDataPageAddItem(page, &(btree->pitem), off); +} + +/* + * split page and fills WAL record. original buffer(lbuf) leaves untouched, + * returns shadow page of lbuf filled new data. In leaf page and build mode puts all + * ItemPointers to pages. Also, in build mode splits data by way to full fulled + * left page + */ +static Page +dataSplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogRecData **prdata) { + static ginxlogSplit data; + static XLogRecData rdata[4]; + static char vector[2*BLCKSZ]; + char *ptr; + OffsetNumber separator; + ItemPointer bound; + Page lpage = GinPageGetCopyPage( BufferGetPage( lbuf ) ); + ItemPointerData oldbound = *GinDataPageGetRightBound(lpage); + int sizeofitem = GinSizeOfItem(lpage); + OffsetNumber maxoff = GinPageGetOpaque(lpage)->maxoff; + Page rpage = BufferGetPage( rbuf ); + Size pageSize = PageGetPageSize( lpage ); + Size freeSpace; + uint32 nCopied = 1; + + GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize ); + freeSpace = GinDataPageGetFreeSpace(rpage); + + *prdata = rdata; + data.leftChildBlkno = ( GinPageIsLeaf(lpage) ) ? + InvalidOffsetNumber : PostingItemGetBlockNumber( &(btree->pitem) ); + data.updateBlkno = dataPrepareData( btree, lpage, off ); + + memcpy(vector, GinDataPageGetItem(lpage, FirstOffsetNumber), + maxoff*sizeofitem); + + if ( GinPageIsLeaf(lpage) && GinPageRightMost(lpage) && off > GinPageGetOpaque(lpage)->maxoff ) { + nCopied = 0; + while( btree->curitem < btree->nitem && maxoff*sizeof(ItemPointerData) < 2*(freeSpace - sizeof(ItemPointerData)) ) { + memcpy( vector + maxoff*sizeof(ItemPointerData), btree->items+btree->curitem, + sizeof(ItemPointerData) ); + maxoff++; + nCopied++; + btree->curitem++; + } + } else { + ptr = vector + (off-1)*sizeofitem; + if ( maxoff+1-off != 0 ) + memmove( ptr+sizeofitem, ptr, (maxoff-off+1) * sizeofitem ); + if ( GinPageIsLeaf(lpage) ) { + memcpy(ptr, btree->items+btree->curitem, sizeofitem ); + btree->curitem++; + } else + memcpy(ptr, &(btree->pitem), sizeofitem ); + + maxoff++; + } + + /* we suppose that during index creation table scaned from + begin to end, so ItemPointers are monotonically increased.. */ + if ( btree->isBuild && GinPageRightMost(lpage) ) + separator=freeSpace/sizeofitem; + else + separator=maxoff/2; + + GinInitPage( rpage, GinPageGetOpaque(lpage)->flags, pageSize ); + GinInitPage( lpage, GinPageGetOpaque(rpage)->flags, pageSize ); + + memcpy( GinDataPageGetItem(lpage, FirstOffsetNumber), vector, separator * sizeofitem ); + GinPageGetOpaque(lpage)->maxoff = separator; + memcpy( GinDataPageGetItem(rpage, FirstOffsetNumber), + vector + separator * sizeofitem, (maxoff-separator) * sizeofitem ); + GinPageGetOpaque(rpage)->maxoff = maxoff-separator; + + PostingItemSetBlockNumber( &(btree->pitem), BufferGetBlockNumber(lbuf) ); + if ( GinPageIsLeaf(lpage) ) + btree->pitem.key = *(ItemPointerData*)GinDataPageGetItem(lpage, + GinPageGetOpaque(lpage)->maxoff); + else + btree->pitem.key = ((PostingItem*)GinDataPageGetItem(lpage, + GinPageGetOpaque(lpage)->maxoff))->key; + btree->rightblkno = BufferGetBlockNumber(rbuf); + + /* set up right bound for left page */ + bound = GinDataPageGetRightBound(lpage); + *bound = btree->pitem.key; + + /* set up right bound for right page */ + bound = GinDataPageGetRightBound(rpage); + *bound = oldbound; + + data.node = btree->index->rd_node; + data.rootBlkno = InvalidBlockNumber; + data.lblkno = BufferGetBlockNumber( lbuf ); + data.rblkno = BufferGetBlockNumber( rbuf ); + data.separator = separator; + data.nitem = maxoff; + data.isData = TRUE; + data.isLeaf = GinPageIsLeaf(lpage) ? TRUE : FALSE; + data.isRootSplit = FALSE; + data.rightbound = oldbound; + + rdata[0].buffer = InvalidBuffer; + rdata[0].data = (char *) &data; + rdata[0].len = sizeof(ginxlogSplit); + rdata[0].next = &rdata[1]; + + rdata[1].buffer = InvalidBuffer; + rdata[1].data = vector; + rdata[1].len = MAXALIGN( maxoff * sizeofitem ); + rdata[1].next = NULL; + + return lpage; +} + +/* + * Fills new root by right bound values from child. + * Also called from ginxlog, should not use btree + */ +void +dataFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf) { + Page page = BufferGetPage(root), + lpage = BufferGetPage(lbuf), + rpage = BufferGetPage(rbuf); + PostingItem li, ri; + + li.key = *GinDataPageGetRightBound(lpage); + PostingItemSetBlockNumber( &li, BufferGetBlockNumber(lbuf) ); + GinDataPageAddItem(page, &li, InvalidOffsetNumber ); + + ri.key = *GinDataPageGetRightBound(rpage); + PostingItemSetBlockNumber( &ri, BufferGetBlockNumber(rbuf) ); + GinDataPageAddItem(page, &ri, InvalidOffsetNumber ); +} + +void +prepareDataScan( GinBtree btree, Relation index) { + memset(btree, 0, sizeof(GinBtreeData)); + btree->index = index; + btree->isMoveRight = dataIsMoveRight; + btree->findChildPage = dataLocateItem; + btree->findItem = dataLocateLeafItem; + btree->findChildPtr = dataFindChildPtr; + btree->getLeftMostPage = dataGetLeftMostPage; + btree->isEnoughSpace = dataIsEnoughSpace; + btree->placeToPage = dataPlaceToPage; + btree->splitPage = dataSplitPage; + btree->fillRoot = dataFillRoot; + + btree->searchMode = FALSE; + btree->isDelete = FALSE; + btree->fullScan = FALSE; + btree->isBuild= FALSE; +} + +GinPostingTreeScan* +prepareScanPostingTree( Relation index, BlockNumber rootBlkno, bool searchMode) { + GinPostingTreeScan *gdi = (GinPostingTreeScan*)palloc0( sizeof(GinPostingTreeScan) ); + + prepareDataScan( &gdi->btree, index ); + + gdi->btree.searchMode = searchMode; + gdi->btree.fullScan = searchMode; + + gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno ); + + return gdi; +} + +/* + * Inserts array of item pointers, may execute several tree scan (very rare) + */ +void +insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem) { + BlockNumber rootBlkno = gdi->stack->blkno; + + gdi->btree.items = items; + gdi->btree.nitem = nitem; + gdi->btree.curitem = 0; + + while( gdi->btree.curitem < gdi->btree.nitem ) { + if (!gdi->stack) + gdi->stack = ginPrepareFindLeafPage( &gdi->btree, rootBlkno ); + + gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack ); + + if ( gdi->btree.findItem( &(gdi->btree), gdi->stack ) ) + elog(ERROR,"Item pointer(%d:%d) is already exists", + ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem), + ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem)); + + ginInsertValue(&(gdi->btree), gdi->stack); + + gdi->stack=NULL; + } +} + +Buffer +scanBeginPostingTree( GinPostingTreeScan *gdi ) { + gdi->stack = ginFindLeafPage( &gdi->btree, gdi->stack ); + return gdi->stack->buffer; +} + |