aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/common/reloptions.c10
-rw-r--r--src/backend/access/gin/Makefile4
-rw-r--r--src/backend/access/gin/ginbulk.c4
-rw-r--r--src/backend/access/gin/gindatapage.c27
-rw-r--r--src/backend/access/gin/ginfast.c866
-rw-r--r--src/backend/access/gin/ginget.c481
-rw-r--r--src/backend/access/gin/gininsert.c58
-rw-r--r--src/backend/access/gin/ginutil.c48
-rw-r--r--src/backend/access/gin/ginvacuum.c46
-rw-r--r--src/backend/access/gin/ginxlog.c215
-rw-r--r--src/backend/access/gist/gistvacuum.c6
-rw-r--r--src/backend/access/hash/hash.c3
-rw-r--r--src/backend/access/index/indexam.c5
-rw-r--r--src/backend/access/nbtree/nbtree.c6
-rw-r--r--src/backend/catalog/index.c3
-rw-r--r--src/backend/commands/analyze.c24
-rw-r--r--src/backend/commands/vacuum.c4
-rw-r--r--src/backend/commands/vacuumlazy.c4
-rw-r--r--src/backend/nodes/tidbitmap.c20
-rw-r--r--src/include/access/genam.h3
-rw-r--r--src/include/access/gin.h166
-rw-r--r--src/include/catalog/catversion.h4
-rw-r--r--src/include/catalog/pg_am.h4
-rw-r--r--src/include/catalog/pg_proc.h4
-rw-r--r--src/include/nodes/tidbitmap.h3
25 files changed, 1871 insertions, 147 deletions
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index b926689c5cb..880f2db5266 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.23 2009/03/23 16:36:27 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/common/reloptions.c,v 1.24 2009/03/24 20:17:09 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -56,6 +56,14 @@ static relopt_bool boolRelOpts[] =
},
true
},
+ {
+ {
+ "fastupdate",
+ "Enables \"fast update\" feature for this GIN index",
+ RELOPT_KIND_GIN
+ },
+ true
+ },
/* list terminator */
{ { NULL } }
};
diff --git a/src/backend/access/gin/Makefile b/src/backend/access/gin/Makefile
index 08946c88a73..23b75fc1d80 100644
--- a/src/backend/access/gin/Makefile
+++ b/src/backend/access/gin/Makefile
@@ -4,7 +4,7 @@
# Makefile for access/gin
#
# IDENTIFICATION
-# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.3 2008/02/19 10:30:06 petere Exp $
+# $PostgreSQL: pgsql/src/backend/access/gin/Makefile,v 1.4 2009/03/24 20:17:10 tgl Exp $
#
#-------------------------------------------------------------------------
@@ -14,6 +14,6 @@ include $(top_builddir)/src/Makefile.global
OBJS = ginutil.o gininsert.o ginxlog.o ginentrypage.o gindatapage.o \
ginbtree.o ginscan.o ginget.o ginvacuum.o ginarrayproc.o \
- ginbulk.o
+ ginbulk.o ginfast.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c
index 136f80d9977..a7258619aee 100644
--- a/src/backend/access/gin/ginbulk.c
+++ b/src/backend/access/gin/ginbulk.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.14 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.15 2009/03/24 20:17:10 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -197,6 +197,8 @@ ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber att
if (nentry <= 0)
return;
+ Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
+
i = nentry - 1;
for (; i > 0; i >>= 1)
nbit++;
diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c
index d0e426c6560..a872d44880c 100644
--- a/src/backend/access/gin/gindatapage.c
+++ b/src/backend/access/gin/gindatapage.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.13 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/gindatapage.c,v 1.14 2009/03/24 20:17:10 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -43,8 +43,16 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint
while (aptr - a < na && bptr - b < nb)
{
- if (compareItemPointers(aptr, bptr) > 0)
+ int cmp = compareItemPointers(aptr, bptr);
+
+ if (cmp > 0)
+ *dptr++ = *bptr++;
+ else if (cmp == 0)
+ {
+ /* we want only one copy of the identical items */
*dptr++ = *bptr++;
+ aptr++;
+ }
else
*dptr++ = *aptr++;
}
@@ -630,11 +638,16 @@ insertItemPointer(GinPostingTreeScan *gdi, ItemPointerData *items, uint32 nitem)
gdi->stack = ginFindLeafPage(&gdi->btree, gdi->stack);
if (gdi->btree.findItem(&(gdi->btree), gdi->stack))
- elog(ERROR, "item pointer (%u,%d) already exists",
- ItemPointerGetBlockNumber(gdi->btree.items + gdi->btree.curitem),
- ItemPointerGetOffsetNumber(gdi->btree.items + gdi->btree.curitem));
-
- ginInsertValue(&(gdi->btree), gdi->stack);
+ {
+ /*
+ * gdi->btree.items[gdi->btree.curitem] already exists in index
+ */
+ gdi->btree.curitem++;
+ LockBuffer(gdi->stack->buffer, GIN_UNLOCK);
+ freeGinBtreeStack(gdi->stack);
+ }
+ else
+ ginInsertValue(&(gdi->btree), gdi->stack);
gdi->stack = NULL;
}
diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c
new file mode 100644
index 00000000000..d8624237ec1
--- /dev/null
+++ b/src/backend/access/gin/ginfast.c
@@ -0,0 +1,866 @@
+/*-------------------------------------------------------------------------
+ *
+ * ginfast.c
+ * Fast insert routines for the Postgres inverted index access method.
+ * Pending entries are stored in linear list of pages. Later on
+ * (typically during VACUUM), ginInsertCleanup() will be invoked to
+ * transfer pending entries into the regular index structure. This
+ * wins because bulk insertion is much more efficient than retail.
+ *
+ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.1 2009/03/24 20:17:10 tgl Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/gin.h"
+#include "access/tuptoaster.h"
+#include "catalog/index.h"
+#include "commands/vacuum.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "utils/memutils.h"
+
+/* Usable bytes on a GIN page: BLCKSZ less the MAXALIGN'd SizeOfPageHeaderData and GinPageOpaqueData */
+#define GIN_PAGE_FREESIZE \
+ ( BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+typedef struct DatumArray
+{
+ Datum *values; /* palloc'd array; addDatum doubles it when full */
+ int32 nvalues; /* current number of valid entries */
+ int32 maxvalues; /* allocated size of array */
+} DatumArray;
+
+
+/*
+ * Build a pending-list page from the given array of tuples, WAL-log it when !index->rd_istemp, unlock and release the buffer, and return the page's free space.
+ */
+static int32
+writeListPage(Relation index, Buffer buffer,
+ IndexTuple *tuples, int32 ntuples, BlockNumber rightlink)
+{
+ Page page = BufferGetPage(buffer);
+ int i, freesize, size=0;
+ OffsetNumber l, off;
+ char *workspace;
+ char *ptr;
+
+ /* workspace collects a packed copy of the tuples for the WAL record; it could be a local array, but we use palloc for alignment */
+ workspace = palloc(BLCKSZ);
+
+ START_CRIT_SECTION();
+
+ GinInitBuffer(buffer, GIN_LIST);
+
+ off = FirstOffsetNumber;
+ ptr = workspace;
+
+ for(i=0; i<ntuples; i++)
+ {
+ int this_size = IndexTupleSize(tuples[i]);
+
+ memcpy(ptr, tuples[i], this_size);
+ ptr += this_size;
+ size += this_size;
+
+ l = PageAddItem(page, (Item)tuples[i], this_size, off, false, false);
+
+ if (l == InvalidOffsetNumber) /* NOTE(review): this ERROR would be raised inside the critical section - confirm that is acceptable */
+ elog(ERROR, "failed to add item to index page in \"%s\"",
+ RelationGetRelationName(index));
+
+ off++;
+ }
+
+ Assert(size <= BLCKSZ); /* else we overran workspace */
+
+ GinPageGetOpaque(page)->rightlink = rightlink;
+
+ /*
+ * A tail page (no rightlink) may contain only whole row(s) or the final
+ * part of a row placed on previous pages; it is flagged full-row, maxoff = 1
+ */
+ if ( rightlink == InvalidBlockNumber )
+ {
+ GinPageSetFullRow(page);
+ GinPageGetOpaque(page)->maxoff = 1;
+ }
+ else
+ {
+ GinPageGetOpaque(page)->maxoff = 0;
+ }
+
+ freesize = PageGetFreeSpace(page);
+
+ MarkBufferDirty(buffer);
+
+ if (!index->rd_istemp)
+ {
+ XLogRecData rdata[2];
+ ginxlogInsertListPage data;
+ XLogRecPtr recptr;
+
+ rdata[0].buffer = buffer;
+ rdata[0].buffer_std = true;
+ rdata[0].data = (char*)&data;
+ rdata[0].len = sizeof(ginxlogInsertListPage);
+ rdata[0].next = rdata+1;
+
+ rdata[1].buffer = InvalidBuffer;
+ rdata[1].data = workspace;
+ rdata[1].len = size;
+ rdata[1].next = NULL;
+
+ data.blkno = BufferGetBlockNumber(buffer);
+ data.rightlink = rightlink;
+ data.ntuples = ntuples;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+
+ UnlockReleaseBuffer(buffer);
+
+ END_CRIT_SECTION();
+
+ pfree(workspace);
+
+ return freesize;
+}
+/* Write tuples[0..ntuples) out as a new chain of list pages; *res receives head, tail, tailFreeSize and the page / heap-tuple counts. */
+static void
+makeSublist(Relation index, IndexTuple *tuples, int32 ntuples,
+ GinMetaPageData *res)
+{
+ Buffer curBuffer = InvalidBuffer;
+ Buffer prevBuffer = InvalidBuffer;
+ int i, size = 0, tupsize;
+ int startTuple = 0;
+
+ Assert(ntuples > 0);
+
+ /*
+ * Split tuples into pages; a page is written only once its successor is allocated, so that its rightlink is known
+ */
+ for(i=0;i<ntuples;i++)
+ {
+ if ( curBuffer == InvalidBuffer )
+ {
+ curBuffer = GinNewBuffer(index);
+
+ if ( prevBuffer != InvalidBuffer )
+ {
+ res->nPendingPages++;
+ writeListPage(index, prevBuffer,
+ tuples+startTuple, i-startTuple,
+ BufferGetBlockNumber(curBuffer));
+ }
+ else
+ {
+ res->head = BufferGetBlockNumber(curBuffer);
+ }
+
+ prevBuffer = curBuffer;
+ startTuple = i;
+ size = 0;
+ }
+
+ tupsize = MAXALIGN(IndexTupleSize(tuples[i])) + sizeof(ItemIdData);
+
+ if ( size + tupsize >= GinListPageSize )
+ {
+ /* won't fit: step back so the loop's i++ reprocesses this tuple on a new page */
+ i--;
+ curBuffer = InvalidBuffer;
+ }
+ else
+ {
+ size += tupsize;
+ }
+ }
+
+ /*
+ * Write last page (no rightlink); its free space becomes the list's tailFreeSize
+ */
+ res->tail = BufferGetBlockNumber(curBuffer);
+ res->tailFreeSize = writeListPage(index, curBuffer,
+ tuples+startTuple, ntuples-startTuple,
+ InvalidBlockNumber);
+ res->nPendingPages++;
+ /* that was only one heap tuple */
+ res->nPendingHeapTuples = 1;
+}
+
+/*
+ * Insert the tuples gathered in *collector into the pending list during normal insertion.
+ * Function guarantees that all values of the heap tuple are stored sequentially, preserving order
+ */
+void
+ginHeapTupleFastInsert(Relation index, GinState *ginstate,
+ GinTupleCollector *collector)
+{
+ Buffer metabuffer;
+ Page metapage;
+ GinMetaPageData *metadata = NULL;
+ XLogRecData rdata[2];
+ Buffer buffer = InvalidBuffer;
+ Page page = NULL;
+ ginxlogUpdateMeta data;
+ bool separateList = false;
+ bool needCleanup = false;
+
+ if ( collector->ntuples == 0 )
+ return;
+
+ data.node = index->rd_node;
+ data.ntuples = 0;
+ data.newRightlink = data.prevTail = InvalidBlockNumber;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogUpdateMeta);
+ rdata[0].next = NULL;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ metapage = BufferGetPage(metabuffer);
+
+ if ( collector->sumsize + collector->ntuples * sizeof(ItemIdData) > GIN_PAGE_FREESIZE )
+ {
+ /*
+ * Total size is greater than one page => make sublist
+ */
+ separateList = true;
+ }
+ else
+ {
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber ||
+ collector->sumsize + collector->ntuples * sizeof(ItemIdData) > metadata->tailFreeSize )
+ {
+ /*
+ * Pending list is empty or total size is greater than freespace
+ * on tail page => make sublist
+ *
+ * We unlock metabuffer to keep high concurrency
+ */
+ separateList = true;
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ }
+ }
+
+ if ( separateList )
+ {
+ GinMetaPageData sublist;
+
+ /*
+ * We should make sublist separately and append it to the tail
+ */
+ memset( &sublist, 0, sizeof(GinMetaPageData) );
+
+ makeSublist(index, collector->tuples, collector->ntuples, &sublist);
+
+ /*
+ * metapage is not locked at this point: it was either never locked or unlocked above
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ /*
+ * Sublist becomes main list
+ */
+ START_CRIT_SECTION();
+ memcpy(metadata, &sublist, sizeof(GinMetaPageData) );
+ memcpy(&data.metadata, &sublist, sizeof(GinMetaPageData) );
+ }
+ else
+ {
+ /*
+ * merge lists: link the sublist after the current tail page and add its counts to the metapage
+ */
+
+ data.prevTail = metadata->tail;
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
+
+ START_CRIT_SECTION();
+
+ GinPageGetOpaque(page)->rightlink = sublist.head;
+ metadata->tail = sublist.tail;
+ metadata->tailFreeSize = sublist.tailFreeSize;
+
+ metadata->nPendingPages += sublist.nPendingPages;
+ metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
+
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ data.newRightlink = sublist.head;
+
+ MarkBufferDirty(buffer);
+ }
+ }
+ else
+ {
+ /*
+ * Insert into tail page, metapage is already locked
+ */
+
+ OffsetNumber l, off;
+ int i, tupsize;
+ char *ptr;
+
+ buffer = ReadBuffer(index, metadata->tail);
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+ page = BufferGetPage(buffer);
+ off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+ OffsetNumberNext(PageGetMaxOffsetNumber(page));
+
+ rdata[0].next = rdata + 1;
+
+ rdata[1].buffer = buffer;
+ rdata[1].buffer_std = true;
+ ptr = rdata[1].data = (char *) palloc( collector->sumsize ); /* NOTE(review): never pfree'd in this function */
+ rdata[1].len = collector->sumsize;
+ rdata[1].next = NULL;
+
+ data.ntuples = collector->ntuples;
+
+ START_CRIT_SECTION();
+
+ /*
+ * Increase counter of heap tuples, both on the tail page and in the metapage
+ */
+ Assert( GinPageGetOpaque(page)->maxoff <= metadata->nPendingHeapTuples );
+ GinPageGetOpaque(page)->maxoff++;
+ metadata->nPendingHeapTuples++;
+
+ for(i=0; i<collector->ntuples; i++)
+ {
+ tupsize = IndexTupleSize(collector->tuples[i]);
+ l = PageAddItem(page, (Item)collector->tuples[i], tupsize, off, false, false);
+
+ if (l == InvalidOffsetNumber) /* NOTE(review): this ERROR would be raised inside the critical section - confirm that is acceptable */
+ elog(ERROR, "failed to add item to index page in \"%s\"",
+ RelationGetRelationName(index));
+
+ memcpy(ptr, collector->tuples[i], tupsize);
+ ptr+=tupsize;
+
+ off++;
+ }
+
+ metadata->tailFreeSize -= collector->sumsize + collector->ntuples * sizeof(ItemIdData);
+ memcpy(&data.metadata, metadata, sizeof(GinMetaPageData) );
+ MarkBufferDirty(buffer);
+ }
+
+ /*
+ * Make real write: dirty the metapage and, when !index->rd_istemp, emit the WAL record
+ */
+
+ MarkBufferDirty(metabuffer);
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ if ( buffer != InvalidBuffer )
+ {
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ }
+
+ if (buffer != InvalidBuffer)
+ UnlockReleaseBuffer(buffer);
+
+ /*
+ * Force pending list cleanup when it becomes too long.
+ * And, ginInsertCleanup could take significant amount of
+ * time, so we prefer to call it when it can do all the work in a
+ * single collection cycle. In non-vacuum mode, it shouldn't
+ * require maintenance_work_mem, so fire it while pending list is
+ * still small enough to fit into work_mem.
+ *
+ * ginInsertCleanup() should not be called inside our CRIT_SECTION.
+ */
+ if ( metadata->nPendingPages * GIN_PAGE_FREESIZE > work_mem * 1024L )
+ needCleanup = true;
+
+ UnlockReleaseBuffer(metabuffer);
+
+ END_CRIT_SECTION();
+
+ if ( needCleanup )
+ ginInsertCleanup(index, ginstate, false, NULL);
+}
+
+/*
+ * Extract the entries of one column value of a heap tuple and append them, as index tuples, to *collector; returns the number of entries added.
+ * All values for one heap tuple should be written at once - to guarantee consistent state
+ */
+uint32
+ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item)
+{
+ Datum *entries;
+ int32 i,
+ nentries;
+
+ entries = extractEntriesSU(ginstate, attnum, value, &nentries);
+
+ if (nentries == 0)
+ /* nothing to insert */
+ return 0;
+
+ /*
+ * Allocate/reallocate memory for storing collected tuples (array is doubled until it fits)
+ */
+ if ( collector->tuples == NULL )
+ {
+ collector->lentuples = nentries * index->rd_att->natts;
+ collector->tuples = (IndexTuple*)palloc(sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ while ( collector->ntuples + nentries > collector->lentuples )
+ {
+ collector->lentuples *= 2;
+ collector->tuples = (IndexTuple*)repalloc( collector->tuples,
+ sizeof(IndexTuple) * collector->lentuples);
+ }
+
+ /*
+ * Form one index tuple per entry, stamp it with the heap pointer, and add its size to sumsize
+ */
+ for (i = 0; i < nentries; i++)
+ {
+ int32 tupsize;
+
+ collector->tuples[collector->ntuples + i] = GinFormTuple(ginstate, attnum, entries[i], NULL, 0);
+ collector->tuples[collector->ntuples + i]->t_tid = *item;
+ tupsize = IndexTupleSize(collector->tuples[collector->ntuples + i]);
+
+ if ( tupsize > TOAST_INDEX_TARGET || tupsize >= GinMaxItemSize)
+ elog(ERROR, "huge tuple");
+
+ collector->sumsize += tupsize;
+ }
+
+ collector->ntuples += nentries;
+
+ return nentries;
+}
+
+/*
+ * Deletes pending list pages up to (not including) newHead page.
+ * If newHead == InvalidBlockNumber then function drops the whole list.
+ *
+ * metapage is pinned and exclusive-locked throughout this function.
+ *
+ * Returns true if another cleanup process is running concurrently
+ * (if so, we can just abandon our own efforts)
+ */
+static bool
+shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
+ IndexBulkDeleteResult *stats)
+{
+ Page metapage;
+ GinMetaPageData *metadata;
+ BlockNumber blknoToDelete;
+
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+ blknoToDelete = metadata->head;
+
+ do
+ {
+ Page page;
+ int i;
+ int64 nDeletedHeapTuples = 0;
+ ginxlogDeleteListPages data;
+ XLogRecData rdata[1];
+ Buffer buffers[GIN_NDELETE_AT_ONCE];
+
+ data.node = index->rd_node;
+
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].data = (char *) &data;
+ rdata[0].len = sizeof(ginxlogDeleteListPages);
+ rdata[0].next = NULL;
+
+ data.ndeleted = 0;
+ while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead) /* lock up to GIN_NDELETE_AT_ONCE pages per batch / WAL record */
+ {
+ data.toDelete[ data.ndeleted ] = blknoToDelete;
+ buffers[ data.ndeleted ] = ReadBuffer(index, blknoToDelete);
+ LockBuffer( buffers[ data.ndeleted ], GIN_EXCLUSIVE );
+ page = BufferGetPage( buffers[ data.ndeleted ] );
+
+ data.ndeleted++;
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* concurrent cleanup process is detected: release every buffer locked in this batch */
+ for(i=0;i<data.ndeleted;i++)
+ UnlockReleaseBuffer( buffers[i] );
+
+ return true;
+ }
+
+ nDeletedHeapTuples += GinPageGetOpaque(page)->maxoff;
+ blknoToDelete = GinPageGetOpaque( page )->rightlink;
+ }
+
+ if (stats)
+ stats->pages_deleted += data.ndeleted;
+
+ START_CRIT_SECTION();
+
+ metadata->head = blknoToDelete;
+
+ Assert( metadata->nPendingPages >= data.ndeleted );
+ metadata->nPendingPages -= data.ndeleted;
+ Assert( metadata->nPendingHeapTuples >= nDeletedHeapTuples );
+ metadata->nPendingHeapTuples -= nDeletedHeapTuples;
+
+ if ( blknoToDelete == InvalidBlockNumber ) /* list is now empty: reset the remaining metapage fields */
+ {
+ metadata->tail = InvalidBlockNumber;
+ metadata->tailFreeSize = 0;
+ metadata->nPendingPages = 0;
+ metadata->nPendingHeapTuples = 0;
+ }
+ memcpy( &data.metadata, metadata, sizeof(GinMetaPageData));
+
+ MarkBufferDirty( metabuffer );
+
+ for(i=0; i<data.ndeleted; i++)
+ {
+ page = BufferGetPage( buffers[ i ] );
+ GinPageGetOpaque( page )->flags = GIN_DELETED;
+ MarkBufferDirty( buffers[ i ] );
+ }
+
+ if ( !index->rd_istemp )
+ {
+ XLogRecPtr recptr;
+
+ recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
+ PageSetLSN(metapage, recptr);
+ PageSetTLI(metapage, ThisTimeLineID);
+
+ for(i=0; i<data.ndeleted; i++)
+ {
+ page = BufferGetPage( buffers[ i ] );
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+ }
+
+ for(i=0; i<data.ndeleted; i++)
+ UnlockReleaseBuffer( buffers[ i ] );
+
+ END_CRIT_SECTION();
+ } while( blknoToDelete != newHead );
+
+ return false;
+}
+
+/* Add datum to DatumArray, doubling the array when it is full (so maxvalues must start out > 0) */
+static void
+addDatum(DatumArray *datums, Datum datum)
+{
+ if ( datums->nvalues >= datums->maxvalues)
+ {
+ datums->maxvalues *= 2;
+ datums->values = (Datum*)repalloc(datums->values,
+ sizeof(Datum)*datums->maxvalues);
+ }
+
+ datums->values[ datums->nvalues++ ] = datum;
+}
+
+/*
+ * Go through all tuples >= startoff on page and collect values in memory: runs of tuples sharing heap pointer and attribute number go to accum in one ginInsertRecordBA call
+ *
+ * Note that da is just workspace --- it does not carry any state across
+ * calls.
+ */
+static void
+processPendingPage(BuildAccumulator *accum, DatumArray *da,
+ Page page, OffsetNumber startoff)
+{
+ ItemPointerData heapptr;
+ OffsetNumber i,maxoff;
+ OffsetNumber attrnum, curattnum;
+
+ /* reset *da to empty */
+ da->nvalues = 0;
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ Assert( maxoff >= FirstOffsetNumber );
+ ItemPointerSetInvalid(&heapptr);
+ attrnum = 0;
+
+ for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ IndexTuple itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
+
+ curattnum = gintuple_get_attrnum(accum->ginstate, itup);
+
+ if ( !ItemPointerIsValid(&heapptr) ) /* first tuple seen: start the first group */
+ {
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ else if ( !(ItemPointerEquals(&heapptr, &itup->t_tid) &&
+ curattnum == attrnum) )
+ {
+ /*
+ * We can insert several datums per call, but only for one heap
+ * tuple and one column. So flush the finished group, then start a new one.
+ */
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues);
+ da->nvalues = 0;
+ heapptr = itup->t_tid;
+ attrnum = curattnum;
+ }
+ addDatum(da, gin_index_getattr(accum->ginstate, itup));
+ }
+
+ ginInsertRecordBA(accum, &heapptr, attrnum, da->values, da->nvalues); /* flush the final group */
+}
+
+/*
+ * Move tuples from pending pages into regular GIN structure.
+ *
+ * This can be called concurrently by multiple backends, so it must cope.
+ * At first glance it looks completely not concurrent-safe and not crash-safe
+ * either. The reason it's okay is that multiple insertion of the same entry
+ * is detected and treated as a no-op by gininsert.c. If we crash after
+ * posting entries to the main index and before removing them from the
+ * pending list, it's okay because when we redo the posting later on, nothing
+ * bad will happen. Likewise, if two backends simultaneously try to post
+ * a pending entry into the main index, one will succeed and one will do
+ * nothing. We try to notice when someone else is a little bit ahead of
+ * us in the process, but that's just to avoid wasting cycles. Only the
+ * action of removing a page from the pending list really needs exclusive
+ * lock.
+ *
+ * vac_delay indicates that ginInsertCleanup is called from vacuum process,
+ * so call vacuum_delay_point() periodically.
+ * If stats isn't null, we count deleted pending pages into the counts.
+ */
+void
+ginInsertCleanup(Relation index, GinState *ginstate,
+ bool vac_delay, IndexBulkDeleteResult *stats)
+{
+ Buffer metabuffer, buffer;
+ Page metapage, page;
+ GinMetaPageData *metadata;
+ MemoryContext opCtx, oldCtx;
+ BuildAccumulator accum;
+ DatumArray datums;
+ BlockNumber blkno;
+
+ metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
+ LockBuffer(metabuffer, GIN_SHARE);
+ metapage = BufferGetPage(metabuffer);
+ metadata = GinPageGetMeta(metapage);
+
+ if ( metadata->head == InvalidBlockNumber )
+ {
+ /* Nothing to do */
+ UnlockReleaseBuffer(metabuffer);
+ return;
+ }
+
+ /*
+ * Read and lock head of pending list
+ */
+ blkno = metadata->head;
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+ * Initialize. All temporary space will be in opCtx
+ */
+ opCtx = AllocSetContextCreate(CurrentMemoryContext,
+ "GIN insert cleanup temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+
+ oldCtx = MemoryContextSwitchTo(opCtx);
+
+ datums.maxvalues=128;
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+
+ ginInitBA(&accum);
+ accum.ginstate = ginstate;
+
+ /*
+ * At the top of this loop, we have pin and lock on the current page
+ * of the pending list. However, we'll release that before exiting
+ * the loop. Note we also have pin but not lock on the metapage.
+ */
+ for(;;)
+ {
+ if ( GinPageIsDeleted(page) )
+ {
+ /* another cleanup process is running concurrently */
+ UnlockReleaseBuffer( buffer );
+ break;
+ }
+
+ /*
+ * read page's datums into memory
+ */
+ processPendingPage(&accum, &datums, page, FirstOffsetNumber);
+
+ if (vac_delay)
+ vacuum_delay_point();
+
+ /*
+ * Is it time to flush memory to disk? Flush if we are at the end
+ * of the pending list, or if we have a full row and memory is
+ * getting full.
+ *
+ * XXX using up maintenance_work_mem here is probably unreasonably
+ * much, since vacuum might already be using that much.
+ */
+ if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+ ( GinPageHasFullRow(page) &&
+ accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+ {
+ ItemPointerData *list;
+ uint32 nlist;
+ Datum entry;
+ OffsetNumber maxoff, attnum;
+
+ /*
+ * Unlock current page to increase performance.
+ * Changes of page will be checked later by comparing
+ * maxoff after completion of memory flush.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+ LockBuffer(buffer, GIN_UNLOCK);
+
+ /*
+ * Moving collected data into regular structure can take
+ * significant amount of time - so, run it without locking pending
+ * list.
+ */
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ {
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ if (vac_delay)
+ vacuum_delay_point();
+ }
+
+ /*
+ * Lock the whole list to remove pages
+ */
+ LockBuffer(metabuffer, GIN_EXCLUSIVE);
+ LockBuffer(buffer, GIN_SHARE);
+
+ if ( GinPageIsDeleted(page) )
+ {
+ /* another cleanup process is running concurrently */
+ UnlockReleaseBuffer(buffer);
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ /*
+ * While we left the page unlocked, more stuff might have gotten
+ * added to it. If so, process those entries immediately. There
+ * shouldn't be very many, so we don't worry about the fact that
+ * we're doing this with exclusive lock. Insertion algorithm
+ * guarantees that inserted row(s) will not continue on next page.
+ * NOTE: intentionally no vacuum_delay_point in this loop.
+ */
+ if ( PageGetMaxOffsetNumber(page) != maxoff )
+ {
+ ginInitBA(&accum);
+ processPendingPage(&accum, &datums, page, maxoff+1);
+
+ while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
+ ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
+ }
+
+ /*
+ * Remember next page - it will become the new list head
+ */
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer); /* shiftList will do exclusive locking */
+
+ /*
+ * remove the pages we have read from pending list, at this point all
+ * content of those pages is in regular structure
+ */
+ if ( shiftList(index, metabuffer, blkno, stats) )
+ {
+ /* another cleanup process is running concurrently */
+ LockBuffer(metabuffer, GIN_UNLOCK);
+ break;
+ }
+
+ Assert( blkno == metadata->head );
+ LockBuffer(metabuffer, GIN_UNLOCK);
+
+ /*
+ * if we removed the whole pending list just exit
+ */
+ if ( blkno == InvalidBlockNumber )
+ break;
+
+ /*
+ * release memory used so far and reinit state (datums.values lived in opCtx, so it is allocated afresh)
+ */
+ MemoryContextReset(opCtx);
+ ginInitBA(&accum);
+ datums.nvalues = 0;
+ datums.values = (Datum*)palloc(sizeof(Datum)*datums.maxvalues);
+ }
+ else
+ {
+ blkno = GinPageGetOpaque(page)->rightlink;
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /*
+ * Read next page in pending list
+ */
+ CHECK_FOR_INTERRUPTS();
+ buffer = ReadBuffer(index, blkno);
+ LockBuffer(buffer, GIN_SHARE);
+ page = BufferGetPage(buffer);
+ }
+
+ ReleaseBuffer(metabuffer); /* drop the pin only; every path out of the loop has already unlocked the metapage */
+
+ /* Clean up temporary space */
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextDelete(opCtx);
+}
diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c
index 182981498c1..7f9f1236605 100644
--- a/src/backend/access/gin/ginget.c
+++ b/src/backend/access/gin/ginget.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.22 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.23 2009/03/24 20:17:10 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -23,6 +23,15 @@
#include "utils/memutils.h"
+typedef struct pendingPosition
+{
+ Buffer pendingBuffer; /* pending-list page currently being scanned */
+ OffsetNumber firstOffset; /* once this exceeds the page's max offset, the scan moves to the next page */
+ OffsetNumber lastOffset; /* NOTE(review): use not visible in this excerpt - presumably the end of the current row's tuples; confirm */
+ ItemPointerData item; /* heap pointer of the candidate row; set invalid at the start of scanGetCandidate */
+} pendingPosition;
+
+
/*
* Tries to refind previously taken ItemPointer on page.
*/
@@ -258,7 +267,7 @@ computePartialMatchList( GinBtreeData *btree, GinBtreeStack *stack, GinScanEntry
}
/*
- * Start* functions setup begining state of searches: finds correct buffer and pins it.
+ * Start* functions setup beginning state of searches: finds correct buffer and pins it.
*/
static void
startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
@@ -268,6 +277,15 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
Page page;
bool needUnlock = TRUE;
+ entry->buffer = InvalidBuffer;
+ entry->offset = InvalidOffsetNumber;
+ entry->list = NULL;
+ entry->nlist = 0;
+ entry->partialMatch = NULL;
+ entry->partialMatchResult = NULL;
+ entry->reduceResult = FALSE;
+ entry->predictNumberResult = 0;
+
if (entry->master != NULL)
{
entry->isFinished = entry->master->isFinished;
@@ -285,15 +303,6 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
page = BufferGetPage(stackEntry->buffer);
entry->isFinished = TRUE;
- entry->buffer = InvalidBuffer;
- entry->offset = InvalidOffsetNumber;
- entry->list = NULL;
- entry->nlist = 0;
- entry->partialMatch = NULL;
- entry->partialMatchIterator = NULL;
- entry->partialMatchResult = NULL;
- entry->reduceResult = FALSE;
- entry->predictNumberResult = 0;
if ( entry->isPartialMatch )
{
@@ -354,9 +363,10 @@ startScanEntry(Relation index, GinState *ginstate, GinScanEntry entry)
entry->buffer = scanBeginPostingTree(gdi);
/*
- * We keep buffer pinned because we need to prevent deletition
+ * We keep buffer pinned because we need to prevent deletion of
* page during scan. See GIN's vacuum implementation. RefCount
- * is increased to keep buffer pinned after freeGinBtreeStack() call.
+ * is increased to keep buffer pinned after freeGinBtreeStack()
+ * call.
*/
IncrBufferRefCount(entry->buffer);
@@ -536,9 +546,10 @@ entryGetItem(Relation index, GinScanEntry entry)
{
do
{
- if ( entry->partialMatchResult == NULL || entry->offset >= entry->partialMatchResult->ntuples )
+ if (entry->partialMatchResult == NULL ||
+ entry->offset >= entry->partialMatchResult->ntuples)
{
- entry->partialMatchResult = tbm_iterate( entry->partialMatchIterator );
+ entry->partialMatchResult = tbm_iterate(entry->partialMatchIterator);
if ( entry->partialMatchResult == NULL )
{
@@ -548,23 +559,37 @@ entryGetItem(Relation index, GinScanEntry entry)
entry->isFinished = TRUE;
break;
}
- else if ( entry->partialMatchResult->ntuples < 0 )
- {
- /* bitmap became lossy */
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("not enough memory to store result of partial match operator" ),
- errhint("Increase the \"work_mem\" parameter.")));
- }
+
+ /*
+ * reset counter to the beginning of entry->partialMatchResult.
+ * Note: entry->offset is still greater than
+ * partialMatchResult->ntuples if partialMatchResult is
+ * lossy. So, on next call we will get next result from
+ * TIDBitmap.
+ */
entry->offset = 0;
}
- ItemPointerSet(&entry->curItem,
- entry->partialMatchResult->blockno,
- entry->partialMatchResult->offsets[ entry->offset ]);
- entry->offset ++;
+ if ( entry->partialMatchResult->ntuples < 0 )
+ {
+ /*
+ * lossy result, so we need to check the whole page
+ */
+ ItemPointerSetLossyPage(&entry->curItem,
+ entry->partialMatchResult->blockno);
+ /*
+ * We might as well fall out of the loop; we could not
+ * estimate number of results on this page to support correct
+ * reducing of result even if it's enabled
+ */
+ break;
+ }
- } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry));
+ ItemPointerSet(&entry->curItem,
+ entry->partialMatchResult->blockno,
+ entry->partialMatchResult->offsets[entry->offset]);
+ entry->offset++;
+ } while (entry->reduceResult == TRUE && dropItem(entry));
}
else if (!BufferIsValid(entry->buffer))
{
@@ -618,6 +643,10 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
if (key->entryRes[i])
{
+ /*
+ * Move forward only those entries that were the least
+ * on the previous call
+ */
if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE)
{
if (compareItemPointers(&entry->curItem, &key->curItem) < 0)
@@ -664,6 +693,13 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
*/
*keyrecheck = true;
+ /*
+ * If one of the entry's scans returns lossy result, return it without
+ * checking - we can't suggest anything helpful to consistentFn.
+ */
+ if (ItemPointerIsLossyPage(&key->curItem))
+ return FALSE;
+
oldCtx = MemoryContextSwitchTo(tempCtx);
res = DatumGetBool(FunctionCall4(&ginstate->consistentFn[key->attnum-1],
PointerGetDatum(key->entryRes),
@@ -677,6 +713,337 @@ keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx,
return FALSE;
}
+
+/*
+ * Get ItemPointer of next heap row to be checked from pending list.
+ * Returns false if there are no more.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry, and is
+ * pinned and share-locked on success exit. On failure exit it's released.
+ *
+ * The search starts at pos->firstOffset on the current page. On success,
+ * pos->item holds the heap TID taken from the index tuple at
+ * pos->firstOffset, and pos->lastOffset is one past the last tuple on the
+ * current page that is treated as belonging to that heap row, i.e. the
+ * row's tuples on this page are at offsets [firstOffset, lastOffset).
+ * On failure pos->item is left invalid and pos->pendingBuffer is set to
+ * InvalidBuffer.
+ */
+static bool
+scanGetCandidate(IndexScanDesc scan, pendingPosition *pos)
+{
+ OffsetNumber maxoff;
+ Page page;
+ IndexTuple itup;
+
+ ItemPointerSetInvalid( &pos->item ); /* stays invalid unless a row is found */
+ for(;;)
+ {
+ page = BufferGetPage(pos->pendingBuffer);
+
+ maxoff = PageGetMaxOffsetNumber(page);
+ if ( pos->firstOffset > maxoff ) /* current page is exhausted */
+ {
+ BlockNumber blkno = GinPageGetOpaque(page)->rightlink;
+ if ( blkno == InvalidBlockNumber ) /* no page to the right: end of list */
+ {
+ UnlockReleaseBuffer(pos->pendingBuffer);
+ pos->pendingBuffer=InvalidBuffer;
+
+ return false;
+ }
+ else
+ {
+ /*
+ * Here we must prevent deletion of next page by
+ * insertcleanup process, which may be trying to obtain
+ * exclusive lock on current page. So, we lock next
+ * page before releasing the current one
+ */
+ Buffer tmpbuf = ReadBuffer(scan->indexRelation, blkno);
+
+ LockBuffer(tmpbuf, GIN_SHARE);
+ UnlockReleaseBuffer(pos->pendingBuffer);
+
+ pos->pendingBuffer = tmpbuf;
+ pos->firstOffset = FirstOffsetNumber; /* restart at the top of the new page */
+ }
+ }
+ else
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->firstOffset));
+ pos->item = itup->t_tid; /* heap TID of the candidate row */
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * find itempointer to the next row
+ */
+ for(pos->lastOffset = pos->firstOffset+1; pos->lastOffset<=maxoff; pos->lastOffset++)
+ {
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, pos->lastOffset));
+ if (!ItemPointerEquals(&pos->item, &itup->t_tid))
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * All itempointers are the same on this page
+ */
+ pos->lastOffset = maxoff + 1;
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Check whether any pending-list tuple in [off, maxoff) on the given page
+ * partially matches the query value.
+ *
+ * The scan walks forward from 'off' and stops as soon as a tuple belonging
+ * to a different attribute than 'attrnum' is seen. 'datum' and
+ * 'datumExtracted' are caller-owned caches indexed by (offset - 1); a
+ * value is extracted from its tuple at most once and reused afterwards.
+ *
+ * The opclass comparePartial function decides the outcome for each tuple:
+ *		== 0	match, return true
+ *		 > 0	no match, and the scan ends (return false)
+ *		 < 0	no match, continue with the next tuple
+ *
+ * Returns false if the range is exhausted without a match.
+ */
+static bool
+matchPartialInPendingList(GinState *ginstate, Page page,
+						  OffsetNumber off, OffsetNumber maxoff,
+						  Datum value, OffsetNumber attrnum,
+						  Datum *datum, bool *datumExtracted,
+						  StrategyNumber strategy)
+{
+	IndexTuple	itup;
+	int			res;
+
+	while (off < maxoff)
+	{
+		itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, off));
+		if (attrnum != gintuple_get_attrnum(ginstate, itup))
+			return false;
+
+		if (datumExtracted[off - 1] == false)
+		{
+			datum[off - 1] = gin_index_getattr(ginstate, itup);
+			datumExtracted[off - 1] = true;
+		}
+
+		/*
+		 * attrnum is 1-based, while the per-attribute support function
+		 * arrays are 0-based (compare consistentFn[key->attnum-1]).
+		 */
+		res = DatumGetInt32(FunctionCall3(&ginstate->comparePartialFn[attrnum - 1],
+										  value,
+										  datum[off - 1],
+										  UInt16GetDatum(strategy)));
+		if (res == 0)
+			return true;
+		else if (res > 0)
+			return false;
+
+		/* res < 0: advance, otherwise we would loop on this tuple forever */
+		off++;
+	}
+
+	return false;
+}
+
+/*
+ * Sets the entryRes array of every scan key by looking at all pending-list
+ * index tuples that belong to one heap row (pos->item).
+ * Returns true if at least one datum of the row was matched by some
+ * key's entry.
+ *
+ * For each page holding part of the row, every not-yet-matched entry is
+ * looked up by binary search over offsets [pos->firstOffset,
+ * pos->lastOffset), comparing first by attribute number and then with
+ * compareEntries(). Entries flagged isPartialMatch are then checked with
+ * matchPartialInPendingList() starting from the position the search
+ * stopped at. If the page is not marked as holding full rows, the row
+ * is continued on the next page via scanGetCandidate().
+ *
+ * On return pos->firstOffset has been advanced to pos->lastOffset.
+ *
+ * The pendingBuffer is presumed pinned and share-locked on entry.
+ */
+static bool
+collectDatumForItem(IndexScanDesc scan, pendingPosition *pos)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ OffsetNumber attrnum;
+ Page page;
+ IndexTuple itup;
+ int i, j;
+ bool hasMatch = false;
+
+ /*
+ * Resets entryRes
+ */
+ for (i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+ memset( key->entryRes, FALSE, key->nentries );
+ }
+
+ for(;;) /* one iteration per page holding tuples of this row */
+ {
+ Datum datum[ BLCKSZ/sizeof(IndexTupleData) ]; /* cache of extracted values, indexed by offset-1 */
+ bool datumExtracted[ BLCKSZ/sizeof(IndexTupleData) ]; /* true where datum[] is filled */
+
+ Assert( pos->lastOffset > pos->firstOffset );
+ memset(datumExtracted + pos->firstOffset - 1, 0, sizeof(bool) * (pos->lastOffset - pos->firstOffset )); /* clear only the slots of this row */
+
+ page = BufferGetPage(pos->pendingBuffer);
+
+ for(i = 0; i < so->nkeys; i++)
+ {
+ GinScanKey key = so->keys + i;
+
+ for(j=0; j<key->nentries; j++)
+ {
+ OffsetNumber StopLow = pos->firstOffset,
+ StopHigh = pos->lastOffset,
+ StopMiddle;
+ GinScanEntry entry = key->scanEntry + j;
+
+ if ( key->entryRes[j] ) /* already matched on an earlier page of this row */
+ continue;
+
+ while (StopLow < StopHigh) /* binary search over [StopLow, StopHigh) */
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, StopMiddle));
+ attrnum = gintuple_get_attrnum(&so->ginstate, itup);
+
+ if (key->attnum < attrnum)
+ StopHigh = StopMiddle;
+ else if (key->attnum > attrnum)
+ StopLow = StopMiddle + 1;
+ else
+ {
+ int res;
+
+ if (datumExtracted[ StopMiddle-1 ] == false)
+ {
+ datum[ StopMiddle-1 ] = gin_index_getattr(&so->ginstate, itup);
+ datumExtracted[ StopMiddle-1 ] = true;
+ }
+ res = compareEntries(&so->ginstate,
+ entry->attnum,
+ entry->entry,
+ datum[ StopMiddle-1 ]);
+
+ if ( res == 0 )
+ {
+ if ( entry->isPartialMatch ) /* equal under compareEntries: confirm via partial-match scan from here */
+ key->entryRes[j] =
+ matchPartialInPendingList(&so->ginstate,
+ page, StopMiddle,
+ pos->lastOffset,
+ entry->entry,
+ entry->attnum,
+ datum,
+ datumExtracted,
+ entry->strategy);
+ else
+ key->entryRes[j] = true;
+ break; /* leaves the binary search with StopLow < StopHigh */
+ }
+ else if ( res < 0 )
+ StopHigh = StopMiddle;
+ else
+ StopLow = StopMiddle + 1;
+ }
+ }
+
+ if ( StopLow>=StopHigh && entry->isPartialMatch ) /* no equal value found: scan forward from where the search ended */
+ key->entryRes[j] =
+ matchPartialInPendingList(&so->ginstate,
+ page, StopHigh,
+ pos->lastOffset,
+ entry->entry,
+ entry->attnum,
+ datum,
+ datumExtracted,
+ entry->strategy);
+
+ hasMatch |= key->entryRes[j];
+ }
+ }
+
+ pos->firstOffset = pos->lastOffset; /* this row's tuples on this page are consumed */
+
+ if ( GinPageHasFullRow(page) )
+ {
+ /*
+ * We scan all values from one tuple, go to next one
+ */
+
+ return hasMatch;
+ }
+ else
+ {
+ ItemPointerData item = pos->item; /* remember the row we are in the middle of */
+
+ if ( scanGetCandidate(scan, pos) == false || !ItemPointerEquals(&pos->item, &item) ) /* the continuation must exist and belong to the same row */
+ elog(ERROR,"Could not process tuple"); /* XXX should not be here ! */
+ }
+ }
+
+ return hasMatch; /* the for(;;) above contains no break of its own, only the return and the elog */
+}
+
+/*
+ * Collect all matched rows from pending list in bitmap
+ *
+ * Reads the head of the pending list from the metapage, then visits each
+ * heap row in the list with scanGetCandidate() and
+ * collectDatumForItem(). A row is added to tbm only if the consistent
+ * function of every scan key accepts it; the recheck flag passed to
+ * tbm_add_tuples() is the OR of the per-key recheck flags.
+ *
+ * *ntids is set to the number of rows added to tbm (0 if there is no
+ * pending list).
+ */
+static void
+scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids)
+{
+ GinScanOpaque so = (GinScanOpaque) scan->opaque;
+ MemoryContext oldCtx;
+ bool recheck, keyrecheck, match;
+ int i;
+ pendingPosition pos;
+ Buffer metabuffer = ReadBuffer(scan->indexRelation, GIN_METAPAGE_BLKNO);
+ BlockNumber blkno;
+
+ *ntids = 0;
+
+ LockBuffer(metabuffer, GIN_SHARE);
+ blkno = GinPageGetMeta(BufferGetPage(metabuffer))->head;
+
+ /*
+ * fetch head of list before unlocking metapage.
+ * head page must be pinned to prevent deletion by vacuum process
+ */
+ if ( blkno == InvalidBlockNumber )
+ {
+ /* No pending list, so proceed with normal scan */
+ UnlockReleaseBuffer( metabuffer );
+ return;
+ }
+
+ pos.pendingBuffer = ReadBuffer(scan->indexRelation, blkno);
+ LockBuffer(pos.pendingBuffer, GIN_SHARE);
+ pos.firstOffset = FirstOffsetNumber;
+ UnlockReleaseBuffer( metabuffer ); /* released only after the head page is pinned and locked */
+
+ /*
+ * loop for each heap row
+ */
+ while( scanGetCandidate(scan, &pos) )
+ {
+
+ /*
+ * Check entries in rows and setup entryRes array
+ */
+ if (!collectDatumForItem(scan, &pos))
+ continue; /* no entry matched anything in this row */
+
+ /*
+ * check for consistent
+ */
+ oldCtx = MemoryContextSwitchTo(so->tempCtx); /* consistent functions run in tempCtx, which is reset below */
+ recheck = false;
+ match = true;
+
+ for (i = 0; match && i < so->nkeys; i++) /* stops at the first key that rejects the row */
+ {
+ GinScanKey key = so->keys + i;
+
+ keyrecheck = true; /* value seen if the consistent function does not assign it */
+
+ if ( DatumGetBool(FunctionCall4(&so->ginstate.consistentFn[ key->attnum-1 ],
+ PointerGetDatum(key->entryRes),
+ UInt16GetDatum(key->strategy),
+ key->query,
+ PointerGetDatum(&keyrecheck))) == false )
+ {
+ match = false;
+ }
+
+ recheck |= keyrecheck;
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+ MemoryContextReset(so->tempCtx);
+
+ if ( match )
+ {
+ tbm_add_tuples(tbm, &pos.item, 1, recheck);
+ (*ntids)++;
+ }
+ }
+}
+
/*
* Get heap item pointer from scan
* returns true if found
@@ -720,6 +1087,18 @@ scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck)
{
int cmp = compareItemPointers(item, &key->curItem);
+ if ( cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem)) )
+ {
+ /*
+ * if one of ItemPointers points to the whole page then
+ * compare only page's number
+ */
+ if ( ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem) )
+ cmp = 0;
+ else
+ cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1;
+ }
+
if (cmp == 0)
break;
else if (cmp > 0)
@@ -757,9 +1136,26 @@ gingetbitmap(PG_FUNCTION_ARGS)
if (GinIsVoidRes(scan))
PG_RETURN_INT64(0);
+ ntids = 0;
+
+ /*
+ * First, scan the pending list and collect any matching entries into
+ * the bitmap. After we scan a pending item, some other backend could
+ * post it into the main index, and so we might visit it a second time
+ * during the main scan. This is okay because we'll just re-set the
+ * same bit in the bitmap. (The possibility of duplicate visits is a
+ * major reason why GIN can't support the amgettuple API, however.)
+ * Note that it would not do to scan the main index before the pending
+ * list, since concurrent cleanup could then make us miss entries
+ * entirely.
+ */
+ scanPendingInsert(scan, tbm, &ntids);
+
+ /*
+ * Now scan the main index.
+ */
startScan(scan);
- ntids = 0;
for (;;)
{
ItemPointerData iptr;
@@ -770,31 +1166,12 @@ gingetbitmap(PG_FUNCTION_ARGS)
if (!scanGetItem(scan, &iptr, &recheck))
break;
- tbm_add_tuples(tbm, &iptr, 1, recheck);
+ if ( ItemPointerIsLossyPage(&iptr) )
+ tbm_add_page(tbm, ItemPointerGetBlockNumber(&iptr));
+ else
+ tbm_add_tuples(tbm, &iptr, 1, recheck);
ntids++;
}
PG_RETURN_INT64(ntids);
}
-
-Datum
-gingettuple(PG_FUNCTION_ARGS)
-{
- IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
- ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
- bool res;
-
- if (dir != ForwardScanDirection)
- elog(ERROR, "GIN doesn't support other scan directions than forward");
-
- if (GinIsNewKey(scan))
- newScanKey(scan);
-
- if (GinIsVoidRes(scan))
- PG_RETURN_BOOL(false);
-
- startScan(scan);
- res = scanGetItem(scan, &scan->xs_ctup.t_self, &scan->xs_recheck);
-
- PG_RETURN_BOOL(res);
-}
diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c
index 2ab1105423c..d05882cdb94 100644
--- a/src/backend/access/gin/gininsert.c
+++ b/src/backend/access/gin/gininsert.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.18 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -138,9 +138,11 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack,
/*
* Inserts only one entry to the index, but it can add more than 1 ItemPointer.
*/
-static void
-ginEntryInsert(Relation index, GinState *ginstate, OffsetNumber attnum, Datum value,
- ItemPointerData *items, uint32 nitem, bool isBuild)
+void
+ginEntryInsert(Relation index, GinState *ginstate,
+ OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem,
+ bool isBuild)
{
GinBtreeData btree;
GinBtreeStack *stack;
@@ -273,7 +275,7 @@ ginbuild(PG_FUNCTION_ARGS)
IndexBuildResult *result;
double reltuples;
GinBuildState buildstate;
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
ItemPointerData *list;
Datum entry;
uint32 nlist;
@@ -286,11 +288,17 @@ ginbuild(PG_FUNCTION_ARGS)
initGinState(&buildstate.ginstate, index);
+ /* initialize the meta page */
+ MetaBuffer = GinNewBuffer(index);
+
/* initialize the root page */
- buffer = GinNewBuffer(index);
+ RootBuffer = GinNewBuffer(index);
+
START_CRIT_SECTION();
- GinInitBuffer(buffer, GIN_LEAF);
- MarkBufferDirty(buffer);
+ GinInitMetabuffer(MetaBuffer);
+ MarkBufferDirty(MetaBuffer);
+ GinInitBuffer(RootBuffer, GIN_LEAF);
+ MarkBufferDirty(RootBuffer);
if (!index->rd_istemp)
{
@@ -303,16 +311,19 @@ ginbuild(PG_FUNCTION_ARGS)
rdata.len = sizeof(RelFileNode);
rdata.next = NULL;
- page = BufferGetPage(buffer);
-
-
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
+
+ page = BufferGetPage(RootBuffer);
PageSetLSN(page, recptr);
PageSetTLI(page, ThisTimeLineID);
+ page = BufferGetPage(MetaBuffer);
+ PageSetLSN(page, recptr);
+ PageSetTLI(page, ThisTimeLineID);
}
- UnlockReleaseBuffer(buffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ UnlockReleaseBuffer(RootBuffer);
END_CRIT_SECTION();
/* build the index */
@@ -417,9 +428,26 @@ gininsert(PG_FUNCTION_ARGS)
initGinState(&ginstate, index);
- for(i=0; i<ginstate.origTupdesc->natts;i++)
- if ( !isnull[i] )
- res += ginHeapTupleInsert(index, &ginstate, (OffsetNumber)(i+1), values[i], ht_ctid);
+ if ( GinGetUseFastUpdate(index) )
+ {
+ GinTupleCollector collector;
+
+ memset(&collector, 0, sizeof(GinTupleCollector));
+ for(i=0; i<ginstate.origTupdesc->natts;i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleFastCollect(index, &ginstate, &collector,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ ginHeapTupleFastInsert(index, &ginstate, &collector);
+ }
+ else
+ {
+ for(i=0; i<ginstate.origTupdesc->natts;i++)
+ if ( !isnull[i] )
+ res += ginHeapTupleInsert(index, &ginstate,
+ (OffsetNumber)(i+1), values[i], ht_ctid);
+
+ }
MemoryContextSwitchTo(oldCtx);
MemoryContextDelete(insertCtx);
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index 222ea677883..e0951a6a4f8 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.20 2009/01/05 17:14:28 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginutil.c,v 1.21 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -57,7 +57,7 @@ initGinState(GinState *state, Relation index)
CurrentMemoryContext);
/*
- * Check opclass capability to do partial match.
+ * Check opclass capability to do partial match.
*/
if ( index_getprocid(index, i+1, GIN_COMPARE_PARTIAL_PROC) != InvalidOid )
{
@@ -88,7 +88,7 @@ gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple)
bool isnull;
/*
- * First attribute is always int16, so we can safely use any
+ * First attribute is always int16, so we can safely use any
* tuple descriptor to obtain first attribute of tuple
*/
res = index_getattr(tuple, FirstOffsetNumber, ginstate->tupdesc[0],
@@ -213,6 +213,22 @@ GinInitBuffer(Buffer b, uint32 f)
GinInitPage(BufferGetPage(b), f, BufferGetPageSize(b));
}
+/*
+ * Initialize buffer b as a GIN metapage: the page is set up with the
+ * GIN_META flag, both ends of the pending list are set to
+ * InvalidBlockNumber and all pending-list counters are zeroed.
+ */
+void
+GinInitMetabuffer(Buffer b)
+{
+	Page		metapage = BufferGetPage(b);
+	GinMetaPageData *meta;
+
+	GinInitPage(metapage, GIN_META, BufferGetPageSize(b));
+
+	meta = GinPageGetMeta(metapage);
+
+	/* pending list starts out empty */
+	meta->tail = InvalidBlockNumber;
+	meta->head = InvalidBlockNumber;
+
+	/* nothing stored yet, so no free space or counts to report */
+	meta->tailFreeSize = 0;
+	meta->nPendingPages = 0;
+	meta->nPendingHeapTuples = 0;
+}
+
+
int
compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b)
{
@@ -315,10 +331,26 @@ ginoptions(PG_FUNCTION_ARGS)
{
Datum reloptions = PG_GETARG_DATUM(0);
bool validate = PG_GETARG_BOOL(1);
- bytea *result;
+ relopt_value *options;
+ GinOptions *rdopts;
+ int numoptions;
+ static const relopt_parse_elt tab[] = {
+ {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}
+ };
+
+ options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN,
+ &numoptions);
+
+ /* if none set, we're done */
+ if (numoptions == 0)
+ PG_RETURN_NULL();
+
+ rdopts = allocateReloptStruct(sizeof(GinOptions), options, numoptions);
+
+ fillRelOptions((void *) rdopts, sizeof(GinOptions), options, numoptions,
+ validate, tab, lengthof(tab));
+
+ pfree(options);
- result = default_reloptions(reloptions, validate, RELOPT_KIND_GIN);
- if (result)
- PG_RETURN_BYTEA_P(result);
- PG_RETURN_NULL();
+ PG_RETURN_BYTEA_P(rdopts);
}
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index be614a3c9c8..dd98b9fd284 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.27 2009/01/01 17:23:34 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginvacuum.c,v 1.28 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
@@ -19,8 +19,8 @@
#include "catalog/storage.h"
#include "commands/vacuum.h"
#include "miscadmin.h"
+#include "postmaster/autovacuum.h"
#include "storage/bufmgr.h"
-#include "storage/freespace.h"
#include "storage/indexfsm.h"
#include "storage/lmgr.h"
@@ -593,18 +593,24 @@ ginbulkdelete(PG_FUNCTION_ARGS)
BlockNumber rootOfPostingTree[BLCKSZ / (sizeof(IndexTupleData) + sizeof(ItemId))];
uint32 nRoot;
+ gvs.index = index;
+ gvs.callback = callback;
+ gvs.callback_state = callback_state;
+ gvs.strategy = info->strategy;
+ initGinState(&gvs.ginstate, index);
+
/* first time through? */
if (stats == NULL)
+ {
+ /* Yes, so initialize stats to zeroes */
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ /* and cleanup any pending inserts */
+ ginInsertCleanup(index, &gvs.ginstate, true, stats);
+ }
+
/* we'll re-count the tuples each time */
stats->num_index_tuples = 0;
-
- gvs.index = index;
gvs.result = stats;
- gvs.callback = callback;
- gvs.callback_state = callback_state;
- gvs.strategy = info->strategy;
- initGinState(&gvs.ginstate, index);
buffer = ReadBufferExtended(index, MAIN_FORKNUM, blkno,
RBM_NORMAL, info->strategy);
@@ -702,10 +708,32 @@ ginvacuumcleanup(PG_FUNCTION_ARGS)
BlockNumber totFreePages;
BlockNumber lastBlock = GIN_ROOT_BLKNO,
lastFilledBlock = GIN_ROOT_BLKNO;
+ GinState ginstate;
- /* Set up all-zero stats if ginbulkdelete wasn't called */
+ /*
+ * In an autovacuum analyze, we want to clean up pending insertions.
+ * Otherwise, an ANALYZE-only call is a no-op.
+ */
+ if (info->analyze_only)
+ {
+ if (IsAutoVacuumWorkerProcess())
+ {
+ initGinState(&ginstate, index);
+ ginInsertCleanup(index, &ginstate, true, stats);
+ }
+ PG_RETURN_POINTER(stats);
+ }
+
+ /*
+ * Set up all-zero stats and cleanup pending inserts
+ * if ginbulkdelete wasn't called
+ */
if (stats == NULL)
+ {
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
+ initGinState(&ginstate, index);
+ ginInsertCleanup(index, &ginstate, true, stats);
+ }
/*
* XXX we always report the heap tuple count as the number of index
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index 362709de330..03cdc1129cf 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.17 2009/01/20 18:59:36 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gin/ginxlog.c,v 1.18 2009/03/24 20:17:11 tgl Exp $
*-------------------------------------------------------------------------
*/
#include "postgres.h"
@@ -71,20 +71,30 @@ static void
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
{
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
- Buffer buffer;
+ Buffer RootBuffer, MetaBuffer;
Page page;
- buffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
- Assert(BufferIsValid(buffer));
- page = (Page) BufferGetPage(buffer);
+ MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
+ Assert(BufferIsValid(MetaBuffer));
+ GinInitMetabuffer(MetaBuffer);
+
+ page = (Page) BufferGetPage(MetaBuffer);
+ PageSetLSN(page, lsn);
+ PageSetTLI(page, ThisTimeLineID);
- GinInitBuffer(buffer, GIN_LEAF);
+ RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
+ Assert(BufferIsValid(RootBuffer));
+ page = (Page) BufferGetPage(RootBuffer);
+
+ GinInitBuffer(RootBuffer, GIN_LEAF);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
- MarkBufferDirty(buffer);
- UnlockReleaseBuffer(buffer);
+ MarkBufferDirty(MetaBuffer);
+ UnlockReleaseBuffer(MetaBuffer);
+ MarkBufferDirty(RootBuffer);
+ UnlockReleaseBuffer(RootBuffer);
}
static void
@@ -433,6 +443,174 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
}
}
+/*
+ * Redo an XLOG_GIN_UPDATE_META_PAGE record.
+ *
+ * The metapage contents are restored from the copy carried in the record.
+ * Then, depending on the record:
+ *	- ntuples > 0: the tuples following the record header are appended to
+ *	  the tail page of the pending list (skipped when the page is covered
+ *	  by backup block 1), and the page's heap-tuple counter is bumped;
+ *	- otherwise, if prevTail is valid: the rightlink of the previous tail
+ *	  page is set to newRightlink.
+ *
+ * Each page is modified only if its LSN shows the change is not yet
+ * applied.
+ */
+static void
+ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
+{
+	ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
+	Buffer		metabuffer;
+	Page		metapage;
+
+	metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+
+	/*
+	 * NOTE(review): a non-initializing read can presumably come back
+	 * invalid (e.g. the index no longer exists); never dereference it.
+	 */
+	if (!BufferIsValid(metabuffer))
+		return;
+	metapage = BufferGetPage(metabuffer);
+
+	if (!XLByteLE(lsn, PageGetLSN(metapage)))
+	{
+		memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+		PageSetLSN(metapage, lsn);
+		PageSetTLI(metapage, ThisTimeLineID);
+		MarkBufferDirty(metabuffer);
+	}
+
+	if (data->ntuples > 0)
+	{
+		/*
+		 * insert into tail page
+		 */
+		if (!(record->xl_info & XLR_BKP_BLOCK_1))
+		{
+			Buffer		buffer = XLogReadBuffer(data->node, data->metadata.tail, false);
+
+			if (BufferIsValid(buffer))
+			{
+				Page		page = BufferGetPage(buffer);
+
+				if (!XLByteLE(lsn, PageGetLSN(page)))
+				{
+					OffsetNumber l,
+								off = (PageIsEmpty(page)) ? FirstOffsetNumber :
+									OffsetNumberNext(PageGetMaxOffsetNumber(page));
+					int			i,
+								tupsize;
+					IndexTuple	tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
+
+					for (i = 0; i < data->ntuples; i++)
+					{
+						tupsize = IndexTupleSize(tuples);
+
+						l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
+
+						if (l == InvalidOffsetNumber)
+							elog(ERROR, "failed to add item to index page");
+
+						tuples = (IndexTuple) (((char *) tuples) + tupsize);
+
+						/*
+						 * Advance the target slot; reusing the same offset
+						 * would insert each tuple in front of the previous
+						 * one and reverse their order on the page.
+						 */
+						off++;
+					}
+
+					/*
+					 * Increase counter of heap tuples
+					 */
+					GinPageGetOpaque(page)->maxoff++;
+
+					PageSetLSN(page, lsn);
+					PageSetTLI(page, ThisTimeLineID);
+					MarkBufferDirty(buffer);
+				}
+				UnlockReleaseBuffer(buffer);
+			}
+		}
+	}
+	else if (data->prevTail != InvalidBlockNumber)
+	{
+		/*
+		 * New tail
+		 */
+		Buffer		buffer = XLogReadBuffer(data->node, data->prevTail, false);
+
+		if (BufferIsValid(buffer))
+		{
+			Page		page = BufferGetPage(buffer);
+
+			if (!XLByteLE(lsn, PageGetLSN(page)))
+			{
+				GinPageGetOpaque(page)->rightlink = data->newRightlink;
+
+				PageSetLSN(page, lsn);
+				PageSetTLI(page, ThisTimeLineID);
+				MarkBufferDirty(buffer);
+			}
+			UnlockReleaseBuffer(buffer);
+		}
+	}
+
+	UnlockReleaseBuffer(metabuffer);
+}
+
+/*
+ * Redo an XLOG_GIN_INSERT_LISTPAGE record.
+ *
+ * Nothing is done when the page is covered by backup block 1. Otherwise
+ * the page is re-initialized as a GIN_LIST page, its rightlink is
+ * restored, and the tuples following the record header are added in the
+ * order in which they appear in the record. A page with no right
+ * neighbour is the tail of a sublist: it is flagged as holding full rows
+ * and its maxoff counter is set to 1; otherwise the counter is 0.
+ */
+static void
+ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
+{
+	ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
+	Buffer		buffer;
+	Page		page;
+	OffsetNumber l,
+				off = FirstOffsetNumber;
+	int			i,
+				tupsize;
+	IndexTuple	tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
+
+	if (record->xl_info & XLR_BKP_BLOCK_1)
+		return;
+
+	buffer = XLogReadBuffer(data->node, data->blkno, true);
+	Assert(BufferIsValid(buffer));
+	page = BufferGetPage(buffer);
+
+	GinInitBuffer(buffer, GIN_LIST);
+	GinPageGetOpaque(page)->rightlink = data->rightlink;
+	if (data->rightlink == InvalidBlockNumber)
+	{
+		/* tail of sublist */
+		GinPageSetFullRow(page);
+		GinPageGetOpaque(page)->maxoff = 1;
+	}
+	else
+	{
+		GinPageGetOpaque(page)->maxoff = 0;
+	}
+
+	for (i = 0; i < data->ntuples; i++)
+	{
+		tupsize = IndexTupleSize(tuples);
+
+		l = PageAddItem(page, (Item) tuples, tupsize, off, false, false);
+
+		if (l == InvalidOffsetNumber)
+			elog(ERROR, "failed to add item to index page");
+
+		tuples = (IndexTuple) (((char *) tuples) + tupsize);
+
+		/*
+		 * Advance the target slot; always adding at FirstOffsetNumber
+		 * would push earlier tuples down and store them in reverse order.
+		 */
+		off++;
+	}
+
+	PageSetLSN(page, lsn);
+	PageSetTLI(page, ThisTimeLineID);
+	MarkBufferDirty(buffer);
+
+	UnlockReleaseBuffer(buffer);
+}
+
+/*
+ * Redo an XLOG_GIN_DELETE_LISTPAGE record.
+ *
+ * The metapage contents are restored from the copy carried in the record,
+ * and each of the data->ndeleted pages listed in data->toDelete is marked
+ * GIN_DELETED. Each page is modified only if its LSN shows the change is
+ * not yet applied.
+ */
+static void
+ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
+{
+	ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
+	Buffer		metabuffer;
+	Page		metapage;
+	int			i;
+
+	metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
+
+	/*
+	 * NOTE(review): a non-initializing read can presumably come back
+	 * invalid (e.g. the index no longer exists); never dereference it.
+	 */
+	if (!BufferIsValid(metabuffer))
+		return;
+	metapage = BufferGetPage(metabuffer);
+
+	if (!XLByteLE(lsn, PageGetLSN(metapage)))
+	{
+		memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
+		PageSetLSN(metapage, lsn);
+		PageSetTLI(metapage, ThisTimeLineID);
+		MarkBufferDirty(metabuffer);
+	}
+
+	for (i = 0; i < data->ndeleted; i++)
+	{
+		Buffer		buffer = XLogReadBuffer(data->node, data->toDelete[i], false);
+		Page		page;
+
+		/* skip pages that cannot be read instead of touching a bad buffer */
+		if (!BufferIsValid(buffer))
+			continue;
+
+		page = BufferGetPage(buffer);
+
+		if (!XLByteLE(lsn, PageGetLSN(page)))
+		{
+			GinPageGetOpaque(page)->flags = GIN_DELETED;
+
+			PageSetLSN(page, lsn);
+			PageSetTLI(page, ThisTimeLineID);
+			MarkBufferDirty(buffer);
+		}
+
+		UnlockReleaseBuffer(buffer);
+	}
+	UnlockReleaseBuffer(metabuffer);
+}
+
void
gin_redo(XLogRecPtr lsn, XLogRecord *record)
{
@@ -461,6 +639,15 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
case XLOG_GIN_DELETE_PAGE:
ginRedoDeletePage(lsn, record);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ ginRedoUpdateMetapage(lsn, record);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ ginRedoInsertListPage(lsn, record);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ ginRedoDeleteListPages(lsn, record);
+ break;
default:
elog(PANIC, "gin_redo: unknown op code %u", info);
}
@@ -516,6 +703,18 @@ gin_desc(StringInfo buf, uint8 xl_info, char *rec)
appendStringInfo(buf, "Delete page, ");
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
break;
+ case XLOG_GIN_UPDATE_META_PAGE:
+ appendStringInfo(buf, "Update metapage, ");
+ desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, ((ginxlogUpdateMeta *) rec)->metadata.tail);
+ break;
+ case XLOG_GIN_INSERT_LISTPAGE:
+ appendStringInfo(buf, "Insert new list page, ");
+ desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
+ break;
+ case XLOG_GIN_DELETE_LISTPAGE:
+ appendStringInfo(buf, "Delete list pages (%d), ", ((ginxlogDeleteListPages *) rec)->ndeleted);
+ desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, ((ginxlogDeleteListPages *) rec)->metadata.head);
+ break;
default:
elog(PANIC, "gin_desc: unknown op code %u", info);
}
diff --git a/src/backend/access/gist/gistvacuum.c b/src/backend/access/gist/gistvacuum.c
index fcf471cf2e9..01b8512d070 100644
--- a/src/backend/access/gist/gistvacuum.c
+++ b/src/backend/access/gist/gistvacuum.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.42 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/gist/gistvacuum.c,v 1.43 2009/03/24 20:17:11 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -515,6 +515,10 @@ gistvacuumcleanup(PG_FUNCTION_ARGS)
lastFilledBlock = GIST_ROOT_BLKNO;
bool needLock;
+ /* No-op in ANALYZE ONLY mode */
+ if (info->analyze_only)
+ PG_RETURN_POINTER(stats);
+
/* Set up all-zero stats if gistbulkdelete wasn't called */
if (stats == NULL)
{
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index ab2f67c6385..42fe9554f0f 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.108 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.109 2009/03/24 20:17:11 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -647,6 +647,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
BlockNumber num_pages;
/* If hashbulkdelete wasn't called, return NULL signifying no change */
+ /* Note: this covers the analyze_only case too */
if (stats == NULL)
PG_RETURN_POINTER(NULL);
diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c
index a03b4595ba1..197fa3b041d 100644
--- a/src/backend/access/index/indexam.c
+++ b/src/backend/access/index/indexam.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.112 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/index/indexam.c,v 1.113 2009/03/24 20:17:12 tgl Exp $
*
* INTERFACE ROUTINES
* index_open - open an index relation by relation OID
@@ -647,7 +647,8 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
* item slot could have been replaced by a newer tuple by the time we get
* to it.
*
- * Returns the number of matching tuples found.
+ * Returns the number of matching tuples found. (Note: this might be only
+ * approximate, so it should only be used for statistical purposes.)
* ----------------
*/
int64
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 84f409e1aca..b8bb1ad4906 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.167 2009/01/01 17:23:35 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.168 2009/03/24 20:17:12 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -557,6 +557,10 @@ btvacuumcleanup(PG_FUNCTION_ARGS)
IndexVacuumInfo *info = (IndexVacuumInfo *) PG_GETARG_POINTER(0);
IndexBulkDeleteResult *stats = (IndexBulkDeleteResult *) PG_GETARG_POINTER(1);
+ /* No-op in ANALYZE ONLY mode */
+ if (info->analyze_only)
+ PG_RETURN_POINTER(stats);
+
/*
* If btbulkdelete was called, we need not do anything, just return the
* stats from the latest btbulkdelete call. If it wasn't called, we must
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index e53f4f52dcf..d1889e16c2c 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.312 2009/01/22 20:16:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.313 2009/03/24 20:17:12 tgl Exp $
*
*
* INTERFACE ROUTINES
@@ -1938,6 +1938,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
*/
ivinfo.index = indexRelation;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = DEBUG2;
ivinfo.num_heap_tuples = -1;
ivinfo.strategy = NULL;
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 33447b671f1..176ebde0efd 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.133 2009/01/22 20:16:01 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.134 2009/03/24 20:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -496,6 +496,28 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt,
/* We skip to here if there were no analyzable columns */
cleanup:
+ /* If this isn't part of VACUUM ANALYZE, let index AMs do cleanup */
+ if (!vacstmt->vacuum)
+ {
+ for (ind = 0; ind < nindexes; ind++)
+ {
+ IndexBulkDeleteResult *stats;
+ IndexVacuumInfo ivinfo;
+
+ ivinfo.index = Irel[ind];
+ ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = true;
+ ivinfo.message_level = elevel;
+ ivinfo.num_heap_tuples = -1; /* not known for sure */
+ ivinfo.strategy = vac_strategy;
+
+ stats = index_vacuum_cleanup(&ivinfo, NULL);
+
+ if (stats)
+ pfree(stats);
+ }
+ }
+
/* Done with indexes */
vac_close_indexes(nindexes, Irel, NoLock);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 4020bf1b294..78b179827ea 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.385 2009/01/16 13:27:23 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.386 2009/03/24 20:17:13 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -3388,6 +3388,7 @@ scan_index(Relation indrel, double num_tuples)
ivinfo.index = indrel;
ivinfo.vacuum_full = true;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples;
ivinfo.strategy = vac_strategy;
@@ -3454,6 +3455,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
ivinfo.index = indrel;
ivinfo.vacuum_full = true;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
ivinfo.strategy = vac_strategy;
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 4e4624cb132..cb73cfa87a7 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -29,7 +29,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.118 2009/01/22 19:25:00 heikki Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.119 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -875,6 +875,7 @@ lazy_vacuum_index(Relation indrel,
ivinfo.index = indrel;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
/* We don't yet know rel_tuples, so pass -1 */
ivinfo.num_heap_tuples = -1;
@@ -906,6 +907,7 @@ lazy_cleanup_index(Relation indrel,
ivinfo.index = indrel;
ivinfo.vacuum_full = false;
+ ivinfo.analyze_only = false;
ivinfo.message_level = elevel;
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
ivinfo.strategy = vac_strategy;
diff --git a/src/backend/nodes/tidbitmap.c b/src/backend/nodes/tidbitmap.c
index e214bbb7634..e56b4696b4f 100644
--- a/src/backend/nodes/tidbitmap.c
+++ b/src/backend/nodes/tidbitmap.c
@@ -32,7 +32,7 @@
* Copyright (c) 2003-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.17 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/nodes/tidbitmap.c,v 1.18 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -310,6 +310,22 @@ tbm_add_tuples(TIDBitmap *tbm, const ItemPointer tids, int ntids,
}
/*
+ * tbm_add_page - add a whole page to a TIDBitmap
+ *
+ * This causes the whole page to be reported (with the recheck flag)
+ * when the TIDBitmap is scanned.
+ */
+void
+tbm_add_page(TIDBitmap *tbm, BlockNumber pageno)
+{
+ /* Record the page as a lossy entry (adds it if not yet in the bitmap) */
+ tbm_mark_page_lossy(tbm, pageno);
+
+ /* Still within the entry limit: nothing more to do */
+ if (tbm->nentries <= tbm->maxentries)
+ return;
+
+ /* Over the memory limit, so lossify some more pages */
+ tbm_lossify(tbm);
+}
+
+/*
* tbm_union - set union
*
* a is modified in-place, b is not changed
@@ -496,7 +512,7 @@ tbm_intersect_page(TIDBitmap *a, PagetableEntry *apage, const TIDBitmap *b)
{
/*
* Some of the tuples in 'a' might not satisfy the quals for 'b',
- * but because the page 'b' is lossy, we don't know which ones.
+ * but because the page 'b' is lossy, we don't know which ones.
* Therefore we mark 'a' as requiring rechecks, to indicate that
* at most those tuples set in 'a' are matches.
*/
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index b1048504c2c..65fd7f73310 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.75 2009/01/01 17:23:55 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.76 2009/03/24 20:17:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -41,6 +41,7 @@ typedef struct IndexVacuumInfo
{
Relation index; /* the index being vacuumed */
bool vacuum_full; /* VACUUM FULL (we have exclusive lock) */
+ bool analyze_only; /* ANALYZE (without any actual vacuum) */
int message_level; /* ereport level for progress messages */
double num_heap_tuples; /* tuples remaining in heap */
BufferAccessStrategy strategy; /* access strategy for reads */
diff --git a/src/include/access/gin.h b/src/include/access/gin.h
index 1425333221d..f0f45bc5e8a 100644
--- a/src/include/access/gin.h
+++ b/src/include/access/gin.h
@@ -4,11 +4,9 @@
*
* Copyright (c) 2006-2009, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.28 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.29 2009/03/24 20:17:14 tgl Exp $
*--------------------------------------------------------------------------
*/
-
-
#ifndef GIN_H
#define GIN_H
@@ -16,11 +14,6 @@
#include "access/itup.h"
#include "access/xlog.h"
#include "fmgr.h"
-#include "nodes/tidbitmap.h"
-#include "storage/block.h"
-#include "storage/buf.h"
-#include "storage/off.h"
-#include "storage/relfilenode.h"
/*
@@ -43,20 +36,52 @@
typedef struct GinPageOpaqueData
{
BlockNumber rightlink; /* next page if any */
- OffsetNumber maxoff; /* number entries on GIN_DATA page: number of
+ OffsetNumber maxoff; /* number entries on GIN_DATA page; number of
* heap ItemPointer on GIN_DATA|GIN_LEAF page
* and number of records on GIN_DATA &
- * ~GIN_LEAF page */
+ * ~GIN_LEAF page. On GIN_LIST page, number of
+ * heap tuples. */
uint16 flags; /* see bit definitions below */
} GinPageOpaqueData;
typedef GinPageOpaqueData *GinPageOpaque;
-#define GIN_ROOT_BLKNO (0)
-
#define GIN_DATA (1 << 0)
#define GIN_LEAF (1 << 1)
#define GIN_DELETED (1 << 2)
+#define GIN_META (1 << 3)
+#define GIN_LIST (1 << 4)
+#define GIN_LIST_FULLROW (1 << 5) /* makes sense only on GIN_LIST page */
+
+/* Page numbers of fixed-location pages */
+#define GIN_METAPAGE_BLKNO (0)
+#define GIN_ROOT_BLKNO (1)
+
+/*
+ * Bookkeeping for the pending list of fast-inserted entries. Read from a
+ * page's contents area via GinPageGetMeta().
+ */
+typedef struct GinMetaPageData
+{
+ /*
+ * Pointers to head and tail of pending list, which consists of GIN_LIST
+ * pages. These store fast-inserted entries that haven't yet been moved
+ * into the regular GIN structure.
+ */
+ BlockNumber head;
+ BlockNumber tail;
+
+ /*
+ * Free space in bytes in the pending list's tail page.
+ */
+ uint32 tailFreeSize;
+
+ /*
+ * We store both number of pages and number of heap tuples
+ * that are in the pending list.
+ */
+ BlockNumber nPendingPages; /* pages in the pending list */
+ int64 nPendingHeapTuples; /* heap tuples in the pending list */
+} GinMetaPageData;
+
+#define GinPageGetMeta(p) \
+ ((GinMetaPageData *) PageGetContents(p))
/*
* Works on page
@@ -68,6 +93,8 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GinPageSetNonLeaf(page) ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
#define GinPageIsData(page) ( GinPageGetOpaque(page)->flags & GIN_DATA )
#define GinPageSetData(page) ( GinPageGetOpaque(page)->flags |= GIN_DATA )
+#define GinPageHasFullRow(page) ( GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW )
+#define GinPageSetFullRow(page) ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
#define GinPageIsDeleted(page) ( GinPageGetOpaque(page)->flags & GIN_DELETED)
#define GinPageSetDeleted(page) ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
@@ -76,8 +103,8 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
/*
- * Define our ItemPointerGet(BlockNumber|GetOffsetNumber)
- * to prevent asserts
+ * We use our own ItemPointerGetBlockNumber/ItemPointerGetOffsetNumber
+ * variants to avoid Asserts, since sometimes the ip_posid isn't "valid"
*/
#define GinItemPointerGetBlockNumber(pointer) \
@@ -86,6 +113,22 @@ typedef GinPageOpaqueData *GinPageOpaque;
#define GinItemPointerGetOffsetNumber(pointer) \
((pointer)->ip_posid)
+/*
+ * Sentinel item pointers: "min" is (block 0, offset 0), "max" is
+ * (InvalidBlockNumber, offset 0xffff), and a "lossy page" pointer is
+ * (block, offset 0xffff) for any block other than InvalidBlockNumber.
+ *
+ * The tests use the GinItemPointerGet* accessors rather than the generic
+ * ItemPointerGet* ones: these sentinels (offset 0 in particular) are not
+ * "valid" item pointers, and the generic accessors may Assert on them.
+ */
+#define ItemPointerSetMin(p) \
+ ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
+#define ItemPointerIsMin(p) \
+ (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
+ GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
+#define ItemPointerSetMax(p) \
+ ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
+#define ItemPointerIsMax(p) \
+ (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+ GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
+#define ItemPointerSetLossyPage(p, b) \
+ ItemPointerSet((p), (b), (OffsetNumber)0xffff)
+#define ItemPointerIsLossyPage(p) \
+ (GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
+ GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
+
typedef struct
{
BlockIdData child_blkno; /* use it instead of BlockNumber to save space
@@ -135,6 +178,26 @@ typedef struct
- GinPageGetOpaque(page)->maxoff * GinSizeOfItem(page) \
- MAXALIGN(sizeof(GinPageOpaqueData)))
+/*
+ * List pages
+ */
+#define GinListPageSize \
+ ( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
+
+/*
+ * Storage type for GIN's reloptions
+ */
+typedef struct GinOptions
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ bool useFastUpdate; /* use fast updates? */
+} GinOptions;
+
+#define GIN_DEFAULT_USE_FASTUPDATE true
+#define GinGetUseFastUpdate(relation) \
+ ((relation)->rd_options ? \
+ ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
+
#define GIN_UNLOCK BUFFER_LOCK_UNLOCK
#define GIN_SHARE BUFFER_LOCK_SHARE
@@ -234,14 +297,52 @@ typedef struct ginxlogDeletePage
BlockNumber rightLink;
} ginxlogDeletePage;
+#define XLOG_GIN_UPDATE_META_PAGE 0x60
+
+typedef struct ginxlogUpdateMeta
+{
+ RelFileNode node; /* NOTE(review): presumably the index's relfilenode */
+ GinMetaPageData metadata; /* NOTE(review): presumably the new metapage contents */
+ BlockNumber prevTail; /* NOTE(review): presumably the tail page before the update */
+ BlockNumber newRightlink; /* NOTE(review): presumably rightlink to set on prevTail -- confirm in ginxlog.c */
+ int32 ntuples; /* if ntuples > 0 then metadata.tail was updated
+ * with that many tuples; else new sub list was
+ * inserted */
+ /* array of inserted tuples follows */
+} ginxlogUpdateMeta;
+
+#define XLOG_GIN_INSERT_LISTPAGE 0x70
+
+typedef struct ginxlogInsertListPage
+{
+ RelFileNode node; /* NOTE(review): presumably the index's relfilenode */
+ BlockNumber blkno; /* NOTE(review): presumably the list page being written */
+ BlockNumber rightlink; /* NOTE(review): presumably that page's rightlink */
+ int32 ntuples; /* NOTE(review): presumably length of the trailing tuple array */
+ /* array of inserted tuples follows */
+} ginxlogInsertListPage;
+
+#define XLOG_GIN_DELETE_LISTPAGE 0x80
+
+#define GIN_NDELETE_AT_ONCE 16
+typedef struct ginxlogDeleteListPages
+{
+ RelFileNode node; /* NOTE(review): presumably the index's relfilenode */
+ GinMetaPageData metadata; /* NOTE(review): presumably the new metapage contents */
+ int32 ndeleted; /* NOTE(review): presumably # of toDelete[] entries in use */
+ BlockNumber toDelete[GIN_NDELETE_AT_ONCE]; /* room for GIN_NDELETE_AT_ONCE block numbers */
+} ginxlogDeleteListPages;
+
+
/* ginutil.c */
extern Datum ginoptions(PG_FUNCTION_ARGS);
extern void initGinState(GinState *state, Relation index);
extern Buffer GinNewBuffer(Relation index);
extern void GinInitBuffer(Buffer b, uint32 f);
extern void GinInitPage(Page page, uint32 f, Size pageSize);
+extern void GinInitMetabuffer(Buffer b);
extern int compareEntries(GinState *ginstate, OffsetNumber attnum, Datum a, Datum b);
-extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
+extern int compareAttEntries(GinState *ginstate, OffsetNumber attnum_a, Datum a,
OffsetNumber attnum_b, Datum b);
extern Datum *extractEntriesS(GinState *ginstate, OffsetNumber attnum, Datum value,
int32 *nentries, bool *needUnique);
@@ -249,9 +350,14 @@ extern Datum *extractEntriesSU(GinState *ginstate, OffsetNumber attnum, Datum va
extern Datum gin_index_getattr(GinState *ginstate, IndexTuple tuple);
extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
+
/* gininsert.c */
extern Datum ginbuild(PG_FUNCTION_ARGS);
extern Datum gininsert(PG_FUNCTION_ARGS);
+extern void ginEntryInsert(Relation index, GinState *ginstate,
+ OffsetNumber attnum, Datum value,
+ ItemPointerData *items, uint32 nitem,
+ bool isBuild);
/* ginxlog.c */
extern void gin_redo(XLogRecPtr lsn, XLogRecord *record);
@@ -319,7 +425,7 @@ extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack);
extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno);
/* ginentrypage.c */
-extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
+extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key,
ItemPointerData *ipd, uint32 nipd);
extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum,
Datum value, GinState *ginstate);
@@ -440,13 +546,7 @@ extern void newScanKey(IndexScanDesc scan);
/* ginget.c */
extern PGDLLIMPORT int GinFuzzySearchLimit;
-#define ItemPointerSetMax(p) ItemPointerSet( (p), (BlockNumber)0xffffffff, (OffsetNumber)0xffff )
-#define ItemPointerIsMax(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0xffffffff && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff )
-#define ItemPointerSetMin(p) ItemPointerSet( (p), (BlockNumber)0, (OffsetNumber)0)
-#define ItemPointerIsMin(p) ( ItemPointerGetBlockNumber(p) == (BlockNumber)0 && ItemPointerGetOffsetNumber(p) == (OffsetNumber)0 )
-
extern Datum gingetbitmap(PG_FUNCTION_ARGS);
-extern Datum gingettuple(PG_FUNCTION_ARGS);
/* ginvacuum.c */
extern Datum ginbulkdelete(PG_FUNCTION_ARGS);
@@ -485,8 +585,26 @@ typedef struct
extern void ginInitBA(BuildAccumulator *accum);
extern void ginInsertRecordBA(BuildAccumulator *accum,
- ItemPointer heapptr,
+ ItemPointer heapptr,
OffsetNumber attnum, Datum *entries, int32 nentry);
extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
-#endif
+/* ginfast.c */
+
+typedef struct GinTupleCollector
+{
+ IndexTuple *tuples; /* NOTE(review): presumably an array of collected tuples */
+ uint32 ntuples; /* NOTE(review): presumably entries of tuples[] in use */
+ uint32 lentuples; /* NOTE(review): presumably allocated length of tuples[] */
+ uint32 sumsize; /* NOTE(review): presumably total size of collected tuples -- confirm in ginfast.c */
+} GinTupleCollector;
+
+extern void ginHeapTupleFastInsert(Relation index, GinState *ginstate,
+ GinTupleCollector *collector);
+extern uint32 ginHeapTupleFastCollect(Relation index, GinState *ginstate,
+ GinTupleCollector *collector,
+ OffsetNumber attnum, Datum value, ItemPointer item);
+extern void ginInsertCleanup(Relation index, GinState *ginstate,
+ bool vac_delay, IndexBulkDeleteResult *stats);
+
+#endif /* GIN_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 784ba688919..3d4fdc33bd3 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.524 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.525 2009/03/24 20:17:15 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 200902242
+#define CATALOG_VERSION_NO 200903241
#endif
diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h
index 7736cb6e58a..a92c1f49971 100644
--- a/src/include/catalog/pg_am.h
+++ b/src/include/catalog/pg_am.h
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.61 2009/03/05 23:06:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_am.h,v 1.62 2009/03/24 20:17:15 tgl Exp $
*
* NOTES
* the genbki.sh script reads this file and generates .bki
@@ -118,7 +118,7 @@ DESCR("hash index access method");
DATA(insert OID = 783 ( gist 0 7 f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions ));
DESCR("GiST index access method");
#define GIST_AM_OID 783
-DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan gingettuple gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
+DATA(insert OID = 2742 ( gin 0 5 f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions ));
DESCR("GIN index access method");
#define GIN_AM_OID 2742
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index b0c5be4323f..2f0dbeb2656 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.537 2009/02/24 10:06:34 petere Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.538 2009/03/24 20:17:15 tgl Exp $
*
* NOTES
* The script catalog/genbki.sh reads this file and generates .bki
@@ -4184,8 +4184,6 @@ DATA(insert OID = 2592 ( gist_circle_compress PGNSP PGUID 12 1 0 0 f f f t f i
DESCR("GiST support");
/* GIN */
-DATA(insert OID = 2730 ( gingettuple PGNSP PGUID 12 1 0 0 f f f t f v 2 0 16 "2281 2281" _null_ _null_ _null_ _null_ gingettuple _null_ _null_ _null_ ));
-DESCR("gin(internal)");
DATA(insert OID = 2731 ( gingetbitmap PGNSP PGUID 12 1 0 0 f f f t f v 2 0 20 "2281 2281" _null_ _null_ _null_ _null_ gingetbitmap _null_ _null_ _null_ ));
DESCR("gin(internal)");
DATA(insert OID = 2732 ( gininsert PGNSP PGUID 12 1 0 0 f f f t f v 6 0 16 "2281 2281 2281 2281 2281 2281" _null_ _null_ _null_ _null_ gininsert _null_ _null_ _null_ ));
diff --git a/src/include/nodes/tidbitmap.h b/src/include/nodes/tidbitmap.h
index 93658543e42..97e1d4c9c40 100644
--- a/src/include/nodes/tidbitmap.h
+++ b/src/include/nodes/tidbitmap.h
@@ -15,7 +15,7 @@
*
* Copyright (c) 2003-2009, PostgreSQL Global Development Group
*
- * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.9 2009/01/10 21:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/tidbitmap.h,v 1.10 2009/03/24 20:17:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -52,6 +52,7 @@ extern void tbm_free(TIDBitmap *tbm);
extern void tbm_add_tuples(TIDBitmap *tbm,
const ItemPointer tids, int ntids,
bool recheck);
+extern void tbm_add_page(TIDBitmap *tbm, BlockNumber pageno);
extern void tbm_union(TIDBitmap *a, const TIDBitmap *b);
extern void tbm_intersect(TIDBitmap *a, const TIDBitmap *b);