aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gin/ginbtree.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/gin/ginbtree.c')
-rw-r--r--src/backend/access/gin/ginbtree.c189
1 files changed, 102 insertions, 87 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index dc593c259fe..fa383719e65 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -17,6 +17,7 @@
#include "access/gin_private.h"
#include "access/xloginsert.h"
#include "miscadmin.h"
+#include "utils/memutils.h"
#include "utils/rel.h"
static void ginFindParents(GinBtree btree, GinBtreeStack *stack);
@@ -312,15 +313,16 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack)
* Insert a new item to a page.
*
* Returns true if the insertion was finished. On false, the page was split and
- * the parent needs to be updated. (a root split returns true as it doesn't
- * need any further action by the caller to complete)
+ * the parent needs to be updated. (A root split returns true as it doesn't
+ * need any further action by the caller to complete.)
*
* When inserting a downlink to an internal page, 'childbuf' contains the
* child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared
- * atomically with the insert. Also, the existing item at the given location
- * is updated to point to 'updateblkno'.
+ * atomically with the insert. Also, the existing item at offset stack->off
+ * in the target page is updated to point to updateblkno.
*
* stack->buffer is locked on entry, and is kept locked.
+ * Likewise for childbuf, if given.
*/
static bool
ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
@@ -328,11 +330,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
Buffer childbuf, GinStatsData *buildStats)
{
Page page = BufferGetPage(stack->buffer);
+ bool result;
GinPlaceToPageRC rc;
uint16 xlflags = 0;
Page childpage = NULL;
Page newlpage = NULL,
newrpage = NULL;
+ void *ptp_workspace = NULL;
+ MemoryContext tmpCxt;
+ MemoryContext oldCxt;
+
+ /*
+ * We do all the work of this function and its subfunctions in a temporary
+ * memory context. This avoids leakages and simplifies APIs, since some
+ * subfunctions allocate storage that has to survive until we've finished
+ * the WAL insertion.
+ */
+ tmpCxt = AllocSetContextCreate(CurrentMemoryContext,
+ "ginPlaceToPage temporary context",
+ ALLOCSET_DEFAULT_MINSIZE,
+ ALLOCSET_DEFAULT_INITSIZE,
+ ALLOCSET_DEFAULT_MAXSIZE);
+ oldCxt = MemoryContextSwitchTo(tmpCxt);
if (GinPageIsData(page))
xlflags |= GIN_INSERT_ISDATA;
@@ -350,40 +369,42 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/*
- * Try to put the incoming tuple on the page. placeToPage will decide if
- * the page needs to be split.
- *
- * WAL-logging this operation is a bit funny:
- *
- * We're responsible for calling XLogBeginInsert() and XLogInsert().
- * XLogBeginInsert() must be called before placeToPage, because
- * placeToPage can register some data to the WAL record.
- *
- * If placeToPage returns INSERTED, placeToPage has already called
- * START_CRIT_SECTION() and XLogBeginInsert(), and registered any data
- * required to replay the operation, in block index 0. We're responsible
- * for filling in the main data portion of the WAL record, calling
- * XLogInsert(), and END_CRIT_SECTION.
- *
- * If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
- * Splits happen infrequently, so we just make a full-page image of all
- * the pages involved.
+ * See if the incoming tuple will fit on the page. beginPlaceToPage will
+ * decide if the page needs to be split, and will compute the split
+ * contents if so. See comments for beginPlaceToPage and execPlaceToPage
+ * functions for more details of the API here.
*/
- rc = btree->placeToPage(btree, stack->buffer, stack,
- insertdata, updateblkno,
- &newlpage, &newrpage);
- if (rc == UNMODIFIED)
+ rc = btree->beginPlaceToPage(btree, stack->buffer, stack,
+ insertdata, updateblkno,
+ &ptp_workspace,
+ &newlpage, &newrpage);
+
+ if (rc == GPTP_NO_WORK)
{
- XLogResetInsertion();
- return true;
+ /* Nothing to do */
+ result = true;
}
- else if (rc == INSERTED)
+ else if (rc == GPTP_INSERT)
{
- /* placeToPage did START_CRIT_SECTION() */
+ /* It will fit, perform the insertion */
+ START_CRIT_SECTION();
+
+ if (RelationNeedsWAL(btree->index))
+ {
+ XLogBeginInsert();
+ XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD);
+ if (BufferIsValid(childbuf))
+ XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
+ }
+
+ /* Perform the page update, and register any extra WAL data */
+ btree->execPlaceToPage(btree, stack->buffer, stack,
+ insertdata, updateblkno, ptp_workspace);
+
MarkBufferDirty(stack->buffer);
/* An insert to an internal page finishes the split of the child. */
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
MarkBufferDirty(childbuf);
@@ -395,21 +416,15 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
ginxlogInsert xlrec;
BlockIdData childblknos[2];
- /*
- * placetopage already registered stack->buffer as block 0.
- */
xlrec.flags = xlflags;
- if (childbuf != InvalidBuffer)
- XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
-
XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
/*
* Log information about child if this was an insertion of a
* downlink.
*/
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
@@ -419,23 +434,29 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
PageSetLSN(page, recptr);
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
PageSetLSN(childpage, recptr);
}
END_CRIT_SECTION();
- return true;
+ /* Insertion is complete. */
+ result = true;
}
- else if (rc == SPLIT)
+ else if (rc == GPTP_SPLIT)
{
- /* Didn't fit, had to split */
+ /*
+ * Didn't fit, need to split. The split has been computed in newlpage
+ * and newrpage, which are pointers to palloc'd pages, not associated
+ * with buffers. stack->buffer is not touched yet.
+ */
Buffer rbuffer;
BlockNumber savedRightLink;
ginxlogSplit data;
Buffer lbuffer = InvalidBuffer;
Page newrootpg = NULL;
+ /* Get a new index page to become the right page */
rbuffer = GinNewBuffer(btree->index);
/* During index build, count the new page */
@@ -449,19 +470,11 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
savedRightLink = GinPageGetOpaque(page)->rightlink;
- /*
- * newlpage and newrpage are pointers to memory pages, not associated
- * with buffers. stack->buffer is not touched yet.
- */
-
+ /* Begin setting up WAL record */
data.node = btree->index->rd_node;
data.flags = xlflags;
- if (childbuf != InvalidBuffer)
+ if (BufferIsValid(childbuf))
{
- Page childpage = BufferGetPage(childbuf);
-
- GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
-
data.leftChildBlkno = BufferGetBlockNumber(childbuf);
data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink;
}
@@ -471,12 +484,12 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
if (stack->parent == NULL)
{
/*
- * split root, so we need to allocate new left page and place
- * pointer on root to left and right page
+ * splitting the root, so we need to allocate new left page and
+ * place pointers to left and right page on root page.
*/
lbuffer = GinNewBuffer(btree->index);
- /* During index build, count the newly-added root page */
+ /* During index build, count the new left page */
if (buildStats)
{
if (btree->isData)
@@ -493,9 +506,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
/*
* Construct a new root page containing downlinks to the new left
- * and right pages. (do this in a temporary copy first rather than
- * overwriting the original page directly, so that we can still
- * abort gracefully if this fails.)
+ * and right pages. (Do this in a temporary copy rather than
+ * overwriting the original page directly, since we're not in the
+ * critical section yet.)
*/
newrootpg = PageGetTempPage(newrpage);
GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ);
@@ -506,7 +519,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
else
{
- /* split non-root page */
+ /* splitting a non-root page */
data.rrlink = savedRightLink;
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
@@ -515,41 +528,44 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
}
/*
- * Ok, we have the new contents of the left page in a temporary copy
- * now (newlpage), and the newly-allocated right block has been filled
- * in. The original page is still unchanged.
+ * OK, we have the new contents of the left page in a temporary copy
+ * now (newlpage), and likewise for the new contents of the
+ * newly-allocated right block. The original page is still unchanged.
*
* If this is a root split, we also have a temporary page containing
- * the new contents of the root. Copy the new left page to a
- * newly-allocated block, and initialize the (original) root page the
- * new copy. Otherwise, copy over the temporary copy of the new left
- * page over the old left page.
+ * the new contents of the root.
*/
START_CRIT_SECTION();
MarkBufferDirty(rbuffer);
MarkBufferDirty(stack->buffer);
- if (BufferIsValid(childbuf))
- MarkBufferDirty(childbuf);
/*
- * Restore the temporary copies over the real buffers. But don't free
- * the temporary copies yet, WAL record data points to them.
+ * Restore the temporary copies over the real buffers.
*/
if (stack->parent == NULL)
{
+ /* Splitting the root, three pages to update */
MarkBufferDirty(lbuffer);
- memcpy(BufferGetPage(stack->buffer), newrootpg, BLCKSZ);
+ memcpy(page, newrootpg, BLCKSZ);
memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ);
memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
}
else
{
- memcpy(BufferGetPage(stack->buffer), newlpage, BLCKSZ);
+ /* Normal split, only two pages to update */
+ memcpy(page, newlpage, BLCKSZ);
memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ);
}
+ /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */
+ if (BufferIsValid(childbuf))
+ {
+ GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT;
+ MarkBufferDirty(childbuf);
+ }
+
/* write WAL record */
if (RelationNeedsWAL(btree->index))
{
@@ -574,12 +590,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
}
if (BufferIsValid(childbuf))
- XLogRegisterBuffer(3, childbuf, 0);
+ XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD);
XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
- PageSetLSN(BufferGetPage(stack->buffer), recptr);
+
+ PageSetLSN(page, recptr);
PageSetLSN(BufferGetPage(rbuffer), recptr);
if (stack->parent == NULL)
PageSetLSN(BufferGetPage(lbuffer), recptr);
@@ -589,33 +606,31 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
END_CRIT_SECTION();
/*
- * We can release the lock on the right page now, but keep the
- * original buffer locked.
+ * We can release the locks/pins on the new pages now, but keep
+ * stack->buffer locked. childbuf doesn't get unlocked either.
*/
UnlockReleaseBuffer(rbuffer);
if (stack->parent == NULL)
UnlockReleaseBuffer(lbuffer);
- pfree(newlpage);
- pfree(newrpage);
- if (newrootpg)
- pfree(newrootpg);
-
/*
* If we split the root, we're done. Otherwise the split is not
* complete until the downlink for the new page has been inserted to
* the parent.
*/
- if (stack->parent == NULL)
- return true;
- else
- return false;
+ result = (stack->parent == NULL);
}
else
{
- elog(ERROR, "unknown return code from GIN placeToPage method: %d", rc);
- return false; /* keep compiler quiet */
+ elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc);
+ result = false; /* keep compiler quiet */
}
+
+ /* Clean up temp context */
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextDelete(tmpCxt);
+
+ return result;
}
/*