diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2016-04-20 14:25:15 -0400 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2016-04-20 14:25:15 -0400 |
commit | bde361fef5ea3c65074a0c95c724fae5ac8a1bb5 (patch) | |
tree | c4afbd5713553e1596aeee77c916cdd8fb7f6bf2 /src/backend/access/gin/ginbtree.c | |
parent | a343e223a5c33a7283a6d8b255c9dbc48dbc5061 (diff) | |
download | postgresql-bde361fef5ea3c65074a0c95c724fae5ac8a1bb5.tar.gz postgresql-bde361fef5ea3c65074a0c95c724fae5ac8a1bb5.zip |
Fix memory leak and other bugs in ginPlaceToPage() & subroutines.
Commit 36a35c550ac114ca turned the interface between ginPlaceToPage and
its subroutines in gindatapage.c and ginentrypage.c into a royal mess:
page-update critical sections were started in one place and finished in
another place not even in the same file, and the very same subroutine
might return having started a critical section or not. Subsequent patches
band-aided over some of the problems with this design by making things
even messier.
One user-visible resulting problem is memory leaks caused by the need for
the subroutines to allocate storage that would survive until ginPlaceToPage
calls XLogInsert (as reported by Julien Rouhaud). This would not typically
be noticeable during retail index updates. It could be visible in a GIN
index build, in the form of memory consumption swelling to several times
the commanded maintenance_work_mem.
Another rather nasty problem is that in the internal-page-splitting code
path, we would clear the child page's GIN_INCOMPLETE_SPLIT flag well before
entering the critical section that it's supposed to be cleared in; a
failure in between would leave the index in a corrupt state. There were
also assorted coding-rule violations with little immediate consequence but
possible long-term hazards, such as beginning an XLogInsert sequence before
entering a critical section, or calling elog(DEBUG) inside a critical
section.
To fix, redefine the API between ginPlaceToPage() and its subroutines
by splitting the subroutines into two parts. The "beginPlaceToPage"
subroutine does what can be done outside a critical section, including
full computation of the result pages into temporary storage when we're
going to split the target page. The "execPlaceToPage" subroutine is called
within a critical section established by ginPlaceToPage(), and it handles
the actual page update in the non-split code path. The critical section,
as well as the XLOG insertion call sequence, are both now always started
and finished in ginPlaceToPage(). Also, make ginPlaceToPage() create and
work in a short-lived memory context to eliminate the leakage problem.
(Since a short-lived memory context had been getting created in the most
common code path in the subroutines, this shouldn't cause any noticeable
performance penalty; we're just moving the overhead up one call level.)
In passing, fix a bunch of comments that had gone unmaintained throughout
all this klugery.
Report: <571276DD.5050303@dalibo.com>
Diffstat (limited to 'src/backend/access/gin/ginbtree.c')
-rw-r--r-- | src/backend/access/gin/ginbtree.c | 189 |
1 files changed, 102 insertions, 87 deletions
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c index dc593c259fe..fa383719e65 100644 --- a/src/backend/access/gin/ginbtree.c +++ b/src/backend/access/gin/ginbtree.c @@ -17,6 +17,7 @@ #include "access/gin_private.h" #include "access/xloginsert.h" #include "miscadmin.h" +#include "utils/memutils.h" #include "utils/rel.h" static void ginFindParents(GinBtree btree, GinBtreeStack *stack); @@ -312,15 +313,16 @@ ginFindParents(GinBtree btree, GinBtreeStack *stack) * Insert a new item to a page. * * Returns true if the insertion was finished. On false, the page was split and - * the parent needs to be updated. (a root split returns true as it doesn't - * need any further action by the caller to complete) + * the parent needs to be updated. (A root split returns true as it doesn't + * need any further action by the caller to complete.) * * When inserting a downlink to an internal page, 'childbuf' contains the * child page that was split. Its GIN_INCOMPLETE_SPLIT flag will be cleared - * atomically with the insert. Also, the existing item at the given location - * is updated to point to 'updateblkno'. + * atomically with the insert. Also, the existing item at offset stack->off + * in the target page is updated to point to updateblkno. * * stack->buffer is locked on entry, and is kept locked. + * Likewise for childbuf, if given. */ static bool ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, @@ -328,11 +330,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, Buffer childbuf, GinStatsData *buildStats) { Page page = BufferGetPage(stack->buffer); + bool result; GinPlaceToPageRC rc; uint16 xlflags = 0; Page childpage = NULL; Page newlpage = NULL, newrpage = NULL; + void *ptp_workspace = NULL; + MemoryContext tmpCxt; + MemoryContext oldCxt; + + /* + * We do all the work of this function and its subfunctions in a temporary + * memory context. This avoids leakages and simplifies APIs, since some + * subfunctions allocate storage that has to survive until we've finished + * the WAL insertion. + */ + tmpCxt = AllocSetContextCreate(CurrentMemoryContext, + "ginPlaceToPage temporary context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); + oldCxt = MemoryContextSwitchTo(tmpCxt); if (GinPageIsData(page)) xlflags |= GIN_INSERT_ISDATA; @@ -350,40 +369,42 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } /* - * Try to put the incoming tuple on the page. placeToPage will decide if - * the page needs to be split. - * - * WAL-logging this operation is a bit funny: - * - * We're responsible for calling XLogBeginInsert() and XLogInsert(). - * XLogBeginInsert() must be called before placeToPage, because - * placeToPage can register some data to the WAL record. - * - * If placeToPage returns INSERTED, placeToPage has already called - * START_CRIT_SECTION() and XLogBeginInsert(), and registered any data - * required to replay the operation, in block index 0. We're responsible - * for filling in the main data portion of the WAL record, calling - * XLogInsert(), and END_CRIT_SECTION. - * - * If placeToPage returns SPLIT, we're wholly responsible for WAL logging. - * Splits happen infrequently, so we just make a full-page image of all - * the pages involved. + * See if the incoming tuple will fit on the page. beginPlaceToPage will + * decide if the page needs to be split, and will compute the split + * contents if so. See comments for beginPlaceToPage and execPlaceToPage + * functions for more details of the API here. */ - rc = btree->placeToPage(btree, stack->buffer, stack, - insertdata, updateblkno, - &newlpage, &newrpage); - if (rc == UNMODIFIED) + rc = btree->beginPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, + &ptp_workspace, + &newlpage, &newrpage); + + if (rc == GPTP_NO_WORK) { - XLogResetInsertion(); - return true; + /* Nothing to do */ + result = true; } - else if (rc == INSERTED) + else if (rc == GPTP_INSERT) { - /* placeToPage did START_CRIT_SECTION() */ + /* It will fit, perform the insertion */ + START_CRIT_SECTION(); + + if (RelationNeedsWAL(btree->index)) + { + XLogBeginInsert(); + XLogRegisterBuffer(0, stack->buffer, REGBUF_STANDARD); + if (BufferIsValid(childbuf)) + XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD); + } + + /* Perform the page update, and register any extra WAL data */ + btree->execPlaceToPage(btree, stack->buffer, stack, + insertdata, updateblkno, ptp_workspace); + MarkBufferDirty(stack->buffer); /* An insert to an internal page finishes the split of the child. */ - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; MarkBufferDirty(childbuf); @@ -395,21 +416,15 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, ginxlogInsert xlrec; BlockIdData childblknos[2]; - /* - * placetopage already registered stack->buffer as block 0. - */ xlrec.flags = xlflags; - if (childbuf != InvalidBuffer) - XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD); - XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert)); /* * Log information about child if this was an insertion of a * downlink. */ - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf)); BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink); @@ -419,23 +434,29 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT); PageSetLSN(page, recptr); - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) PageSetLSN(childpage, recptr); } END_CRIT_SECTION(); - return true; + /* Insertion is complete. */ + result = true; } - else if (rc == SPLIT) + else if (rc == GPTP_SPLIT) { - /* Didn't fit, had to split */ + /* + * Didn't fit, need to split. The split has been computed in newlpage + * and newrpage, which are pointers to palloc'd pages, not associated + * with buffers. stack->buffer is not touched yet. + */ Buffer rbuffer; BlockNumber savedRightLink; ginxlogSplit data; Buffer lbuffer = InvalidBuffer; Page newrootpg = NULL; + /* Get a new index page to become the right page */ rbuffer = GinNewBuffer(btree->index); /* During index build, count the new page */ @@ -449,19 +470,11 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, savedRightLink = GinPageGetOpaque(page)->rightlink; - /* - * newlpage and newrpage are pointers to memory pages, not associated - * with buffers. stack->buffer is not touched yet. - */ - + /* Begin setting up WAL record */ data.node = btree->index->rd_node; data.flags = xlflags; - if (childbuf != InvalidBuffer) + if (BufferIsValid(childbuf)) { - Page childpage = BufferGetPage(childbuf); - - GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; - data.leftChildBlkno = BufferGetBlockNumber(childbuf); data.rightChildBlkno = GinPageGetOpaque(childpage)->rightlink; } @@ -471,12 +484,12 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, if (stack->parent == NULL) { /* - * split root, so we need to allocate new left page and place - * pointer on root to left and right page + * splitting the root, so we need to allocate new left page and + * place pointers to left and right page on root page. */ lbuffer = GinNewBuffer(btree->index); - /* During index build, count the newly-added root page */ + /* During index build, count the new left page */ if (buildStats) { if (btree->isData) @@ -493,9 +506,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, /* * Construct a new root page containing downlinks to the new left - * and right pages. (do this in a temporary copy first rather than - * overwriting the original page directly, so that we can still - * abort gracefully if this fails.) + * and right pages. (Do this in a temporary copy rather than + * overwriting the original page directly, since we're not in the + * critical section yet.) */ newrootpg = PageGetTempPage(newrpage); GinInitPage(newrootpg, GinPageGetOpaque(newlpage)->flags & ~(GIN_LEAF | GIN_COMPRESSED), BLCKSZ); @@ -506,7 +519,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } else { - /* split non-root page */ + /* splitting a non-root page */ data.rrlink = savedRightLink; GinPageGetOpaque(newrpage)->rightlink = savedRightLink; @@ -515,41 +528,44 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, } /* - * Ok, we have the new contents of the left page in a temporary copy - * now (newlpage), and the newly-allocated right block has been filled - * in. The original page is still unchanged. + * OK, we have the new contents of the left page in a temporary copy + * now (newlpage), and likewise for the new contents of the + * newly-allocated right block. The original page is still unchanged. * * If this is a root split, we also have a temporary page containing - * the new contents of the root. Copy the new left page to a - * newly-allocated block, and initialize the (original) root page the - * new copy. Otherwise, copy over the temporary copy of the new left - * page over the old left page. + * the new contents of the root. */ START_CRIT_SECTION(); MarkBufferDirty(rbuffer); MarkBufferDirty(stack->buffer); - if (BufferIsValid(childbuf)) - MarkBufferDirty(childbuf); /* - * Restore the temporary copies over the real buffers. But don't free - * the temporary copies yet, WAL record data points to them. + * Restore the temporary copies over the real buffers. */ if (stack->parent == NULL) { + /* Splitting the root, three pages to update */ MarkBufferDirty(lbuffer); - memcpy(BufferGetPage(stack->buffer), newrootpg, BLCKSZ); + memcpy(page, newrootpg, BLCKSZ); memcpy(BufferGetPage(lbuffer), newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } else { - memcpy(BufferGetPage(stack->buffer), newlpage, BLCKSZ); + /* Normal split, only two pages to update */ + memcpy(page, newlpage, BLCKSZ); memcpy(BufferGetPage(rbuffer), newrpage, BLCKSZ); } + /* We also clear childbuf's INCOMPLETE_SPLIT flag, if passed */ + if (BufferIsValid(childbuf)) + { + GinPageGetOpaque(childpage)->flags &= ~GIN_INCOMPLETE_SPLIT; + MarkBufferDirty(childbuf); + } + /* write WAL record */ if (RelationNeedsWAL(btree->index)) { @@ -574,12 +590,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD); } if (BufferIsValid(childbuf)) - XLogRegisterBuffer(3, childbuf, 0); + XLogRegisterBuffer(3, childbuf, REGBUF_STANDARD); XLogRegisterData((char *) &data, sizeof(ginxlogSplit)); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT); - PageSetLSN(BufferGetPage(stack->buffer), recptr); + + PageSetLSN(page, recptr); PageSetLSN(BufferGetPage(rbuffer), recptr); if (stack->parent == NULL) PageSetLSN(BufferGetPage(lbuffer), recptr); @@ -589,33 +606,31 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack, END_CRIT_SECTION(); /* - * We can release the lock on the right page now, but keep the - * original buffer locked. + * We can release the locks/pins on the new pages now, but keep + * stack->buffer locked. childbuf doesn't get unlocked either. */ UnlockReleaseBuffer(rbuffer); if (stack->parent == NULL) UnlockReleaseBuffer(lbuffer); - pfree(newlpage); - pfree(newrpage); - if (newrootpg) - pfree(newrootpg); - /* * If we split the root, we're done. Otherwise the split is not * complete until the downlink for the new page has been inserted to * the parent. */ - if (stack->parent == NULL) - return true; - else - return false; + result = (stack->parent == NULL); } else { - elog(ERROR, "unknown return code from GIN placeToPage method: %d", rc); - return false; /* keep compiler quiet */ + elog(ERROR, "invalid return code from GIN placeToPage method: %d", rc); + result = false; /* keep compiler quiet */ } + + /* Clean up temp context */ + MemoryContextSwitchTo(oldCxt); + MemoryContextDelete(tmpCxt); + + return result; } /* |