aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/backend/access/gin/README15
-rw-r--r--src/backend/access/gin/ginbtree.c2
-rw-r--r--src/backend/access/gin/ginvacuum.c236
-rw-r--r--src/include/access/gin_private.h2
4 files changed, 145 insertions, 110 deletions
diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README
index fade0cbb617..990b5ffa581 100644
--- a/src/backend/access/gin/README
+++ b/src/backend/access/gin/README
@@ -314,10 +314,17 @@ deleted.
The previous paragraph's reasoning only applies to searches, and only to
posting trees. To protect from inserters following a downlink to a deleted
page, vacuum simply locks out all concurrent insertions to the posting tree,
-by holding a super-exclusive lock on the posting tree root. Inserters hold a
-pin on the root page, but searches do not, so while new searches cannot begin
-while root page is locked, any already-in-progress scans can continue
-concurrently with vacuum. In the entry tree, we never delete pages.
+by holding a super-exclusive lock on the parent page of subtree with deletable
+pages. Inserters hold a pin on the root page, but searches do not, so while
+new searches cannot begin while root page is locked, any already-in-progress
+scans can continue concurrently with vacuum in corresponding subtree of
+posting tree. To exclude interference with readers vacuum takes exclusive
+locks in a depth-first scan in left-to-right order of page tuples. Leftmost
+page is never deleted. Thus before deleting any page we obtain exclusive
+lock on any left page, effectively excluding deadlock with any reader, despite
+taking parent lock before current and left lock after current. We take left
+lock not for a concurrency reasons, but rather in need to mark page dirty.
+In the entry tree, we never delete pages.
(This is quite different from the mechanism the btree indexam uses to make
page-deletions safe; it stamps the deleted pages with an XID and keeps the
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 538ad5bb587..b02cb8ae58f 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -31,7 +31,7 @@ static void ginFinishSplit(GinBtree btree, GinBtreeStack *stack,
/*
* Lock buffer by needed method for search.
*/
-static int
+int
ginTraverseLock(Buffer buffer, bool searchMode)
{
Page page;
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index c9ccfeece88..26c077a7bb9 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -109,75 +109,17 @@ xlogVacuumPage(Relation index, Buffer buffer)
PageSetLSN(page, recptr);
}
-static bool
-ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, Buffer *rootBuffer)
-{
- Buffer buffer;
- Page page;
- bool hasVoidPage = FALSE;
- MemoryContext oldCxt;
-
- buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
- RBM_NORMAL, gvs->strategy);
- page = BufferGetPage(buffer);
-
- /*
- * We should be sure that we don't concurrent with inserts, insert process
- * never release root page until end (but it can unlock it and lock
- * again). New scan can't start but previously started ones work
- * concurrently.
- */
- if (isRoot)
- LockBufferForCleanup(buffer);
- else
- LockBuffer(buffer, GIN_EXCLUSIVE);
-
- Assert(GinPageIsData(page));
- if (GinPageIsLeaf(page))
- {
- oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
- ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
- MemoryContextSwitchTo(oldCxt);
- MemoryContextReset(gvs->tmpCxt);
-
- /* if root is a leaf page, we don't desire further processing */
- if (!isRoot && !hasVoidPage && GinDataLeafPageIsEmpty(page))
- hasVoidPage = TRUE;
- }
- else
- {
- OffsetNumber i;
- bool isChildHasVoid = FALSE;
-
- for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++)
- {
- PostingItem *pitem = GinDataPageGetPostingItem(page, i);
-
- if (ginVacuumPostingTreeLeaves(gvs, PostingItemGetBlockNumber(pitem), FALSE, NULL))
- isChildHasVoid = TRUE;
- }
-
- if (isChildHasVoid)
- hasVoidPage = TRUE;
- }
+typedef struct DataPageDeleteStack
+{
+ struct DataPageDeleteStack *child;
+ struct DataPageDeleteStack *parent;
- /*
- * if we have root and there are empty pages in tree, then we don't
- * release lock to go further processing and guarantee that tree is unused
- */
- if (!(isRoot && hasVoidPage))
- {
- UnlockReleaseBuffer(buffer);
- }
- else
- {
- Assert(rootBuffer);
- *rootBuffer = buffer;
- }
+ BlockNumber blkno; /* current block number */
+ BlockNumber leftBlkno; /* rightest non-deleted page on left */
+ bool isRoot;
+} DataPageDeleteStack;
- return hasVoidPage;
-}
/*
* Delete a posting tree page.
@@ -194,8 +136,13 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
BlockNumber rightlink;
/*
- * Lock the pages in the same order as an insertion would, to avoid
- * deadlocks: left, then right, then parent.
+ * This function MUST be called only if someone of parent pages hold
+ * exclusive cleanup lock. This guarantees that no insertions currently
+ * happen in this subtree. Caller also acquire Exclusive lock on deletable
+ * page and is acquiring and releasing exclusive lock on left page before.
+ * Left page was locked and released. Then parent and this page are locked.
+ * We acquire left page lock here only to mark page dirty after changing
+ * right pointer.
*/
lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
RBM_NORMAL, gvs->strategy);
@@ -205,10 +152,6 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
RBM_NORMAL, gvs->strategy);
LockBuffer(lBuffer, GIN_EXCLUSIVE);
- LockBuffer(dBuffer, GIN_EXCLUSIVE);
- if (!isParentRoot) /* parent is already locked by
- * LockBufferForCleanup() */
- LockBuffer(pBuffer, GIN_EXCLUSIVE);
START_CRIT_SECTION();
@@ -272,26 +215,15 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
PageSetLSN(BufferGetPage(lBuffer), recptr);
}
- if (!isParentRoot)
- LockBuffer(pBuffer, GIN_UNLOCK);
ReleaseBuffer(pBuffer);
UnlockReleaseBuffer(lBuffer);
- UnlockReleaseBuffer(dBuffer);
+ ReleaseBuffer(dBuffer);
END_CRIT_SECTION();
gvs->result->pages_deleted++;
}
-typedef struct DataPageDeleteStack
-{
- struct DataPageDeleteStack *child;
- struct DataPageDeleteStack *parent;
-
- BlockNumber blkno; /* current block number */
- BlockNumber leftBlkno; /* rightest non-deleted page on left */
- bool isRoot;
-} DataPageDeleteStack;
/*
* scans posting tree and deletes empty pages
@@ -325,6 +257,10 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
RBM_NORMAL, gvs->strategy);
+
+ if(!isRoot)
+ LockBuffer(buffer, GIN_EXCLUSIVE);
+
page = BufferGetPage(buffer);
Assert(GinPageIsData(page));
@@ -359,6 +295,9 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
}
+ if(!isRoot)
+ LockBuffer(buffer, GIN_UNLOCK);
+
ReleaseBuffer(buffer);
if (!meDelete)
@@ -367,37 +306,124 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
return meDelete;
}
-static void
-ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
+
+/*
+ * Scan through posting tree, delete empty tuples from leaf pages.
+ * Also, this function collects empty subtrees (with all empty leafs).
+ * For parents of these subtrees CleanUp lock is taken, then we call
+ * ScanToDelete. This is done for every inner page, which points to
+ * empty subtree.
+ */
+static bool
+ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
{
- Buffer rootBuffer = InvalidBuffer;
- DataPageDeleteStack root,
- *ptr,
- *tmp;
+ Buffer buffer;
+ Page page;
+ bool hasVoidPage = FALSE;
+ MemoryContext oldCxt;
+
+ buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, gvs->strategy);
+ page = BufferGetPage(buffer);
+
+ ginTraverseLock(buffer,false);
- if (ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE, &rootBuffer) == FALSE)
+ Assert(GinPageIsData(page));
+
+ if (GinPageIsLeaf(page))
{
- Assert(rootBuffer == InvalidBuffer);
- return;
+ oldCxt = MemoryContextSwitchTo(gvs->tmpCxt);
+ ginVacuumPostingTreeLeaf(gvs->index, buffer, gvs);
+ MemoryContextSwitchTo(oldCxt);
+ MemoryContextReset(gvs->tmpCxt);
+
+ /* if root is a leaf page, we don't desire further processing */
+ if (GinDataLeafPageIsEmpty(page))
+ hasVoidPage = TRUE;
+
+ UnlockReleaseBuffer(buffer);
+
+ return hasVoidPage;
}
+ else
+ {
+ OffsetNumber i;
+ bool hasEmptyChild = FALSE;
+ bool hasNonEmptyChild = FALSE;
+ OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
+ BlockNumber* children = palloc(sizeof(BlockNumber) * (maxoff + 1));
+
+ /*
+ * Read all children BlockNumbers.
+ * Not sure it is safe if there are many concurrent vacuums.
+ */
- memset(&root, 0, sizeof(DataPageDeleteStack));
- root.leftBlkno = InvalidBlockNumber;
- root.isRoot = TRUE;
+ for (i = FirstOffsetNumber; i <= maxoff; i++)
+ {
+ PostingItem *pitem = GinDataPageGetPostingItem(page, i);
- vacuum_delay_point();
+ children[i] = PostingItemGetBlockNumber(pitem);
+ }
- ginScanToDelete(gvs, rootBlkno, TRUE, &root, InvalidOffsetNumber);
+ UnlockReleaseBuffer(buffer);
- ptr = root.child;
- while (ptr)
- {
- tmp = ptr->child;
- pfree(ptr);
- ptr = tmp;
+ for (i = FirstOffsetNumber; i <= maxoff; i++)
+ {
+ if (ginVacuumPostingTreeLeaves(gvs, children[i], FALSE))
+ hasEmptyChild = TRUE;
+ else
+ hasNonEmptyChild = TRUE;
+ }
+
+ pfree(children);
+
+ vacuum_delay_point();
+
+ /*
+ * All subtree is empty - just return TRUE to indicate that parent must
+ * do a cleanup. Unless we are ROOT an there is way to go upper.
+ */
+
+ if(hasEmptyChild && !hasNonEmptyChild && !isRoot)
+ return TRUE;
+
+ if(hasEmptyChild)
+ {
+ DataPageDeleteStack root,
+ *ptr,
+ *tmp;
+
+ buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, gvs->strategy);
+ LockBufferForCleanup(buffer);
+
+ memset(&root, 0, sizeof(DataPageDeleteStack));
+ root.leftBlkno = InvalidBlockNumber;
+ root.isRoot = TRUE;
+
+ ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber);
+
+ ptr = root.child;
+
+ while (ptr)
+ {
+ tmp = ptr->child;
+ pfree(ptr);
+ ptr = tmp;
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /* Here we have deleted all empty subtrees */
+ return FALSE;
}
+}
- UnlockReleaseBuffer(rootBuffer);
+static void
+ginVacuumPostingTree(GinVacuumState *gvs, BlockNumber rootBlkno)
+{
+ ginVacuumPostingTreeLeaves(gvs, rootBlkno, TRUE);
}
/*
diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h
index 34e7339f055..824cc1c9d89 100644
--- a/src/include/access/gin_private.h
+++ b/src/include/access/gin_private.h
@@ -471,4 +471,6 @@ ginCompareItemPointers(ItemPointer a, ItemPointer b)
return -1;
}
+extern int ginTraverseLock(Buffer buffer, bool searchMode);
+
#endif /* GIN_PRIVATE_H */