aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gin
diff options
context:
space:
mode:
authorAlexander Korotkov <akorotkov@postgresql.org>2018-12-13 06:12:31 +0300
committerAlexander Korotkov <akorotkov@postgresql.org>2018-12-13 06:55:34 +0300
commit52ac6cd2d0cd70e01291e0ac4ee6d068b69bc478 (patch)
tree7ff664d30de7023776e3338a6c22a635bc065d5f /src/backend/access/gin
parentc6ade7a8cd3135af0c5d29abf39a6a83b9f6a66a (diff)
downloadpostgresql-52ac6cd2d0cd70e01291e0ac4ee6d068b69bc478.tar.gz
postgresql-52ac6cd2d0cd70e01291e0ac4ee6d068b69bc478.zip
Prevent GIN deleted pages from being reclaimed too early
When GIN vacuum deletes a posting tree page, it assumes that no concurrent searchers can access it, thanks to ginStepRight() locking two pages at once. However, since 9.4, searches can skip parts of posting trees by descending from the root. That leads to the risk that a page is deleted and reclaimed before a concurrent search can access it. This commit eliminates that risk by waiting for every transaction that might still reference this page to finish. For binary compatibility we can't change GinPageOpaqueData to store the corresponding transaction id. Instead we reuse the page header's pd_prune_xid field, which is unused in index pages. Discussion: https://postgr.es/m/31a702a.14dd.166c1366ac1.Coremail.chjischj%40163.com Author: Andrey Borodin, Alexander Korotkov Reviewed-by: Alexander Korotkov Backpatch-through: 9.4
Diffstat (limited to 'src/backend/access/gin')
-rw-r--r--src/backend/access/gin/README10
-rw-r--r--src/backend/access/gin/ginutil.c7
-rw-r--r--src/backend/access/gin/ginvacuum.c6
-rw-r--r--src/backend/access/gin/ginxlog.c1
4 files changed, 11 insertions, 13 deletions
diff --git a/src/backend/access/gin/README b/src/backend/access/gin/README
index 421b5b26d5b..30c0867829e 100644
--- a/src/backend/access/gin/README
+++ b/src/backend/access/gin/README
@@ -304,12 +304,10 @@ the lock on next page has been acquired.
The downlink is more tricky. A search descending the tree must release the
lock on the parent page before locking the child, or it could deadlock with
a concurrent split of the child page; a page split locks the parent, while
-already holding a lock on the child page. However, posting trees are only
-fully searched from left to right, starting from the leftmost leaf. (The
-tree-structure is only needed by insertions, to quickly find the correct
-insert location). So as long as we don't delete the leftmost page on each
-level, a search can never follow a downlink to page that's about to be
-deleted.
+already holding a lock on the child page. So, a deleted page cannot be reclaimed
+immediately. Instead, we have to wait for every transaction that might still
+reference this page to finish. Any such process must observe that the page
+is marked deleted and recover accordingly.
The previous paragraph's reasoning only applies to searches, and only to
posting trees. To protect from inserters following a downlink to a deleted
diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c
index d7696a1ad03..5ba99f6a346 100644
--- a/src/backend/access/gin/ginutil.c
+++ b/src/backend/access/gin/ginutil.c
@@ -309,12 +309,7 @@ GinNewBuffer(Relation index)
*/
if (ConditionalLockBuffer(buffer))
{
- Page page = BufferGetPage(buffer);
-
- if (PageIsNew(page))
- return buffer; /* OK to use, if never initialized */
-
- if (GinPageIsDeleted(page))
+ if (GinPageIsRecyclable(BufferGetPage(buffer)))
return buffer; /* OK to use */
LockBuffer(buffer, GIN_UNLOCK);
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index bdeb0bf4f52..96609835eeb 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -157,6 +157,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
page = BufferGetPage(dBuffer);
rightlink = GinPageGetOpaque(page)->rightlink;
+ /* For the deleted page, remember the last xid which could know its address */
+ GinPageSetDeleteXid(page, ReadNewTransactionId());
+
/*
* Any insert which would have gone on the leaf block will now go to its
* right sibling.
@@ -213,6 +216,7 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
data.parentOffset = myoff;
data.rightLink = GinPageGetOpaque(page)->rightlink;
+ data.deleteXid = GinPageGetDeleteXid(page);
XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
@@ -732,7 +736,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
LockBuffer(buffer, GIN_SHARE);
page = (Page) BufferGetPage(buffer);
- if (PageIsNew(page) || GinPageIsDeleted(page))
+ if (GinPageIsRecyclable(page))
{
Assert(blkno != GIN_ROOT_BLKNO);
RecordFreeIndexPage(index, blkno);
diff --git a/src/backend/access/gin/ginxlog.c b/src/backend/access/gin/ginxlog.c
index b626a219dec..b84ecf2ab15 100644
--- a/src/backend/access/gin/ginxlog.c
+++ b/src/backend/access/gin/ginxlog.c
@@ -531,6 +531,7 @@ ginRedoDeletePage(XLogReaderState *record)
page = BufferGetPage(dbuffer);
Assert(GinPageIsData(page));
GinPageGetOpaque(page)->flags = GIN_DELETED;
+ GinPageSetDeleteXid(page, data->deleteXid);
PageSetLSN(page, lsn);
MarkBufferDirty(dbuffer);
}