aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/gist/gistxlog.c
diff options
context:
space:
mode:
authorHeikki Linnakangas <heikki.linnakangas@iki.fi>2019-03-22 13:21:20 +0200
committerHeikki Linnakangas <heikki.linnakangas@iki.fi>2019-03-22 13:21:45 +0200
commit7df159a620b760e289f1795b13542ed1b3e13b87 (patch)
treee4ca41fc59cf7263e32264791617b9ae5a2dca8e /src/backend/access/gist/gistxlog.c
parentdf816f6ad532ad685a3897869a2e64d3a53fe312 (diff)
downloadpostgresql-7df159a620b760e289f1795b13542ed1b3e13b87.tar.gz
postgresql-7df159a620b760e289f1795b13542ed1b3e13b87.zip
Delete empty pages during GiST VACUUM.
To do this, we scan the GiST index two times. In the first pass we make note of empty leaf pages and internal pages. In the second pass we scan through the internal pages, looking for downlinks to the empty pages. Deleting internal pages is still not supported. As in nbtree, the last child of an internal page is never deleted. That means that if you have a workload where new keys are always inserted into a different area than the one where old keys are removed, the index will still grow without bound. But the rate of growth will be an order of magnitude slower than before. Author: Andrey Borodin Discussion: https://www.postgresql.org/message-id/B1E4DF12-6CD3-4706-BDBD-BF3283328F60@yandex-team.ru
Diffstat (limited to 'src/backend/access/gist/gistxlog.c')
-rw-r--r--src/backend/access/gist/gistxlog.c115
1 files changed, 115 insertions, 0 deletions
diff --git a/src/backend/access/gist/gistxlog.c b/src/backend/access/gist/gistxlog.c
index 408bd5390af..cb80ab00cd7 100644
--- a/src/backend/access/gist/gistxlog.c
+++ b/src/backend/access/gist/gistxlog.c
@@ -23,6 +23,7 @@
#include "miscadmin.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
+#include "utils/rel.h"
static MemoryContext opCtx; /* working memory for operations */
@@ -508,6 +509,64 @@ gistRedoCreateIndex(XLogReaderState *record)
UnlockReleaseBuffer(buffer);
}
+/*
+ * redo page deletion
+ *
+ * Replays an XLOG_GIST_PAGE_DELETE record (dispatched from gist_redo()).
+ *
+ * The record references two blocks:
+ *   block 0 - the page being deleted (read into leafBuffer); it is stamped
+ *             with xldata->deleteXid and flagged as deleted;
+ *   block 1 - the parent page (read into parentBuffer); the tuple at
+ *             xldata->downlinkOffset is removed from it.
+ *
+ * Each page is modified only when XLogReadBufferForRedo() reports
+ * BLK_NEEDS_REDO; a modified page gets the record's end LSN and its buffer
+ * is marked dirty. Afterwards each buffer is unlocked and released if it is
+ * valid, parent first, then leaf.
+ */
+static void
+gistRedoPageDelete(XLogReaderState *record)
+{
+ XLogRecPtr lsn = record->EndRecPtr; /* LSN stamped on every page redone below */
+ gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
+ Buffer parentBuffer;
+ Buffer leafBuffer;
+
+ if (XLogReadBufferForRedo(record, 0, &leafBuffer) == BLK_NEEDS_REDO)
+ {
+ Page page = (Page) BufferGetPage(leafBuffer);
+
+ GistPageSetDeleteXid(page, xldata->deleteXid);
+ GistPageSetDeleted(page);
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(leafBuffer);
+ }
+
+ if (XLogReadBufferForRedo(record, 1, &parentBuffer) == BLK_NEEDS_REDO)
+ {
+ Page page = (Page) BufferGetPage(parentBuffer);
+
+ PageIndexTupleDelete(page, xldata->downlinkOffset);
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(parentBuffer);
+ }
+
+ if (BufferIsValid(parentBuffer))
+ UnlockReleaseBuffer(parentBuffer);
+ if (BufferIsValid(leafBuffer))
+ UnlockReleaseBuffer(leafBuffer);
+}
+
+static void
+gistRedoPageReuse(XLogReaderState *record)
+{
+ gistxlogPageReuse *xlrec = (gistxlogPageReuse *) XLogRecGetData(record);
+
+ /*
+ * Redo routine for XLOG_GIST_PAGE_REUSE (dispatched from gist_redo()).
+ * No buffer is read or modified here; the only action, taken when
+ * InHotStandby is true, is to resolve recovery conflicts for
+ * xlrec->latestRemovedXid on the relation identified by xlrec->node.
+ *
+ * PAGE_REUSE records exist to provide a conflict point when we reuse
+ * pages in the index via the FSM. That's all they do though.
+ *
+ * latestRemovedXid was the page's deleteXid. The deleteXid <
+ * RecentGlobalXmin test in gistPageRecyclable() conceptually mirrors the
+ * pgxact->xmin > limitXmin test in GetConflictingVirtualXIDs().
+ * Consequently, one XID value achieves the same exclusion effect on
+ * master and standby.
+ */
+ if (InHotStandby)
+ {
+ ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
+ xlrec->node);
+ }
+}
+
void
gist_redo(XLogReaderState *record)
{
@@ -529,12 +588,18 @@ gist_redo(XLogReaderState *record)
case XLOG_GIST_DELETE:
gistRedoDeleteRecord(record);
break;
+ case XLOG_GIST_PAGE_REUSE:
+ gistRedoPageReuse(record);
+ break;
case XLOG_GIST_PAGE_SPLIT:
gistRedoPageSplitRecord(record);
break;
case XLOG_GIST_CREATE_INDEX:
gistRedoCreateIndex(record);
break;
+ case XLOG_GIST_PAGE_DELETE:
+ gistRedoPageDelete(record);
+ break;
default:
elog(PANIC, "gist_redo: unknown op code %u", info);
}
@@ -654,6 +719,56 @@ gistXLogSplit(bool page_is_leaf,
}
/*
+ * Write XLOG record describing a page deletion. This also includes removal of
+ * downlink from the parent page.
+ *
+ * The record's main data is a gistxlogPageDelete holding 'xid' (stored as
+ * deleteXid) and 'downlinkOffset'. 'buffer' is registered as block 0 and
+ * 'parentBuffer' as block 1, both with REGBUF_STANDARD; this numbering
+ * matches what gistRedoPageDelete() reads back during replay.
+ *
+ * Returns the XLogRecPtr produced by XLogInsert() for the
+ * XLOG_GIST_PAGE_DELETE record.
+ */
+XLogRecPtr
+gistXLogPageDelete(Buffer buffer, TransactionId xid,
+ Buffer parentBuffer, OffsetNumber downlinkOffset)
+{
+ gistxlogPageDelete xlrec;
+ XLogRecPtr recptr;
+
+ xlrec.deleteXid = xid;
+ xlrec.downlinkOffset = downlinkOffset;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, SizeOfGistxlogPageDelete);
+
+ XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
+ XLogRegisterBuffer(1, parentBuffer, REGBUF_STANDARD);
+
+ recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_DELETE);
+
+ return recptr;
+}
+
+/*
+ * Write XLOG record about reuse of a deleted page.
+ *
+ * The record's main data is a gistxlogPageReuse filled from the arguments:
+ * rel->rd_node is stored as 'node', 'blkno' as 'block', and
+ * 'latestRemovedXid' under the same name. No buffer is registered.
+ *
+ * Nothing is returned; the value returned by XLogInsert() is not used.
+ */
+void
+gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
+{
+ gistxlogPageReuse xlrec_reuse;
+
+ /*
+ * Note that we don't register the buffer with the record, because this
+ * operation doesn't modify the page. This record only exists to provide a
+ * conflict point for Hot Standby.
+ */
+
+ /* XLOG stuff */
+ xlrec_reuse.node = rel->rd_node;
+ xlrec_reuse.block = blkno;
+ xlrec_reuse.latestRemovedXid = latestRemovedXid;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse);
+
+ XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_REUSE);
+}
+
+/*
* Write XLOG record describing a page update. The update can include any
* number of deletions and/or insertions of tuples on a single index page.
*