aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSimon Riggs <simon@2ndQuadrant.com>2010-02-13 00:59:58 +0000
committerSimon Riggs <simon@2ndQuadrant.com>2010-02-13 00:59:58 +0000
commitfafa374f2d1e04ab265d56cdadb634124364646f (patch)
tree8eed47882514a949f2b7ea7b35939a92c15f151c /src
parent4688869f41ed716a88bb88bf2642e47e27c57e99 (diff)
downloadpostgresql-fafa374f2d1e04ab265d56cdadb634124364646f.tar.gz
postgresql-fafa374f2d1e04ab265d56cdadb634124364646f.zip
Introduce WAL records to log reuse of btree pages, allowing conflict
resolution during Hot Standby. Page reuse interlock requested by Tom. Analysis and patch by me.
Diffstat (limited to 'src')
-rw-r--r--src/backend/access/nbtree/nbtpage.c58
-rw-r--r--src/backend/access/nbtree/nbtxlog.c60
-rw-r--r--src/include/access/nbtree.h15
3 files changed, 111 insertions, 22 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index b0eff770d0b..5df975e4ec5 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.118 2010/02/08 04:33:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.119 2010/02/13 00:59:58 sriggs Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
@@ -447,6 +447,48 @@ _bt_checkpage(Relation rel, Buffer buf)
}
/*
+ * Log the reuse of a page from the FSM.
+ */
+static void
+_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
+{
+ if (rel->rd_istemp)
+ return;
+
+ /* No ereport(ERROR) until changes are logged */
+ START_CRIT_SECTION();
+
+ /*
+ * We don't do MarkBufferDirty here because we're about to initialise
+ * the page, and nobody else can see it yet.
+ */
+
+ /* XLOG stuff */
+ {
+ XLogRecPtr recptr;
+ XLogRecData rdata[1];
+ xl_btree_reuse_page xlrec_reuse;
+
+ xlrec_reuse.node = rel->rd_node;
+ xlrec_reuse.block = blkno;
+ xlrec_reuse.latestRemovedXid = latestRemovedXid;
+ rdata[0].data = (char *) &xlrec_reuse;
+ rdata[0].len = SizeOfBtreeReusePage;
+ rdata[0].buffer = InvalidBuffer;
+ rdata[0].next = NULL;
+
+ recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
+
+ /*
+ * We don't do PageSetLSN or PageSetTLI here because
+ * we're about to initialise the page, so no need.
+ */
+ }
+
+ END_CRIT_SECTION();
+}
+
+/*
* _bt_getbuf() -- Get a buffer by block number for read or write.
*
* blkno == P_NEW means to get an unallocated index page. The page
@@ -510,7 +552,19 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
{
page = BufferGetPage(buf);
if (_bt_page_recyclable(page))
- {
+ {
+ /*
+ * If we are generating WAL for Hot Standby then create
+ * a WAL record that will allow us to conflict with
+ * queries running on standby.
+ */
+ if (XLogStandbyInfoActive())
+ {
+ BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
+
+ _bt_log_reuse_page(rel, blkno, opaque->btpo.xact);
+ }
+
/* Okay to use page. Re-initialize and return it */
_bt_pageinit(page, BufferGetPageSize(buf));
return buf;
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 83a7c98c14e..f5320fb1039 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.60 2010/02/08 04:33:53 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.61 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@@ -814,26 +814,48 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
{
uint8 info = record->xl_info & ~XLR_INFO_MASK;
- /*
- * Btree delete records can conflict with standby queries. You might
- * think that vacuum records would conflict as well, but we've handled
- * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
- * cleaned by the vacuum of the heap and so we can resolve any conflicts
- * just once when that arrives. After that any we know that no conflicts
- * exist from individual btree vacuum records on that index.
- */
- if (InHotStandby && info == XLOG_BTREE_DELETE)
+ if (InHotStandby)
{
- xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
+ switch (info)
+ {
+ case XLOG_BTREE_DELETE:
+ /*
+ * Btree delete records can conflict with standby queries. You might
+ * think that vacuum records would conflict as well, but we've handled
+ * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
+ * cleaned by the vacuum of the heap and so we can resolve any conflicts
+ * just once when that arrives. After that we know that no conflicts
+ * exist from individual btree vacuum records on that index.
+ */
+ {
+ xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
- /*
- * XXX Currently we put everybody on death row, because
- * currently _bt_delitems() supplies InvalidTransactionId.
- * This can be fairly painful, so providing a better value
- * here is worth some thought and possibly some effort to
- * improve.
- */
- ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ /*
+ * XXX Currently we put everybody on death row, because
+ * currently _bt_delitems() supplies InvalidTransactionId.
+ * This can be fairly painful, so providing a better value
+ * here is worth some thought and possibly some effort to
+ * improve.
+ */
+ ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ }
+ break;
+
+ case XLOG_BTREE_REUSE_PAGE:
+ /*
+ * Btree reuse page records exist to provide a conflict point when we
+ * reuse pages in the index via the FSM. That's all they do though.
+ */
+ {
+ xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
+
+ ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
+ }
+ return;
+
+ default:
+ break;
+ }
}
/*
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index acbb0cbc7d7..f3898a41408 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.128 2010/02/08 04:33:54 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.129 2010/02/13 00:59:58 sriggs Exp $
*
*-------------------------------------------------------------------------
*/
@@ -221,6 +221,7 @@ typedef struct BTMetaPageData
#define XLOG_BTREE_DELETE_PAGE_HALF 0xB0 /* page deletion that makes
* parent half-dead */
#define XLOG_BTREE_VACUUM 0xC0 /* delete entries on a page during vacuum */
+#define XLOG_BTREE_REUSE_PAGE 0xD0 /* old page is about to be reused from FSM */
/*
* All that we need to find changed index tuple
@@ -322,6 +323,18 @@ typedef struct xl_btree_delete
#define SizeOfBtreeDelete (offsetof(xl_btree_delete, latestRemovedXid) + sizeof(TransactionId))
/*
+ * This is what we need to know about page reuse within btree.
+ */
+typedef struct xl_btree_reuse_page
+{
+ RelFileNode node;
+ BlockNumber block;
+ TransactionId latestRemovedXid;
+} xl_btree_reuse_page;
+
+#define SizeOfBtreeReusePage (sizeof(xl_btree_reuse_page))
+
+/*
* This is what we need to know about vacuum of individual leaf index tuples.
* The WAL record can represent deletion of any number of index tuples on a
* single index page when executed by VACUUM.