Indexes with INCLUDE columns and their support in B-tree

This patch introduces the INCLUDE clause to index definitions. This clause specifies a list of columns which will be included as a non-key part of the index. The INCLUDE columns exist solely to allow more queries to benefit from index-only scans. Also, such columns don't need to have appropriate operator classes. Expressions are not supported as INCLUDE columns since they cannot be used in index-only scans. Index access methods supporting INCLUDE are indicated by the amcaninclude flag in IndexAmRoutine. For now, only B-tree indexes support the INCLUDE clause. In B-tree indexes, INCLUDE columns are truncated from pivot index tuples (tuples located in non-leaf pages and high keys). Therefore, B-tree indexes now might have a variable number of attributes. This patch also provides a generic facility to support that: pivot tuples contain the number of their attributes in t_tid.ip_posid. The free 13th bit of t_info is used to indicate that. This facility will simplify further support of index suffix truncation. The changes above are backward-compatible; pg_upgrade doesn't need special handling of B-tree indexes for that. Bump catalog version. Author: Anastasia Lubennikova, with contributions by Alexander Korotkov and me. Reviewed by: Peter Geoghegan, Tomas Vondra, Antonin Houska, Jeff Janes, David Rowley, Alexander Korotkov. Discussion: https://www.postgresql.org/message-id/flat/56168952.4010101@postgrespro.ru
author: Teodor Sigaev <teodor@sigaev.ru> 2018-04-07 23:00:39 +0300
committer: Teodor Sigaev <teodor@sigaev.ru> 2018-04-07 23:00:39 +0300
commit: 8224de4f42ccf98e08db07b43d52fed72f962ebb (patch)
tree: 0c4aae878e522178def568fcd2dd274233780f88 /src/backend/access/nbtree/nbtxlog.c
parent: 01bb85169afadfe63e2f0e344ff671292080de7e (diff)
download: postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.tar.gz
postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.zip
1 files changed, 22 insertions, 12 deletions
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index b565bcb5401..0986ef07cf3 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -202,7 +202,7 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
 }
 
 static void
-btree_xlog_split(bool onleft, XLogReaderState *record)
+btree_xlog_split(bool onleft, bool lhighkey, XLogReaderState *record)
 {
 	XLogRecPtr	lsn = record->EndRecPtr;
 	xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
@@ -248,11 +248,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record)
 
 	_bt_restore_page(rpage, datapos, datalen);
 
+	/* Non-leaf page should always have its high key logged. */
+	Assert(isleaf || lhighkey);
+
 	/*
-	 * On leaf level, the high key of the left page is equal to the first key
-	 * on the right page.
+	 * When the high key isn't present in the WAL record, then we assume it to
+	 * be equal to the first key on the right page.
 	 */
-	if (isleaf)
+	if (!lhighkey)
 	{
 		ItemId		hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque));
 
@@ -296,13 +299,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record)
 		}
 
 		/* Extract left hikey and its size (assuming 16-bit alignment) */
-		if (!isleaf)
+		if (lhighkey)
 		{
 			left_hikey = (IndexTuple) datapos;
 			left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
 			datapos += left_hikeysz;
 			datalen -= left_hikeysz;
 		}
+
 		Assert(datalen == 0);
 
 		newlpage = PageGetTempPageCopySpecial(lpage);
@@ -616,7 +620,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
 		 * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer.
 		 * Note that we are not looking at tuple data here, just headers.
 		 */
-		hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid));
+		hoffnum = ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid));
 		hitemid = PageGetItemId(hpage, hoffnum);
 
 		/*
@@ -764,11 +768,11 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
 		nextoffset = OffsetNumberNext(poffset);
 		itemid = PageGetItemId(page, nextoffset);
 		itup = (IndexTuple) PageGetItem(page, itemid);
-		rightsib = ItemPointerGetBlockNumber(&itup->t_tid);
+		rightsib = BTreeInnerTupleGetDownLink(itup);
 
 		itemid = PageGetItemId(page, poffset);
 		itup = (IndexTuple) PageGetItem(page, itemid);
-		ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY);
+		BTreeInnerTupleSetDownLink(itup, rightsib);
 		nextoffset = OffsetNumberNext(poffset);
 		PageIndexTupleDelete(page, nextoffset);
 
@@ -798,7 +802,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
 	MemSet(&trunctuple, 0, sizeof(IndexTupleData));
 	trunctuple.t_info = sizeof(IndexTupleData);
 	if (xlrec->topparent != InvalidBlockNumber)
-		ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
+		ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
 	else
 		ItemPointerSetInvalid(&trunctuple.t_tid);
 	if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
@@ -908,7 +912,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
 		MemSet(&trunctuple, 0, sizeof(IndexTupleData));
 		trunctuple.t_info = sizeof(IndexTupleData);
 		if (xlrec->topparent != InvalidBlockNumber)
-			ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY);
+			ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent);
 		else
 			ItemPointerSetInvalid(&trunctuple.t_tid);
 		if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY,
@@ -1004,10 +1008,16 @@ btree_redo(XLogReaderState *record)
 			btree_xlog_insert(false, true, record);
 			break;
 		case XLOG_BTREE_SPLIT_L:
-			btree_xlog_split(true, record);
+			btree_xlog_split(true, false, record);
+			break;
+		case XLOG_BTREE_SPLIT_L_HIGHKEY:
+			btree_xlog_split(true, true, record);
 			break;
 		case XLOG_BTREE_SPLIT_R:
-			btree_xlog_split(false, record);
+			btree_xlog_split(false, false, record);
+			break;
+		case XLOG_BTREE_SPLIT_R_HIGHKEY:
+			btree_xlog_split(false, true, record);
 			break;
 		case XLOG_BTREE_VACUUM:
 			btree_xlog_vacuum(record);
author	Teodor Sigaev <teodor@sigaev.ru>	2018-04-07 23:00:39 +0300
committer	Teodor Sigaev <teodor@sigaev.ru>	2018-04-07 23:00:39 +0300
commit	8224de4f42ccf98e08db07b43d52fed72f962ebb (patch)
tree	0c4aae878e522178def568fcd2dd274233780f88 /src/backend/access/nbtree/nbtxlog.c
parent	01bb85169afadfe63e2f0e344ff671292080de7e (diff)
download	postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.tar.gz postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.zip