diff options
author | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-07 23:00:39 +0300 |
---|---|---|
committer | Teodor Sigaev <teodor@sigaev.ru> | 2018-04-07 23:00:39 +0300 |
commit | 8224de4f42ccf98e08db07b43d52fed72f962ebb (patch) | |
tree | 0c4aae878e522178def568fcd2dd274233780f88 /src/backend/access/nbtree/nbtxlog.c | |
parent | 01bb85169afadfe63e2f0e344ff671292080de7e (diff) | |
download | postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.tar.gz postgresql-8224de4f42ccf98e08db07b43d52fed72f962ebb.zip |
Indexes with INCLUDE columns and their support in B-tree
This patch introduces INCLUDE clause to index definition. This clause
specifies a list of columns which will be included as a non-key part in
the index. The INCLUDE columns exist solely to allow more queries to
benefit from index-only scans. Also, such columns don't need to have
appropriate operator classes. Expressions are not supported as INCLUDE
columns since they cannot be used in index-only scans.
Index access methods supporting INCLUDE are indicated by amcaninclude flag
in IndexAmRoutine. For now, only B-tree indexes support INCLUDE clause.
In B-tree indexes INCLUDE columns are truncated from pivot index tuples
(tuples located in non-leaf pages and high keys). Therefore, B-tree indexes
now might have variable number of attributes. This patch also provides
generic facility to support that: pivot tuples contain number of their
attributes in t_tid.ip_posid. Free 13th bit of t_info is used for indicating
that. This facility will simplify further support of index suffix truncation.
The changes of above are backward-compatible, pg_upgrade doesn't need special
handling of B-tree indexes for that.
Bump catalog version
Author: Anastasia Lubennikova with contribition by Alexander Korotkov and me
Reviewed by: Peter Geoghegan, Tomas Vondra, Antonin Houska, Jeff Janes,
David Rowley, Alexander Korotkov
Discussion: https://www.postgresql.org/message-id/flat/56168952.4010101@postgrespro.ru
Diffstat (limited to 'src/backend/access/nbtree/nbtxlog.c')
-rw-r--r-- | src/backend/access/nbtree/nbtxlog.c | 34 |
1 files changed, 22 insertions, 12 deletions
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c index b565bcb5401..0986ef07cf3 100644 --- a/src/backend/access/nbtree/nbtxlog.c +++ b/src/backend/access/nbtree/nbtxlog.c @@ -202,7 +202,7 @@ btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record) } static void -btree_xlog_split(bool onleft, XLogReaderState *record) +btree_xlog_split(bool onleft, bool lhighkey, XLogReaderState *record) { XLogRecPtr lsn = record->EndRecPtr; xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record); @@ -248,11 +248,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record) _bt_restore_page(rpage, datapos, datalen); + /* Non-leaf page should always have its high key logged. */ + Assert(isleaf || lhighkey); + /* - * On leaf level, the high key of the left page is equal to the first key - * on the right page. + * When the high key isn't present is the wal record, then we assume it to + * be equal to the first key on the right page. */ - if (isleaf) + if (!lhighkey) { ItemId hiItemId = PageGetItemId(rpage, P_FIRSTDATAKEY(ropaque)); @@ -296,13 +299,14 @@ btree_xlog_split(bool onleft, XLogReaderState *record) } /* Extract left hikey and its size (assuming 16-bit alignment) */ - if (!isleaf) + if (lhighkey) { left_hikey = (IndexTuple) datapos; left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey)); datapos += left_hikeysz; datalen -= left_hikeysz; } + Assert(datalen == 0); newlpage = PageGetTempPageCopySpecial(lpage); @@ -616,7 +620,7 @@ btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record) * heap_fetch, since it uses ReadBuffer rather than XLogReadBuffer. * Note that we are not looking at tuple data here, just headers. */ - hoffnum = ItemPointerGetOffsetNumber(&(itup->t_tid)); + hoffnum = ItemPointerGetOffsetNumberNoCheck(&(itup->t_tid)); hitemid = PageGetItemId(hpage, hoffnum); /* @@ -764,11 +768,11 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) nextoffset = OffsetNumberNext(poffset); itemid = PageGetItemId(page, nextoffset); itup = (IndexTuple) PageGetItem(page, itemid); - rightsib = ItemPointerGetBlockNumber(&itup->t_tid); + rightsib = BTreeInnerTupleGetDownLink(itup); itemid = PageGetItemId(page, poffset); itup = (IndexTuple) PageGetItem(page, itemid); - ItemPointerSet(&(itup->t_tid), rightsib, P_HIKEY); + BTreeInnerTupleSetDownLink(itup, rightsib); nextoffset = OffsetNumberNext(poffset); PageIndexTupleDelete(page, nextoffset); @@ -798,7 +802,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); + ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, @@ -908,7 +912,7 @@ btree_xlog_unlink_page(uint8 info, XLogReaderState *record) MemSet(&trunctuple, 0, sizeof(IndexTupleData)); trunctuple.t_info = sizeof(IndexTupleData); if (xlrec->topparent != InvalidBlockNumber) - ItemPointerSet(&trunctuple.t_tid, xlrec->topparent, P_HIKEY); + ItemPointerSetBlockNumber(&trunctuple.t_tid, xlrec->topparent); else ItemPointerSetInvalid(&trunctuple.t_tid); if (PageAddItem(page, (Item) &trunctuple, sizeof(IndexTupleData), P_HIKEY, @@ -1004,10 +1008,16 @@ btree_redo(XLogReaderState *record) btree_xlog_insert(false, true, record); break; case XLOG_BTREE_SPLIT_L: - btree_xlog_split(true, record); + btree_xlog_split(true, false, record); + break; + case XLOG_BTREE_SPLIT_L_HIGHKEY: + btree_xlog_split(true, true, record); break; case XLOG_BTREE_SPLIT_R: - btree_xlog_split(false, record); + btree_xlog_split(false, false, record); + break; + case XLOG_BTREE_SPLIT_R_HIGHKEY: + btree_xlog_split(false, true, record); break; case XLOG_BTREE_VACUUM: btree_xlog_vacuum(record); |