Diffstat (limited to 'src/backend/access/nbtree/nbtree.c')
-rw-r--r--	src/backend/access/nbtree/nbtree.c	516
1 file changed, 516 insertions, 0 deletions
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
new file mode 100644
index 00000000000..06016119964
--- /dev/null
+++ b/src/backend/access/nbtree/nbtree.c
@@ -0,0 +1,516 @@
+/*-------------------------------------------------------------------------
+ *
+ * nbtree.c--
+ * Implementation of Lehman and Yao's btree management algorithm for
+ * Postgres.
+ *
+ * Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $
+ *
+ * NOTES
+ * This file contains only the public interface routines.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "storage/bufmgr.h"
+#include "storage/bufpage.h"
+
+#include "utils/elog.h"
+#include "utils/palloc.h"
+#include "utils/rel.h"
+#include "utils/excid.h"
+
+#include "access/heapam.h"
+#include "access/genam.h"
+#include "access/sdir.h"
+#include "access/nbtree.h"
+#include "access/funcindex.h"
+
+#include "nodes/execnodes.h"
+#include "nodes/plannodes.h"
+
+#include "executor/executor.h"
+#include "executor/tuptable.h"
+
+#include "catalog/index.h"
+
+bool BuildingBtree = false;
+bool FastBuild = false; /* turn this on to make bulk builds work */
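+/*
+ * (BuildingBtree is consulted by the btree page-management code, which
+ * can skip the usual locking while a build is in progress, since the
+ * new index is invisible to other transactions and the build is
+ * single-threaded.)
+ */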
+
+/*
+ * btbuild() -- build a new btree index.
+ *
+ * We use a global variable to record the fact that we're creating
+ * a new index. This is used to avoid high-concurrency locking,
+ * since the index won't be visible until this transaction commits
+ * and since building is guaranteed to be single-threaded.
+ */
+void
+btbuild(Relation heap,
+ Relation index,
+ int natts,
+ AttrNumber *attnum,
+ IndexStrategy istrat,
+ uint16 pcount,
+ Datum *params,
+ FuncIndexInfo *finfo,
+ PredInfo *predInfo)
+{
+ HeapScanDesc hscan;
+ Buffer buffer;
+ HeapTuple htup;
+ IndexTuple itup;
+ TupleDesc htupdesc, itupdesc;
+ Datum *attdata;
+ bool *nulls;
+ InsertIndexResult res;
+ int nhtups, nitups;
+ int i;
+ BTItem btitem;
+ ExprContext *econtext;
+ TupleTable tupleTable;
+ TupleTableSlot *slot;
+ Oid hrelid, irelid;
+ Node *pred, *oldPred;
+ void *spool;
+
+ /* note that this is a new btree */
+ BuildingBtree = true;
+
+ pred = predInfo->pred;
+ oldPred = predInfo->oldPred;
+
+ /* initialize the btree index metadata page (if this is a new index) */
+ if (oldPred == NULL)
+ _bt_metapinit(index);
+
+ /* get tuple descriptors for heap and index relations */
+ htupdesc = RelationGetTupleDescriptor(heap);
+ itupdesc = RelationGetTupleDescriptor(index);
+
+ /* get space for data items that'll appear in the index tuple */
+ attdata = (Datum *) palloc(natts * sizeof(Datum));
+ nulls = (bool *) palloc(natts * sizeof(bool));
+
+ /*
+ * If this is a predicate (partial) index, we will need to evaluate the
+ * predicate using ExecQual, which requires the current tuple to be in a
+ * slot of a TupleTable. In addition, ExecQual must have an ExprContext
+ * referring to that slot. Here, we initialize dummy TupleTable and
+ * ExprContext objects for this purpose. --Nels, Feb '92
+ */
+#ifndef OMIT_PARTIAL_INDEX
+ if (pred != NULL || oldPred != NULL) {
+ tupleTable = ExecCreateTupleTable(1);
+ slot = ExecAllocTableSlot(tupleTable);
+ econtext = makeNode(ExprContext);
+ FillDummyExprContext(econtext, slot, htupdesc, InvalidBuffer);
+ }
+#endif /* OMIT_PARTIAL_INDEX */
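+
+ /*
+ * Illustration (hypothetical command, hedged -- not taken from this
+ * file): a partial index might be defined with a predicate such as
+ *
+ *	create index emppay on emp (salary) where emp.salary > 5000
+ *
+ * in which case the scan loop below indexes only the heap tuples
+ * satisfying that predicate.
+ */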
+
+ /* start a heap scan */
+ hscan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL);
+ htup = heap_getnext(hscan, 0, &buffer);
+
+ /* build the index */
+ nhtups = nitups = 0;
+
+ if (FastBuild) {
+ spool = _bt_spoolinit(index, 7);
+ res = (InsertIndexResult) NULL;
+ }
+
+ for (; HeapTupleIsValid(htup); htup = heap_getnext(hscan, 0, &buffer)) {
+
+ nhtups++;
+
+ /*
+ * If oldPred != NULL, this is an EXTEND INDEX command, so skip
+ * this tuple if it was already in the existing partial index
+ */
+ if (oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+
+ /* SetSlotContents(slot, htup); */
+ slot->val = htup;
+ if (ExecQual((List*)oldPred, econtext) == true) {
+ nitups++;
+ continue;
+ }
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ /* Skip this tuple if it doesn't satisfy the partial-index predicate */
+ if (pred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+ /* SetSlotContents(slot, htup); */
+ slot->val = htup;
+ if (ExecQual((List*)pred, econtext) == false)
+ continue;
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ nitups++;
+
+ /*
+ * For the current heap tuple, extract all the attributes
+ * we use in this index, and note which are null.
+ */
+
+ for (i = 1; i <= natts; i++) {
+ int attoff;
+ bool attnull;
+
+ /*
+ * Offsets are from the start of the tuple, and are
+ * zero-based; indices are one-based. The next call
+ * returns i - 1. That's data hiding for you.
+ */
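+ /* (e.g., attribute number 1 maps to array offset 0) */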
+
+ attoff = AttrNumberGetAttrOffset(i);
+ attdata[attoff] = GetIndexValue(htup,
+ htupdesc,
+ attoff,
+ attnum,
+ finfo,
+ &attnull,
+ buffer);
+ nulls[attoff] = (attnull ? 'n' : ' ');	/* index_formtuple expects 'n'/' ' here */
+ }
+
+ /* form an index tuple and point it at the heap tuple */
+ itup = index_formtuple(itupdesc, attdata, nulls);
+
+ /*
+ * If the single index key is null, we don't insert it into
+ * the index. Btrees support scans on <, <=, =, >=, and >.
+ * Relational algebra says that A op B (where op is one of the
+ * operators above) returns null if either A or B is null. This
+ * means that no qualification used in an index scan could ever
+ * return true on a null attribute. It also means that indices
+ * can't be used by ISNULL or NOTNULL scans, but that's an
+ * artifact of the strategy map architecture chosen in 1986, not
+ * of the way nulls are handled here.
+ */
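+ /*
+ * (Example: if x is null, the qual "x < 42" evaluates to null
+ * rather than true, so no scan qualification could ever select
+ * this tuple through the index.)
+ */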
+
+ if (itup->t_info & INDEX_NULL_MASK) {
+ pfree(itup);
+ continue;
+ }
+
+ itup->t_tid = htup->t_ctid;
+ btitem = _bt_formitem(itup);
+
+ /*
+ * if we are doing bottom-up btree build, we insert the index
+ * item into a spool for subsequent processing. otherwise, we
+ * insert into the btree.
+ */
+ if (FastBuild) {
+ _bt_spool(index, btitem, spool);
+ } else {
+ res = _bt_doinsert(index, btitem);
+ }
+
+ pfree(btitem);
+ pfree(itup);
+ if (res) {
+ pfree(res);
+ }
+ }
+
+ /* okay, all heap tuples are indexed */
+ heap_endscan(hscan);
+
+ if (pred != NULL || oldPred != NULL) {
+#ifndef OMIT_PARTIAL_INDEX
+ ExecDestroyTupleTable(tupleTable, true);
+ pfree(econtext);
+#endif /* OMIT_PARTIAL_INDEX */
+ }
+
+ /*
+ * if we are doing bottom-up btree build, we now have a bunch of
+ * sorted runs in the spool pages. finish the build by (1)
+ * merging the runs, (2) inserting the sorted tuples into btree
+ * pages and (3) building the upper levels.
+ */
+ if (FastBuild) {
+ _bt_spool(index, (BTItem) NULL, spool); /* flush spool */
+ _bt_leafbuild(index, spool);
+ _bt_spooldestroy(spool);
+ }
+
+ /*
+ * Since we just counted the tuples in the heap, we update its
+ * stats in pg_class to guarantee that the planner takes advantage
+ * of the index we just created. Note that we update statistics
+ * only for normal index definitions, not for indices on system
+ * catalogs created during bootstrap processing. We must close the
+ * relations before updating statistics to guarantee that the
+ * relcache entries are flushed when we increment the command
+ * counter in UpdateStats().
+ */
+ if (IsNormalProcessingMode())
+ {
+ hrelid = heap->rd_id;
+ irelid = index->rd_id;
+ heap_close(heap);
+ index_close(index);
+ UpdateStats(hrelid, nhtups, true);
+ UpdateStats(irelid, nitups, false);
+ if (oldPred != NULL) {
+ if (nitups == nhtups)
+     pred = NULL;
+ UpdateIndexPredicate(irelid, oldPred, pred);
+ }
+ }
+
+ /* be tidy */
+ pfree(nulls);
+ pfree(attdata);
+
+ /* all done */
+ BuildingBtree = false;
+}
+
+/*
+ * btinsert() -- insert an index tuple into a btree.
+ *
+ * Descend the tree, find the appropriate location for our
+ * new tuple, put it there, and return an InsertIndexResult
+ * to the caller.
+ */
+InsertIndexResult
+btinsert(Relation rel, IndexTuple itup)
+{
+ BTItem btitem;
+ InsertIndexResult res;
+
+ if (itup->t_info & INDEX_NULL_MASK)
+ return ((InsertIndexResult) NULL);
+
+ btitem = _bt_formitem(itup);
+
+ res = _bt_doinsert(rel, btitem);
+ pfree(btitem);
+
+ return (res);
+}
+
+/*
+ * btgettuple() -- Get the next tuple in the scan.
+ */
+char *
+btgettuple(IndexScanDesc scan, ScanDirection dir)
+{
+ RetrieveIndexResult res;
+
+ /*
+ * If we've already initialized this scan, we can just advance it
+ * in the appropriate direction. If we haven't done so yet, we
+ * call a routine to get the first item in the scan.
+ */
+
+ if (ItemPointerIsValid(&(scan->currentItemData)))
+ res = _bt_next(scan, dir);
+ else
+ res = _bt_first(scan, dir);
+
+ return ((char *) res);
+}
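+
+/*
+ * A sketch of the intended calling sequence, for orientation only
+ * (real callers go through the generic index AM dispatch layer):
+ *
+ *	scan = (IndexScanDesc) btbeginscan(rel, false, 1, key);
+ *	while ((res = (RetrieveIndexResult)
+ *			btgettuple(scan, ForwardScanDirection)) != NULL) {
+ *		... fetch the heap tuple named by res, then pfree(res) ...
+ *	}
+ *	btendscan(scan);
+ */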
+
+/*
+ * btbeginscan() -- start a scan on a btree index
+ */
+char *
+btbeginscan(Relation rel, bool fromEnd, uint16 keysz, ScanKey scankey)
+{
+ IndexScanDesc scan;
+ StrategyNumber strat;
+ BTScanOpaque so;
+
+ /* first order the keys in the qualification */
+ if (keysz > 1)
+ _bt_orderkeys(rel, &keysz, scankey);
+
+ /* now get the scan */
+ scan = RelationGetIndexScan(rel, fromEnd, keysz, scankey);
+ so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
+ so->btso_curbuf = so->btso_mrkbuf = InvalidBuffer;
+ scan->opaque = so;
+
+ /* finally, be sure that the scan exploits the tree order */
+ scan->scanFromEnd = false;
+ scan->flags = 0x0;
+ if (keysz > 0) {
+ strat = _bt_getstrat(scan->relation, 1 /* XXX */,
+ scankey[0].sk_procedure);
+
+ if (strat == BTLessStrategyNumber
+ || strat == BTLessEqualStrategyNumber)
+ scan->scanFromEnd = true;
+ } else {
+ scan->scanFromEnd = true;
+ }
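+
+ /*
+ * (With a "<" or "<=" qual the matching tuples sit at one end of
+ * the key range, so there is no key value to descend the tree by;
+ * the scan therefore starts from an endpoint of the tree, as it
+ * does when there are no keys at all.)
+ */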
+
+ /* register scan in case we change pages it's using */
+ _bt_regscan(scan);
+
+ return ((char *) scan);
+}
+
+/*
+ * btrescan() -- rescan an index relation
+ */
+void
+btrescan(IndexScanDesc scan, bool fromEnd, ScanKey scankey)
+{
+ ItemPointer iptr;
+ BTScanOpaque so;
+
+ so = (BTScanOpaque) scan->opaque;
+
+ /* we hold a read lock on the current page in the scan */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+ _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* and we hold a read lock on the last marked item in the scan */
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
+ _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
+ so->btso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* reset the scan key */
+ if (scan->numberOfKeys > 0) {
+ memmove(scan->keyData,
+ scankey,
+ scan->numberOfKeys * sizeof(ScanKeyData));
+ }
+}
+
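+/*
+ * btmovescan() -- change the argument of the first scan key
+ */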
+void
+btmovescan(IndexScanDesc scan, Datum v)
+{
+ ItemPointer iptr;
+ BTScanOpaque so;
+
+ so = (BTScanOpaque) scan->opaque;
+
+ /* release any locks we still hold */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+ _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ scan->keyData[0].sk_argument = v;
+}
+
+/*
+ * btendscan() -- close down a scan
+ */
+void
+btendscan(IndexScanDesc scan)
+{
+ ItemPointer iptr;
+ BTScanOpaque so;
+
+ so = (BTScanOpaque) scan->opaque;
+
+ /* release any locks we still hold */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+ if (BufferIsValid(so->btso_curbuf))
+ _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
+ if (BufferIsValid(so->btso_mrkbuf))
+ _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
+ so->btso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* don't need scan registered anymore */
+ _bt_dropscan(scan);
+
+ /* be tidy */
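+ /*
+ * (The PERFECT_MMGR guard below suggests the explicit pfree is
+ * skipped under the ordinary memory manager, which reclaims
+ * palloc'd state wholesale at end of transaction anyway; that is
+ * an inference from the #ifdef, not documented behavior.)
+ */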
+#ifdef PERFECT_MMGR
+ pfree (scan->opaque);
+#endif /* PERFECT_MMGR */
+}
+
+/*
+ * btmarkpos() -- save current scan position
+ */
+void
+btmarkpos(IndexScanDesc scan)
+{
+ ItemPointer iptr;
+ BTScanOpaque so;
+
+ so = (BTScanOpaque) scan->opaque;
+
+ /* release lock on old marked data, if any */
+ if (ItemPointerIsValid(iptr = &(scan->currentMarkData))) {
+ _bt_relbuf(scan->relation, so->btso_mrkbuf, BT_READ);
+ so->btso_mrkbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* bump lock on currentItemData and copy to currentMarkData */
+ if (ItemPointerIsValid(&(scan->currentItemData))) {
+ so->btso_mrkbuf = _bt_getbuf(scan->relation,
+ BufferGetBlockNumber(so->btso_curbuf),
+ BT_READ);
+ scan->currentMarkData = scan->currentItemData;
+ }
+}
+
+/*
+ * btrestrpos() -- restore scan to last saved position
+ */
+void
+btrestrpos(IndexScanDesc scan)
+{
+ ItemPointer iptr;
+ BTScanOpaque so;
+
+ so = (BTScanOpaque) scan->opaque;
+
+ /* release lock on current data, if any */
+ if (ItemPointerIsValid(iptr = &(scan->currentItemData))) {
+ _bt_relbuf(scan->relation, so->btso_curbuf, BT_READ);
+ so->btso_curbuf = InvalidBuffer;
+ ItemPointerSetInvalid(iptr);
+ }
+
+ /* bump lock on currentMarkData and copy to currentItemData */
+ if (ItemPointerIsValid(&(scan->currentMarkData))) {
+ so->btso_curbuf = _bt_getbuf(scan->relation,
+ BufferGetBlockNumber(so->btso_mrkbuf),
+ BT_READ);
+
+ scan->currentItemData = scan->currentMarkData;
+ }
+}
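+
+/*
+ * A sketch of the mark/restore protocol, for orientation only:
+ *
+ *	... position the scan with btgettuple() ...
+ *	btmarkpos(scan);		-- remember the current position
+ *	... scan further with btgettuple() ...
+ *	btrestrpos(scan);		-- back up to the remembered position
+ */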
+
+/*
+ * btdelete() -- delete an item from a btree.
+ */
+void
+btdelete(Relation rel, ItemPointer tid)
+{
+ /* adjust any active scans that will be affected by this deletion */
+ _bt_adjscans(rel, tid);
+
+ /* delete the data from the page */
+ _bt_pagedel(rel, tid);
+}