diff options
Diffstat (limited to 'src/backend/access/rtree')
-rw-r--r-- | src/backend/access/rtree/Makefile.inc | 14 | ||||
-rw-r--r-- | src/backend/access/rtree/rtget.c | 320 | ||||
-rw-r--r-- | src/backend/access/rtree/rtproc.c | 150 | ||||
-rw-r--r-- | src/backend/access/rtree/rtree.c | 955 | ||||
-rw-r--r-- | src/backend/access/rtree/rtscan.c | 392 | ||||
-rw-r--r-- | src/backend/access/rtree/rtstrat.c | 239 |
6 files changed, 2070 insertions, 0 deletions
diff --git a/src/backend/access/rtree/Makefile.inc b/src/backend/access/rtree/Makefile.inc new file mode 100644 index 00000000000..a93a5e53290 --- /dev/null +++ b/src/backend/access/rtree/Makefile.inc @@ -0,0 +1,14 @@ +#------------------------------------------------------------------------- +# +# Makefile.inc-- +# Makefile for access/rtree (R-Tree access method) +# +# Copyright (c) 1994, Regents of the University of California +# +# +# IDENTIFICATION +# $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/Makefile.inc,v 1.1.1.1 1996/07/09 06:21:12 scrappy Exp $ +# +#------------------------------------------------------------------------- + +SUBSRCS+= rtget.c rtproc.c rtree.c rtscan.c rtstrat.c diff --git a/src/backend/access/rtree/rtget.c b/src/backend/access/rtree/rtget.c new file mode 100644 index 00000000000..fb2e169297d --- /dev/null +++ b/src/backend/access/rtree/rtget.c @@ -0,0 +1,320 @@ +/*------------------------------------------------------------------------- + * + * rtget.c-- + * fetch tuples from an rtree scan. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtget.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/iqual.h" +#include "access/rtree.h" +#include "access/sdir.h" + +static OffsetNumber findnext(IndexScanDesc s, Page p, OffsetNumber n, + ScanDirection dir); +static RetrieveIndexResult rtscancache(IndexScanDesc s, ScanDirection dir); +static RetrieveIndexResult rtfirst(IndexScanDesc s, ScanDirection dir); +static RetrieveIndexResult rtnext(IndexScanDesc s, ScanDirection dir); +static ItemPointer rtheapptr(Relation r, ItemPointer itemp); + + +RetrieveIndexResult +rtgettuple(IndexScanDesc s, ScanDirection dir) +{ + RetrieveIndexResult res; + + /* if we have it cached in the scan desc, just return the value */ + if ((res = rtscancache(s, dir)) != (RetrieveIndexResult) NULL) + return (res); + + /* not cached, so we'll have to do some work */ + if (ItemPointerIsValid(&(s->currentItemData))) { + res = rtnext(s, dir); + } else { + res = rtfirst(s, dir); + } + return (res); +} + +static RetrieveIndexResult +rtfirst(IndexScanDesc s, ScanDirection dir) +{ + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + RTreePageOpaque po; + RTreeScanOpaque so; + RTSTACK *stk; + BlockNumber blk; + IndexTuple it; + ItemPointer ip; + + b = ReadBuffer(s->relation, P_ROOT); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + for (;;) { + maxoff = PageGetMaxOffsetNumber(p); + if (ScanDirectionIsBackward(dir)) + n = findnext(s, p, maxoff, dir); + else + n = findnext(s, p, FirstOffsetNumber, dir); + + while (n < FirstOffsetNumber || n > maxoff) { + + ReleaseBuffer(b); + if (so->s_stack == (RTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->rts_blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + maxoff = PageGetMaxOffsetNumber(p); + + if (ScanDirectionIsBackward(dir)) { + n = OffsetNumberPrev(stk->rts_child); + } else { + n = OffsetNumberNext(stk->rts_child); + } + so->s_stack = stk->rts_parent; + pfree(stk); + + n = findnext(s, p, n, dir); + } + if (po->flags & F_LEAF) { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + ip = (ItemPointer) palloc(sizeof(ItemPointerData)); + memmove((char *) ip, (char *) &(it->t_tid), + sizeof(ItemPointerData)); + ReleaseBuffer(b); + + res = FormRetrieveIndexResult(&(s->currentItemData), ip); + + return (res); + } else { + stk = (RTSTACK *) palloc(sizeof(RTSTACK)); + stk->rts_child = n; + stk->rts_blk = BufferGetBlockNumber(b); + stk->rts_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + } + } +} + +static RetrieveIndexResult +rtnext(IndexScanDesc s, ScanDirection dir) +{ + Buffer b; + Page p; + OffsetNumber n; + OffsetNumber maxoff; + RetrieveIndexResult res; + RTreePageOpaque po; + RTreeScanOpaque so; + RTSTACK *stk; + BlockNumber blk; + IndexTuple it; + ItemPointer ip; + + blk = ItemPointerGetBlockNumber(&(s->currentItemData)); + n = ItemPointerGetOffsetNumber(&(s->currentItemData)); + + if (ScanDirectionIsForward(dir)) { + n = OffsetNumberNext(n); + } else { + n = OffsetNumberPrev(n); + } + + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + for (;;) { + maxoff = PageGetMaxOffsetNumber(p); + n = findnext(s, p, n, dir); + + while (n < FirstOffsetNumber || n > maxoff) { + + ReleaseBuffer(b); + if (so->s_stack == (RTSTACK *) NULL) + return ((RetrieveIndexResult) NULL); + + stk = so->s_stack; + b = ReadBuffer(s->relation, stk->rts_blk); + p = BufferGetPage(b); + maxoff = PageGetMaxOffsetNumber(p); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) { + n = OffsetNumberPrev(stk->rts_child); + } else { + n = OffsetNumberNext(stk->rts_child); + } + so->s_stack = stk->rts_parent; + pfree(stk); + + n = findnext(s, p, n, dir); + } + if (po->flags & F_LEAF) { + ItemPointerSet(&(s->currentItemData), BufferGetBlockNumber(b), n); + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + ip = (ItemPointer) palloc(sizeof(ItemPointerData)); + memmove((char *) ip, (char *) &(it->t_tid), + sizeof(ItemPointerData)); + ReleaseBuffer(b); + + res = FormRetrieveIndexResult(&(s->currentItemData), ip); + + return (res); + } else { + stk = (RTSTACK *) palloc(sizeof(RTSTACK)); + stk->rts_child = n; + stk->rts_blk = BufferGetBlockNumber(b); + stk->rts_parent = so->s_stack; + so->s_stack = stk; + + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + blk = ItemPointerGetBlockNumber(&(it->t_tid)); + + ReleaseBuffer(b); + b = ReadBuffer(s->relation, blk); + p = BufferGetPage(b); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + + if (ScanDirectionIsBackward(dir)) { + n = PageGetMaxOffsetNumber(p); + } else { + n = FirstOffsetNumber; + } + } + } +} + +static OffsetNumber +findnext(IndexScanDesc s, Page p, OffsetNumber n, ScanDirection dir) +{ + OffsetNumber maxoff; + IndexTuple it; + RTreePageOpaque po; + RTreeScanOpaque so; + + maxoff = PageGetMaxOffsetNumber(p); + po = (RTreePageOpaque) PageGetSpecialPointer(p); + so = (RTreeScanOpaque) s->opaque; + + /* + * If we modified the index during the scan, we may have a pointer to + * a ghost tuple, before the scan. If this is the case, back up one. + */ + + if (so->s_flags & RTS_CURBEFORE) { + so->s_flags &= ~RTS_CURBEFORE; + n = OffsetNumberPrev(n); + } + + while (n >= FirstOffsetNumber && n <= maxoff) { + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + if (po->flags & F_LEAF) { + if (index_keytest(it, + RelationGetTupleDescriptor(s->relation), + s->numberOfKeys, s->keyData)) + break; + } else { + if (index_keytest(it, + RelationGetTupleDescriptor(s->relation), + so->s_internalNKey, so->s_internalKey)) + break; + } + + if (ScanDirectionIsBackward(dir)) { + n = OffsetNumberPrev(n); + } else { + n = OffsetNumberNext(n); + } + } + + return (n); +} + +static RetrieveIndexResult +rtscancache(IndexScanDesc s, ScanDirection dir) +{ + RetrieveIndexResult res; + ItemPointer ip; + + if (!(ScanDirectionIsNoMovement(dir) + && ItemPointerIsValid(&(s->currentItemData)))) { + + return ((RetrieveIndexResult) NULL); + } + + ip = rtheapptr(s->relation, &(s->currentItemData)); + + if (ItemPointerIsValid(ip)) + res = FormRetrieveIndexResult(&(s->currentItemData), ip); + else + res = (RetrieveIndexResult) NULL; + + return (res); +} + +/* + * rtheapptr returns the item pointer to the tuple in the heap relation + * for which itemp is the index relation item pointer. + */ +static ItemPointer +rtheapptr(Relation r, ItemPointer itemp) +{ + Buffer b; + Page p; + IndexTuple it; + ItemPointer ip; + OffsetNumber n; + + ip = (ItemPointer) palloc(sizeof(ItemPointerData)); + if (ItemPointerIsValid(itemp)) { + b = ReadBuffer(r, ItemPointerGetBlockNumber(itemp)); + p = BufferGetPage(b); + n = ItemPointerGetOffsetNumber(itemp); + it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); + memmove((char *) ip, (char *) &(it->t_tid), + sizeof(ItemPointerData)); + ReleaseBuffer(b); + } else { + ItemPointerSetInvalid(ip); + } + + return (ip); +} diff --git a/src/backend/access/rtree/rtproc.c b/src/backend/access/rtree/rtproc.c new file mode 100644 index 00000000000..a2f7bef46b4 --- /dev/null +++ b/src/backend/access/rtree/rtproc.c @@ -0,0 +1,150 @@ +/*------------------------------------------------------------------------- + * + * rtproc.c-- + * pg_amproc entries for rtrees. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtproc.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include <math.h> +#include <string.h> + +#include "postgres.h" + +#include "utils/elog.h" +#include "utils/geo-decls.h" +#include "utils/palloc.h" + +BOX +*rt_box_union(BOX *a, BOX *b) +{ + BOX *n; + + if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL) + elog(WARN, "Cannot allocate box for union"); + + n->xh = Max(a->xh, b->xh); + n->yh = Max(a->yh, b->yh); + n->xl = Min(a->xl, b->xl); + n->yl = Min(a->yl, b->yl); + + return (n); +} + +BOX * +rt_box_inter(BOX *a, BOX *b) +{ + BOX *n; + + if ((n = (BOX *) palloc(sizeof (*n))) == (BOX *) NULL) + elog(WARN, "Cannot allocate box for union"); + + n->xh = Min(a->xh, b->xh); + n->yh = Min(a->yh, b->yh); + n->xl = Max(a->xl, b->xl); + n->yl = Max(a->yl, b->yl); + + if (n->xh < n->xl || n->yh < n->yl) { + pfree(n); + return ((BOX *) NULL); + } + + return (n); +} + +void +rt_box_size(BOX *a, float *size) +{ + if (a == (BOX *) NULL || a->xh <= a->xl || a->yh <= a->yl) + *size = 0.0; + else + *size = (float) ((a->xh - a->xl) * (a->yh - a->yl)); + + return; +} + +/* + * rt_bigbox_size() -- Compute a size for big boxes. + * + * In an earlier release of the system, this routine did something + * different from rt_box_size. We now use floats, rather than ints, + * as the return type for the size routine, so we no longer need to + * have a special return type for big boxes. + */ +void +rt_bigbox_size(BOX *a, float *size) +{ + rt_box_size(a, size); +} + +POLYGON * +rt_poly_union(POLYGON *a, POLYGON *b) +{ + POLYGON *p; + + p = (POLYGON *)PALLOCTYPE(POLYGON); + + if (!PointerIsValid(p)) + elog(WARN, "Cannot allocate polygon for union"); + + memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ + p->size = sizeof(POLYGON); + p->npts = 0; + p->boundbox.xh = Max(a->boundbox.xh, b->boundbox.xh); + p->boundbox.yh = Max(a->boundbox.yh, b->boundbox.yh); + p->boundbox.xl = Min(a->boundbox.xl, b->boundbox.xl); + p->boundbox.yl = Min(a->boundbox.yl, b->boundbox.yl); + return p; +} + +void +rt_poly_size(POLYGON *a, float *size) +{ + double xdim, ydim; + + size = (float *) palloc(sizeof(float)); + if (a == (POLYGON *) NULL || + a->boundbox.xh <= a->boundbox.xl || + a->boundbox.yh <= a->boundbox.yl) + *size = 0.0; + else { + xdim = (a->boundbox.xh - a->boundbox.xl); + ydim = (a->boundbox.yh - a->boundbox.yl); + + *size = (float) (xdim * ydim); + } + + return; +} + +POLYGON * +rt_poly_inter(POLYGON *a, POLYGON *b) +{ + POLYGON *p; + + p = (POLYGON *) PALLOCTYPE(POLYGON); + + if (!PointerIsValid(p)) + elog(WARN, "Cannot allocate polygon for intersection"); + + memset((char *) p, 0, sizeof(POLYGON)); /* zero any holes */ + p->size = sizeof(POLYGON); + p->npts = 0; + p->boundbox.xh = Min(a->boundbox.xh, b->boundbox.xh); + p->boundbox.yh = Min(a->boundbox.yh, b->boundbox.yh); + p->boundbox.xl = Max(a->boundbox.xl, b->boundbox.xl); + p->boundbox.yl = Max(a->boundbox.yl, b->boundbox.yl); + + if (p->boundbox.xh < p->boundbox.xl || p->boundbox.yh < p->boundbox.yl) + { + pfree(p); + return ((POLYGON *) NULL); + } + + return (p); +} diff --git a/src/backend/access/rtree/rtree.c b/src/backend/access/rtree/rtree.c new file mode 100644 index 00000000000..96efc3bc90b --- /dev/null +++ b/src/backend/access/rtree/rtree.c @@ -0,0 +1,955 @@ +/*------------------------------------------------------------------------- + * + * rtree.c-- + * interface routines for the postgres rtree indexed access method. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/rtree.h" +#include "access/rtscan.h" +#include "access/funcindex.h" +#include "access/tupdesc.h" + +#include "nodes/execnodes.h" +#include "nodes/plannodes.h" + +#include "executor/executor.h" +#include "executor/tuptable.h" + +#include "catalog/index.h" + +typedef struct SPLITVEC { + OffsetNumber *spl_left; + int spl_nleft; + char *spl_ldatum; + OffsetNumber *spl_right; + int spl_nright; + char *spl_rdatum; +} SPLITVEC; + +typedef struct RTSTATE { + func_ptr unionFn; /* union function */ + func_ptr sizeFn; /* size function */ + func_ptr interFn; /* intersection function */ +} RTSTATE; + +/* non-export function prototypes */ +static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup, + RTSTATE *rtstate); +static void rttighten(Relation r, RTSTACK *stk, char *datum, int att_size, + RTSTATE *rtstate); +static InsertIndexResult dosplit(Relation r, Buffer buffer, RTSTACK *stack, + IndexTuple itup, RTSTATE *rtstate); +static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup, + IndexTuple rtup, RTSTATE *rtstate); +static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt); +static void picksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup, + RTSTATE *rtstate); +static void RTInitBuffer(Buffer b, uint32 f); +static OffsetNumber choose(Relation r, Page p, IndexTuple it, + RTSTATE *rtstate); +static int nospace(Page p, IndexTuple it); +static void initRtstate(RTSTATE *rtstate, Relation index); + + +void +rtbuild(Relation heap, + Relation index, + int natts, + AttrNumber *attnum, + IndexStrategy istrat, + uint16 pcount, + Datum *params, + FuncIndexInfo *finfo, + PredInfo *predInfo) +{ + HeapScanDesc scan; + Buffer buffer; + AttrNumber i; + HeapTuple htup; + IndexTuple itup; + TupleDesc hd, id; + InsertIndexResult res; + Datum *d; + bool *nulls; + int nb, nh, ni; + ExprContext *econtext; + TupleTable tupleTable; + TupleTableSlot *slot; + Oid hrelid, irelid; + Node *pred, *oldPred; + RTSTATE rtState; + + initRtstate(&rtState, index); + + /* rtrees only know how to do stupid locking now */ + RelationSetLockForWrite(index); + + pred = predInfo->pred; + oldPred = predInfo->oldPred; + + /* + * We expect to be called exactly once for any index relation. + * If that's not the case, big trouble's what we have. + */ + + if (oldPred == NULL && (nb = RelationGetNumberOfBlocks(index)) != 0) + elog(WARN, "%s already contains data", index->rd_rel->relname.data); + + /* initialize the root page (if this is a new index) */ + if (oldPred == NULL) { + buffer = ReadBuffer(index, P_NEW); + RTInitBuffer(buffer, F_LEAF); + WriteBuffer(buffer); + } + + /* init the tuple descriptors and get set for a heap scan */ + hd = RelationGetTupleDescriptor(heap); + id = RelationGetTupleDescriptor(index); + d = (Datum *)palloc(natts * sizeof (*d)); + nulls = (bool *)palloc(natts * sizeof (*nulls)); + + /* + * If this is a predicate (partial) index, we will need to evaluate the + * predicate using ExecQual, which requires the current tuple to be in a + * slot of a TupleTable. In addition, ExecQual must have an ExprContext + * referring to that slot. Here, we initialize dummy TupleTable and + * ExprContext objects for this purpose. --Nels, Feb '92 + */ +#ifndef OMIT_PARTIAL_INDEX + if (pred != NULL || oldPred != NULL) { + tupleTable = ExecCreateTupleTable(1); + slot = ExecAllocTableSlot(tupleTable); + econtext = makeNode(ExprContext); + FillDummyExprContext(econtext, slot, hd, buffer); + } +#endif /* OMIT_PARTIAL_INDEX */ + scan = heap_beginscan(heap, 0, NowTimeQual, 0, (ScanKey) NULL); + htup = heap_getnext(scan, 0, &buffer); + + /* count the tuples as we insert them */ + nh = ni = 0; + + for (; HeapTupleIsValid(htup); htup = heap_getnext(scan, 0, &buffer)) { + + nh++; + + /* + * If oldPred != NULL, this is an EXTEND INDEX command, so skip + * this tuple if it was already in the existing partial index + */ + if (oldPred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + /*SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List*)oldPred, econtext) == true) { + ni++; + continue; + } +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* Skip this tuple if it doesn't satisfy the partial-index predicate */ + if (pred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + /*SetSlotContents(slot, htup); */ + slot->val = htup; + if (ExecQual((List*)pred, econtext) == false) + continue; +#endif /* OMIT_PARTIAL_INDEX */ + } + + ni++; + + /* + * For the current heap tuple, extract all the attributes + * we use in this index, and note which are null. + */ + + for (i = 1; i <= natts; i++) { + int attoff; + bool attnull; + + /* + * Offsets are from the start of the tuple, and are + * zero-based; indices are one-based. The next call + * returns i - 1. That's data hiding for you. + */ + + attoff = AttrNumberGetAttrOffset(i); + /* + d[attoff] = HeapTupleGetAttributeValue(htup, buffer, + */ + d[attoff] = GetIndexValue(htup, + hd, + attoff, + attnum, + finfo, + &attnull, + buffer); + nulls[attoff] = (attnull ? 'n' : ' '); + } + + /* form an index tuple and point it at the heap tuple */ + itup = index_formtuple(id, &d[0], nulls); + itup->t_tid = htup->t_ctid; + + /* + * Since we already have the index relation locked, we + * call rtdoinsert directly. Normal access method calls + * dispatch through rtinsert, which locks the relation + * for write. This is the right thing to do if you're + * inserting single tups, but not when you're initializing + * the whole index at once. + */ + + res = rtdoinsert(index, itup, &rtState); + pfree(itup); + pfree(res); + } + + /* okay, all heap tuples are indexed */ + heap_endscan(scan); + RelationUnsetLockForWrite(index); + + if (pred != NULL || oldPred != NULL) { +#ifndef OMIT_PARTIAL_INDEX + ExecDestroyTupleTable(tupleTable, true); + pfree(econtext); +#endif /* OMIT_PARTIAL_INDEX */ + } + + /* + * Since we just counted the tuples in the heap, we update its + * stats in pg_relation to guarantee that the planner takes + * advantage of the index we just created. UpdateStats() does a + * CommandCounterIncrement(), which flushes changed entries from + * the system relcache. The act of constructing an index changes + * these heap and index tuples in the system catalogs, so they + * need to be flushed. We close them to guarantee that they + * will be. + */ + + hrelid = heap->rd_id; + irelid = index->rd_id; + heap_close(heap); + index_close(index); + + UpdateStats(hrelid, nh, true); + UpdateStats(irelid, ni, false); + + if (oldPred != NULL) { + if (ni == nh) pred = NULL; + UpdateIndexPredicate(irelid, oldPred, pred); + } + + /* be tidy */ + pfree(nulls); + pfree(d); +} + +/* + * rtinsert -- wrapper for rtree tuple insertion. + * + * This is the public interface routine for tuple insertion in rtrees. + * It doesn't do any work; just locks the relation and passes the buck. + */ +InsertIndexResult +rtinsert(Relation r, IndexTuple itup) +{ + InsertIndexResult res; + RTSTATE rtState; + + initRtstate(&rtState, r); + + RelationSetLockForWrite(r); + res = rtdoinsert(r, itup, &rtState); + + /* XXX two-phase locking -- don't unlock the relation until EOT */ + return (res); +} + +static InsertIndexResult +rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate) +{ + Page page; + Buffer buffer; + BlockNumber blk; + IndexTuple which; + OffsetNumber l; + RTSTACK *stack; + InsertIndexResult res; + RTreePageOpaque opaque; + char *datum; + + blk = P_ROOT; + buffer = InvalidBuffer; + stack = (RTSTACK *) NULL; + + do { + /* let go of current buffer before getting next */ + if (buffer != InvalidBuffer) + ReleaseBuffer(buffer); + + /* get next buffer */ + buffer = ReadBuffer(r, blk); + page = (Page) BufferGetPage(buffer); + + opaque = (RTreePageOpaque) PageGetSpecialPointer(page); + if (!(opaque->flags & F_LEAF)) { + RTSTACK *n; + ItemId iid; + + n = (RTSTACK *) palloc(sizeof(RTSTACK)); + n->rts_parent = stack; + n->rts_blk = blk; + n->rts_child = choose(r, page, itup, rtstate); + stack = n; + + iid = PageGetItemId(page, n->rts_child); + which = (IndexTuple) PageGetItem(page, iid); + blk = ItemPointerGetBlockNumber(&(which->t_tid)); + } + } while (!(opaque->flags & F_LEAF)); + + if (nospace(page, itup)) { + /* need to do a split */ + res = dosplit(r, buffer, stack, itup, rtstate); + freestack(stack); + WriteBuffer(buffer); /* don't forget to release buffer! */ + return (res); + } + + /* add the item and write the buffer */ + if (PageIsEmpty(page)) { + l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), + FirstOffsetNumber, + LP_USED); + } else { + l = PageAddItem(page, (Item) itup, IndexTupleSize(itup), + OffsetNumberNext(PageGetMaxOffsetNumber(page)), + LP_USED); + } + + WriteBuffer(buffer); + + datum = (((char *) itup) + sizeof(IndexTupleData)); + + /* now expand the page boundary in the parent to include the new child */ + rttighten(r, stack, datum, + (IndexTupleSize(itup) - sizeof(IndexTupleData)), rtstate); + freestack(stack); + + /* build and return an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + ItemPointerSet(&(res->pointerData), blk, l); + + return (res); +} + +static void +rttighten(Relation r, + RTSTACK *stk, + char *datum, + int att_size, + RTSTATE *rtstate) +{ + char *oldud; + char *tdatum; + Page p; + float old_size, newd_size; + Buffer b; + + if (stk == (RTSTACK *) NULL) + return; + + b = ReadBuffer(r, stk->rts_blk); + p = BufferGetPage(b); + + oldud = (char *) PageGetItem(p, PageGetItemId(p, stk->rts_child)); + oldud += sizeof(IndexTupleData); + + (*rtstate->sizeFn)(oldud, &old_size); + datum = (char *) (*rtstate->unionFn)(oldud, datum); + + (*rtstate->sizeFn)(datum, &newd_size); + + if (newd_size != old_size) { + TupleDesc td = RelationGetTupleDescriptor(r); + + if (td->attrs[0]->attlen < 0) { + /* + * This is an internal page, so 'oldud' had better be a + * union (constant-length) key, too. (See comment below.) + */ + Assert(VARSIZE(datum) == VARSIZE(oldud)); + memmove(oldud, datum, VARSIZE(datum)); + } else { + memmove(oldud, datum, att_size); + } + WriteBuffer(b); + + /* + * The user may be defining an index on variable-sized data (like + * polygons). If so, we need to get a constant-sized datum for + * insertion on the internal page. We do this by calling the union + * proc, which is guaranteed to return a rectangle. + */ + + tdatum = (char *) (*rtstate->unionFn)(datum, datum); + rttighten(r, stk->rts_parent, tdatum, att_size, rtstate); + pfree(tdatum); + } else { + ReleaseBuffer(b); + } + pfree(datum); +} + +/* + * dosplit -- split a page in the tree. + * + * This is the quadratic-cost split algorithm Guttman describes in + * his paper. The reason we chose it is that you can implement this + * with less information about the data types on which you're operating. + */ +static InsertIndexResult +dosplit(Relation r, + Buffer buffer, + RTSTACK *stack, + IndexTuple itup, + RTSTATE *rtstate) +{ + Page p; + Buffer leftbuf, rightbuf; + Page left, right; + ItemId itemid; + IndexTuple item; + IndexTuple ltup, rtup; + OffsetNumber maxoff; + OffsetNumber i; + OffsetNumber leftoff, rightoff; + BlockNumber lbknum, rbknum; + BlockNumber bufblock; + RTreePageOpaque opaque; + int blank; + InsertIndexResult res; + char *isnull; + SPLITVEC v; + TupleDesc tupDesc; + + isnull = (char *) palloc(r->rd_rel->relnatts); + for (blank = 0; blank < r->rd_rel->relnatts; blank++) + isnull[blank] = ' '; + p = (Page) BufferGetPage(buffer); + opaque = (RTreePageOpaque) PageGetSpecialPointer(p); + + /* + * The root of the tree is the first block in the relation. If + * we're about to split the root, we need to do some hocus-pocus + * to enforce this guarantee. + */ + + if (BufferGetBlockNumber(buffer) == P_ROOT) { + leftbuf = ReadBuffer(r, P_NEW); + RTInitBuffer(leftbuf, opaque->flags); + lbknum = BufferGetBlockNumber(leftbuf); + left = (Page) BufferGetPage(leftbuf); + } else { + leftbuf = buffer; + IncrBufferRefCount(buffer); + lbknum = BufferGetBlockNumber(buffer); + left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData)); + } + + rightbuf = ReadBuffer(r, P_NEW); + RTInitBuffer(rightbuf, opaque->flags); + rbknum = BufferGetBlockNumber(rightbuf); + right = (Page) BufferGetPage(rightbuf); + + picksplit(r, p, &v, itup, rtstate); + + leftoff = rightoff = FirstOffsetNumber; + maxoff = PageGetMaxOffsetNumber(p); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + itemid = PageGetItemId(p, i); + item = (IndexTuple) PageGetItem(p, itemid); + + if (i == *(v.spl_left)) { + (void) PageAddItem(left, (Item) item, IndexTupleSize(item), + leftoff, LP_USED); + leftoff = OffsetNumberNext(leftoff); + v.spl_left++; /* advance in left split vector */ + } else { + (void) PageAddItem(right, (Item) item, IndexTupleSize(item), + rightoff, LP_USED); + rightoff = OffsetNumberNext(rightoff); + v.spl_right++; /* advance in right split vector */ + } + } + + /* build an InsertIndexResult for this insertion */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + /* now insert the new index tuple */ + if (*(v.spl_left) != FirstOffsetNumber) { + (void) PageAddItem(left, (Item) itup, IndexTupleSize(itup), + leftoff, LP_USED); + leftoff = OffsetNumberNext(leftoff); + ItemPointerSet(&(res->pointerData), lbknum, leftoff); + } else { + (void) PageAddItem(right, (Item) itup, IndexTupleSize(itup), + rightoff, LP_USED); + rightoff = OffsetNumberNext(rightoff); + ItemPointerSet(&(res->pointerData), rbknum, rightoff); + } + + if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT) { + PageRestoreTempPage(left, p); + } + WriteBuffer(leftbuf); + WriteBuffer(rightbuf); + + /* + * Okay, the page is split. We have three things left to do: + * + * 1) Adjust any active scans on this index to cope with changes + * we introduced in its structure by splitting this page. + * + * 2) "Tighten" the bounding box of the pointer to the left + * page in the parent node in the tree, if any. Since we + * moved a bunch of stuff off the left page, we expect it + * to get smaller. This happens in the internal insertion + * routine. + * + * 3) Insert a pointer to the right page in the parent. This + * may cause the parent to split. If it does, we need to + * repeat steps one and two for each split node in the tree. + */ + + /* adjust active scans */ + rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber); + + tupDesc = r->rd_att; + ltup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) &(v.spl_ldatum), isnull); + rtup = (IndexTuple) index_formtuple(tupDesc, + (Datum *) &(v.spl_rdatum), isnull); + pfree(isnull); + + /* set pointers to new child pages in the internal index tuples */ + ItemPointerSet(&(ltup->t_tid), lbknum, 1); + ItemPointerSet(&(rtup->t_tid), rbknum, 1); + + rtintinsert(r, stack, ltup, rtup, rtstate); + + pfree(ltup); + pfree(rtup); + + return (res); +} + +static void +rtintinsert(Relation r, + RTSTACK *stk, + IndexTuple ltup, + IndexTuple rtup, + RTSTATE *rtstate) +{ + IndexTuple old; + Buffer b; + Page p; + char *ldatum, *rdatum, *newdatum; + InsertIndexResult res; + + if (stk == (RTSTACK *) NULL) { + rtnewroot(r, ltup, rtup); + return; + } + + b = ReadBuffer(r, stk->rts_blk); + p = BufferGetPage(b); + old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child)); + + /* + * This is a hack. Right now, we force rtree keys to be constant size. + * To fix this, need delete the old key and add both left and right + * for the two new pages. The insertion of left may force a split if + * the new left key is bigger than the old key. + */ + + if (IndexTupleSize(old) != IndexTupleSize(ltup)) + elog(WARN, "Variable-length rtree keys are not supported."); + + /* install pointer to left child */ + memmove(old, ltup,IndexTupleSize(ltup)); + + if (nospace(p, rtup)) { + newdatum = (((char *) ltup) + sizeof(IndexTupleData)); + rttighten(r, stk->rts_parent, newdatum, + (IndexTupleSize(ltup) - sizeof(IndexTupleData)), rtstate); + res = dosplit(r, b, stk->rts_parent, rtup, rtstate); + WriteBuffer(b); /* don't forget to release buffer! - 01/31/94 */ + pfree(res); + } else { + (void) PageAddItem(p, (Item) rtup, IndexTupleSize(rtup), + PageGetMaxOffsetNumber(p), LP_USED); + WriteBuffer(b); + ldatum = (((char *) ltup) + sizeof(IndexTupleData)); + rdatum = (((char *) rtup) + sizeof(IndexTupleData)); + newdatum = (char *) (*rtstate->unionFn)(ldatum, rdatum); + + rttighten(r, stk->rts_parent, newdatum, + (IndexTupleSize(rtup) - sizeof(IndexTupleData)), rtstate); + + pfree(newdatum); + } +} + +static void +rtnewroot(Relation r, IndexTuple lt, IndexTuple rt) +{ + Buffer b; + Page p; + + b = ReadBuffer(r, P_ROOT); + RTInitBuffer(b, 0); + p = BufferGetPage(b); + (void) PageAddItem(p, (Item) lt, IndexTupleSize(lt), + FirstOffsetNumber, LP_USED); + (void) PageAddItem(p, (Item) rt, IndexTupleSize(rt), + OffsetNumberNext(FirstOffsetNumber), LP_USED); + WriteBuffer(b); +} + +static void +picksplit(Relation r, + Page page, + SPLITVEC *v, + IndexTuple itup, + RTSTATE *rtstate) +{ + OffsetNumber maxoff; + OffsetNumber i, j; + IndexTuple item_1, item_2; + char *datum_alpha, *datum_beta; + char *datum_l, *datum_r; + char *union_d, *union_dl, *union_dr; + char *inter_d; + bool firsttime; + float size_alpha, size_beta, size_union, size_inter; + float size_waste, waste; + float size_l, size_r; + int nbytes; + OffsetNumber seed_1 = 0, seed_2 = 0; + OffsetNumber *left, *right; + + maxoff = PageGetMaxOffsetNumber(page); + + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + firsttime = true; + waste = 0.0; + + for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) { + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j)) { + item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j)); + datum_beta = ((char *) item_2) + sizeof(IndexTupleData); + + /* compute the wasted space by unioning these guys */ + union_d = (char *)(rtstate->unionFn)(datum_alpha, datum_beta); + (rtstate->sizeFn)(union_d, &size_union); + inter_d = (char *)(rtstate->interFn)(datum_alpha, datum_beta); + (rtstate->sizeFn)(inter_d, &size_inter); + size_waste = size_union - size_inter; + + pfree(union_d); + + if (inter_d != (char *) NULL) + pfree(inter_d); + + /* + * are these a more promising split that what we've + * already seen? + */ + + if (size_waste > waste || firsttime) { + waste = size_waste; + seed_1 = i; + seed_2 = j; + firsttime = false; + } + } + } + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1)); + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + datum_l = (char *)(*rtstate->unionFn)(datum_alpha, datum_alpha); + (*rtstate->sizeFn)(datum_l, &size_l); + item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2)); + datum_beta = ((char *) item_2) + sizeof(IndexTupleData); + datum_r = (char *)(*rtstate->unionFn)(datum_beta, datum_beta); + (*rtstate->sizeFn)(datum_r, &size_r); + + /* + * Now split up the regions between the two seeds. An important + * property of this split algorithm is that the split vector v + * has the indices of items to be split in order in its left and + * right vectors. We exploit this property by doing a merge in + * the code that actually splits the page. + * + * For efficiency, we also place the new index tuple in this loop. + * This is handled at the very end, when we have placed all the + * existing tuples and i == maxoff + 1. + */ + + maxoff = OffsetNumberNext(maxoff); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + + /* + * If we've already decided where to place this item, just + * put it on the right list. Otherwise, we need to figure + * out which page needs the least enlargement in order to + * store the item. + */ + + if (i == seed_1) { + *left++ = i; + v->spl_nleft++; + continue; + } else if (i == seed_2) { + *right++ = i; + v->spl_nright++; + continue; + } + + /* okay, which page needs least enlargement? */ + if (i == maxoff) { + item_1 = itup; + } else { + item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + } + + datum_alpha = ((char *) item_1) + sizeof(IndexTupleData); + union_dl = (char *)(*rtstate->unionFn)(datum_l, datum_alpha); + union_dr = (char *)(*rtstate->unionFn)(datum_r, datum_alpha); + (*rtstate->sizeFn)(union_dl, &size_alpha); + (*rtstate->sizeFn)(union_dr, &size_beta); + + /* pick which page to add it to */ + if (size_alpha - size_l < size_beta - size_r) { + pfree(datum_l); + pfree(union_dr); + datum_l = union_dl; + size_l = size_alpha; + *left++ = i; + v->spl_nleft++; + } else { + pfree(datum_r); + pfree(union_dl); + datum_r = union_dr; + size_r = size_alpha; + *right++ = i; + v->spl_nright++; + } + } + *left = *right = FirstOffsetNumber; /* sentinel value, see dosplit() */ + + v->spl_ldatum = datum_l; + v->spl_rdatum = datum_r; +} + +static void +RTInitBuffer(Buffer b, uint32 f) +{ + RTreePageOpaque opaque; + Page page; + Size pageSize; + + pageSize = BufferGetPageSize(b); + + page = BufferGetPage(b); + memset(page, 0, (int) pageSize); + PageInit(page, pageSize, sizeof(RTreePageOpaqueData)); + + opaque = (RTreePageOpaque) PageGetSpecialPointer(page); + opaque->flags = f; +} + +static OffsetNumber +choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate) +{ + OffsetNumber maxoff; + OffsetNumber i; + char *ud, *id; + char *datum; + float usize, dsize; + OffsetNumber which; + float which_grow; + + id = ((char *) it) + sizeof(IndexTupleData); + maxoff = PageGetMaxOffsetNumber(p); + which_grow = -1.0; + which = -1; + + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + datum = (char *) PageGetItem(p, PageGetItemId(p, i)); + datum += sizeof(IndexTupleData); + (*rtstate->sizeFn)(datum, &dsize); + ud = (char *) (*rtstate->unionFn)(datum, id); + (*rtstate->sizeFn)(ud, &usize); + pfree(ud); + if (which_grow < 0 || usize - dsize < which_grow) { + which = i; + which_grow = usize - dsize; + if (which_grow == 0) + break; + } + } + + return (which); +} + +static int +nospace(Page p, IndexTuple it) +{ + return (PageGetFreeSpace(p) < IndexTupleSize(it)); +} + +void +freestack(RTSTACK *s) +{ + RTSTACK *p; + + while (s != (RTSTACK *) NULL) { + p = s->rts_parent; + pfree(s); + s = p; + } +} + +char * +rtdelete(Relation r, ItemPointer tid) +{ + BlockNumber blkno; + OffsetNumber offnum; + Buffer buf; + Page page; + + /* must write-lock on delete */ + RelationSetLockForWrite(r); + + blkno = ItemPointerGetBlockNumber(tid); + offnum = ItemPointerGetOffsetNumber(tid); + + /* adjust any scans that will be affected by this deletion */ + rtadjscans(r, RTOP_DEL, blkno, offnum); + + /* delete the index tuple */ + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + + PageIndexTupleDelete(page, offnum); + + WriteBuffer(buf); + + /* XXX -- two-phase locking, don't release the write lock */ + return ((char *) NULL); +} + +static void initRtstate(RTSTATE *rtstate, Relation index) +{ + RegProcedure union_proc, size_proc, inter_proc; + func_ptr user_fn; + int pronargs; + + union_proc = index_getprocid(index, 1, RT_UNION_PROC); + size_proc = index_getprocid(index, 1, RT_SIZE_PROC); + inter_proc = index_getprocid(index, 1, RT_INTER_PROC); + fmgr_info(union_proc, &user_fn, &pronargs); + rtstate->unionFn = user_fn; + fmgr_info(size_proc, &user_fn, &pronargs); + rtstate->sizeFn = user_fn; + fmgr_info(inter_proc, &user_fn, &pronargs); + rtstate->interFn = user_fn; + return; +} + +#define RTDEBUG +#ifdef RTDEBUG +#include "utils/geo-decls.h" + +void +_rtdump(Relation r) +{ + Buffer buf; + Page page; + OffsetNumber offnum, maxoff; + BlockNumber blkno; + BlockNumber nblocks; + RTreePageOpaque po; + IndexTuple itup; + BlockNumber itblkno; + OffsetNumber itoffno; + char *datum; + char *itkey; + + nblocks = RelationGetNumberOfBlocks(r); + for (blkno = 0; blkno < nblocks; blkno++) { + buf = ReadBuffer(r, blkno); + page = BufferGetPage(buf); + po = (RTreePageOpaque) PageGetSpecialPointer(page); + maxoff = PageGetMaxOffsetNumber(page); + printf("Page %d maxoff %d <%s>\n", blkno, maxoff, + (po->flags & F_LEAF ? "LEAF" : "INTERNAL")); + + if (PageIsEmpty(page)) { + ReleaseBuffer(buf); + continue; + } + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) { + itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); + itblkno = ItemPointerGetBlockNumber(&(itup->t_tid)); + itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid)); + datum = ((char *) itup); + datum += sizeof(IndexTupleData); + itkey = (char *) box_out((BOX *) datum); + printf("\t[%d] size %d heap <%d,%d> key:%s\n", + offnum, IndexTupleSize(itup), itblkno, itoffno, itkey); + pfree(itkey); + } + + ReleaseBuffer(buf); + } +} +#endif /* defined RTDEBUG */ + diff --git a/src/backend/access/rtree/rtscan.c b/src/backend/access/rtree/rtscan.c new file mode 100644 index 00000000000..aa68f0db70b --- /dev/null +++ b/src/backend/access/rtree/rtscan.c @@ -0,0 +1,392 @@ +/*------------------------------------------------------------------------- + * + * rtscan.c-- + * routines to manage scans on index relations + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtscan.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "c.h" +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/rtree.h" +#include "access/rtstrat.h" + +/* routines defined and used here */ +static void rtregscan(IndexScanDesc s); +static void rtdropscan(IndexScanDesc s); +static void rtadjone(IndexScanDesc s, int op, BlockNumber blkno, + OffsetNumber offnum); +static void adjuststack(RTSTACK *stk, BlockNumber blkno, + OffsetNumber offnum); +static void adjustiptr(IndexScanDesc s, ItemPointer iptr, + int op, BlockNumber blkno, OffsetNumber offnum); + +/* + * Whenever we start an rtree scan in a backend, we register it in private + * space. Then if the rtree index gets updated, we check all registered + * scans and adjust them if the tuple they point at got moved by the + * update. We only need to do this in private space, because when we update + * an rtree we have a write lock on the tree, so no other process can have + * any locks at all on it. A single transaction can have write and read + * locks on the same object, so that's why we need to handle this case. + */ + +typedef struct RTScanListData { + IndexScanDesc rtsl_scan; + struct RTScanListData *rtsl_next; +} RTScanListData; + +typedef RTScanListData *RTScanList; + +/* pointer to list of local scans on rtrees */ +static RTScanList RTScans = (RTScanList) NULL; + +IndexScanDesc +rtbeginscan(Relation r, + bool fromEnd, + uint16 nkeys, + ScanKey key) +{ + IndexScanDesc s; + + RelationSetLockForRead(r); + s = RelationGetIndexScan(r, fromEnd, nkeys, key); + rtregscan(s); + + return (s); +} + +void +rtrescan(IndexScanDesc s, bool fromEnd, ScanKey key) +{ + RTreeScanOpaque p; + RegProcedure internal_proc; + int i; + + if (!IndexScanIsValid(s)) { + elog(WARN, "rtrescan: invalid scan."); + return; + } + + /* + * Clear all the pointers. + */ + + ItemPointerSetInvalid(&s->previousItemData); + ItemPointerSetInvalid(&s->currentItemData); + ItemPointerSetInvalid(&s->nextItemData); + ItemPointerSetInvalid(&s->previousMarkData); + ItemPointerSetInvalid(&s->currentMarkData); + ItemPointerSetInvalid(&s->nextMarkData); + + /* + * Set flags. + */ + if (RelationGetNumberOfBlocks(s->relation) == 0) { + s->flags = ScanUnmarked; + } else if (fromEnd) { + s->flags = ScanUnmarked | ScanUncheckedPrevious; + } else { + s->flags = ScanUnmarked | ScanUncheckedNext; + } + + s->scanFromEnd = fromEnd; + + if (s->numberOfKeys > 0) { + memmove(s->keyData, + key, + s->numberOfKeys * sizeof(ScanKeyData)); + } + + p = (RTreeScanOpaque) s->opaque; + if (p != (RTreeScanOpaque) NULL) { + freestack(p->s_stack); + freestack(p->s_markstk); + p->s_stack = p->s_markstk = (RTSTACK *) NULL; + p->s_flags = 0x0; + } else { + /* initialize opaque data */ + p = (RTreeScanOpaque) palloc(sizeof(RTreeScanOpaqueData)); + p->s_internalKey = + (ScanKey) palloc(sizeof(ScanKeyData) * s->numberOfKeys); + p->s_stack = p->s_markstk = (RTSTACK *) NULL; + p->s_internalNKey = s->numberOfKeys; + p->s_flags = 0x0; + for (i = 0; i < s->numberOfKeys; i++) + p->s_internalKey[i].sk_argument = s->keyData[i].sk_argument; + s->opaque = p; + if (s->numberOfKeys > 0) { + + /* + * Scans on internal pages use different operators than they + * do on leaf pages. For example, if the user wants all boxes + * that exactly match (x1,y1,x2,y2), then on internal pages + * we need to find all boxes that contain (x1,y1,x2,y2). + */ + + for (i = 0; i < s->numberOfKeys; i++) { + internal_proc = RTMapOperator(s->relation, + s->keyData[i].sk_attno, + s->keyData[i].sk_procedure); + ScanKeyEntryInitialize(&(p->s_internalKey[i]), + s->keyData[i].sk_flags, + s->keyData[i].sk_attno, + internal_proc, + s->keyData[i].sk_argument); + } + } + } +} + +void +rtmarkpos(IndexScanDesc s) +{ + RTreeScanOpaque p; + RTSTACK *o, *n, *tmp; + + s->currentMarkData = s->currentItemData; + p = (RTreeScanOpaque) s->opaque; + if (p->s_flags & RTS_CURBEFORE) + p->s_flags |= RTS_MRKBEFORE; + else + p->s_flags &= ~RTS_MRKBEFORE; + + o = (RTSTACK *) NULL; + n = p->s_stack; + + /* copy the parent stack from the current item data */ + while (n != (RTSTACK *) NULL) { + tmp = (RTSTACK *) palloc(sizeof(RTSTACK)); + tmp->rts_child = n->rts_child; + tmp->rts_blk = n->rts_blk; + tmp->rts_parent = o; + o = tmp; + n = n->rts_parent; + } + + freestack(p->s_markstk); + p->s_markstk = o; +} + +void +rtrestrpos(IndexScanDesc s) +{ + RTreeScanOpaque p; + RTSTACK *o, *n, *tmp; + + s->currentItemData = s->currentMarkData; + p = (RTreeScanOpaque) s->opaque; + if (p->s_flags & RTS_MRKBEFORE) + p->s_flags |= RTS_CURBEFORE; + else + p->s_flags &= ~RTS_CURBEFORE; + + o = (RTSTACK *) NULL; + n = p->s_markstk; + + /* copy the parent stack from the current item data */ + while (n != (RTSTACK *) NULL) { + tmp = (RTSTACK *) palloc(sizeof(RTSTACK)); + tmp->rts_child = n->rts_child; + tmp->rts_blk = n->rts_blk; + tmp->rts_parent = o; + o = tmp; + n = n->rts_parent; + } + + freestack(p->s_stack); + p->s_stack = o; +} + +void +rtendscan(IndexScanDesc s) +{ + RTreeScanOpaque p; + + p = (RTreeScanOpaque) s->opaque; + + if (p != (RTreeScanOpaque) NULL) { + freestack(p->s_stack); + freestack(p->s_markstk); + } + + rtdropscan(s); + /* XXX don't unset read lock -- two-phase locking */ +} + +static void +rtregscan(IndexScanDesc s) +{ + RTScanList l; + + l = (RTScanList) palloc(sizeof(RTScanListData)); + l->rtsl_scan = s; + l->rtsl_next = RTScans; + RTScans = l; +} + +static void +rtdropscan(IndexScanDesc s) +{ + RTScanList l; + RTScanList prev; + + prev = (RTScanList) NULL; + + for (l = RTScans; + l != (RTScanList) NULL && l->rtsl_scan != s; + l = l->rtsl_next) { + prev = l; + } + + if (l == (RTScanList) NULL) + elog(WARN, "rtree scan list corrupted -- cannot find 0x%lx", s); + + if (prev == (RTScanList) NULL) + RTScans = l->rtsl_next; + else + prev->rtsl_next = l->rtsl_next; + + pfree(l); +} + +void +rtadjscans(Relation r, int op, BlockNumber blkno, OffsetNumber offnum) +{ + RTScanList l; + Oid relid; + + relid = r->rd_id; + for (l = RTScans; l != (RTScanList) NULL; l = l->rtsl_next) { + if (l->rtsl_scan->relation->rd_id == relid) + rtadjone(l->rtsl_scan, op, blkno, offnum); + } +} + +/* + * rtadjone() -- adjust one scan for update. + * + * By here, the scan passed in is on a modified relation. Op tells + * us what the modification is, and blkno and offind tell us what + * block and offset index were affected. This routine checks the + * current and marked positions, and the current and marked stacks, + * to see if any stored location needs to be changed because of the + * update. If so, we make the change here. + */ +static void +rtadjone(IndexScanDesc s, + int op, + BlockNumber blkno, + OffsetNumber offnum) +{ + RTreeScanOpaque so; + + adjustiptr(s, &(s->currentItemData), op, blkno, offnum); + adjustiptr(s, &(s->currentMarkData), op, blkno, offnum); + + so = (RTreeScanOpaque) s->opaque; + + if (op == RTOP_SPLIT) { + adjuststack(so->s_stack, blkno, offnum); + adjuststack(so->s_markstk, blkno, offnum); + } +} + +/* + * adjustiptr() -- adjust current and marked item pointers in the scan + * + * Depending on the type of update and the place it happened, we + * need to do nothing, to back up one record, or to start over on + * the same page. + */ +static void +adjustiptr(IndexScanDesc s, + ItemPointer iptr, + int op, + BlockNumber blkno, + OffsetNumber offnum) +{ + OffsetNumber curoff; + RTreeScanOpaque so; + + if (ItemPointerIsValid(iptr)) { + if (ItemPointerGetBlockNumber(iptr) == blkno) { + curoff = ItemPointerGetOffsetNumber(iptr); + so = (RTreeScanOpaque) s->opaque; + + switch (op) { + case RTOP_DEL: + /* back up one if we need to */ + if (curoff >= offnum) { + + if (curoff > FirstOffsetNumber) { + /* just adjust the item pointer */ + ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff)); + } else { + /* remember that we're before the current tuple */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags |= RTS_CURBEFORE; + else + so->s_flags |= RTS_MRKBEFORE; + } + } + break; + + case RTOP_SPLIT: + /* back to start of page on split */ + ItemPointerSet(iptr, blkno, FirstOffsetNumber); + if (iptr == &(s->currentItemData)) + so->s_flags &= ~RTS_CURBEFORE; + else + so->s_flags &= ~RTS_MRKBEFORE; + break; + + default: + elog(WARN, "Bad operation in rtree scan adjust: %d", op); + } + } + } +} + +/* + * adjuststack() -- adjust the supplied stack for a split on a page in + * the index we're scanning. + * + * If a page on our parent stack has split, we need to back up to the + * beginning of the page and rescan it. The reason for this is that + * the split algorithm for rtrees doesn't order tuples in any useful + * way on a single page. This means on that a split, we may wind up + * looking at some heap tuples more than once. This is handled in the + * access method update code for heaps; if we've modified the tuple we + * are looking at already in this transaction, we ignore the update + * request. + */ +/*ARGSUSED*/ +static void +adjuststack(RTSTACK *stk, + BlockNumber blkno, + OffsetNumber offnum) +{ + while (stk != (RTSTACK *) NULL) { + if (stk->rts_blk == blkno) + stk->rts_child = FirstOffsetNumber; + + stk = stk->rts_parent; + } +} diff --git a/src/backend/access/rtree/rtstrat.c b/src/backend/access/rtree/rtstrat.c new file mode 100644 index 00000000000..c5d934a22a2 --- /dev/null +++ b/src/backend/access/rtree/rtstrat.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * rtstrat.c-- + * strategy map data for rtrees. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtstrat.c,v 1.1.1.1 1996/07/09 06:21:13 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "c.h" + +#include "utils/rel.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "access/istrat.h" +#include "access/rtree.h" + +/* + * Note: negate, commute, and negatecommute all assume that operators are + * ordered as follows in the strategy map: + * + * left, left-or-overlap, overlap, right-or-overlap, right, same, + * contains, contained-by + * + * The negate, commute, and negatecommute arrays are used by the planner + * to plan indexed scans over data that appears in the qualificiation in + * a boolean negation, or whose operands appear in the wrong order. For + * example, if the operator "<%" means "contains", and the user says + * + * where not rel.box <% "(10,10,20,20)"::box + * + * the planner can plan an index scan by noting that rtree indices have + * an operator in their operator class for negating <%. + * + * Similarly, if the user says something like + * + * where "(10,10,20,20)"::box <% rel.box + * + * the planner can see that the rtree index on rel.box has an operator in + * its opclass for commuting <%, and plan the scan using that operator. + * This added complexity in the access methods makes the planner a lot easier + * to write. + */ + +/* if a op b, what operator tells us if (not a op b)? */ +static StrategyNumber RTNegate[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy + }; + +/* if a op_1 b, what is the operator op_2 such that b op_2 a? */ +static StrategyNumber RTCommute[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy + }; + +/* if a op_1 b, what is the operator op_2 such that (b !op_2 a)? */ +static StrategyNumber RTNegateCommute[RTNStrategies] = { + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy, + InvalidStrategy + }; + +/* + * Now do the TermData arrays. These exist in case the user doesn't give + * us a full set of operators for a particular operator class. The idea + * is that by making multiple comparisons using any one of the supplied + * operators, we can decide whether two n-dimensional polygons are equal. + * For example, if a contains b and b contains a, we may conclude that + * a and b are equal. + * + * The presence of the TermData arrays in all this is a historical accident. + * Early in the development of the POSTGRES access methods, it was believed + * that writing functions was harder than writing arrays. This is wrong; + * TermData is hard to understand and hard to get right. In general, when + * someone populates a new operator class, the populate it completely. If + * Mike Hirohama had forced Cimarron Taylor to populate the strategy map + * for btree int2_ops completely in 1988, you wouldn't have to deal with + * all this now. Too bad for you. + * + * Since you can't necessarily do this in all cases (for example, you can't + * do it given only "intersects" or "disjoint"), TermData arrays for some + * operators don't appear below. + * + * Note that if you DO supply all the operators required in a given opclass + * by inserting them into the pg_opclass system catalog, you can get away + * without doing all this TermData stuff. Since the rtree code is intended + * to be a reference for access method implementors, I'm doing TermData + * correctly here. + * + * Note on style: these are all actually of type StrategyTermData, but + * since those have variable-length data at the end of the struct we can't + * properly initialize them if we declare them to be what they are. + */ + +/* if you only have "contained-by", how do you determine equality? */ +static uint16 RTContainedByTermData[] = { + 2, /* make two comparisons */ + RTContainedByStrategyNumber, /* use "a contained-by b" */ + 0x0, /* without any magic */ + RTContainedByStrategyNumber, /* then use contained-by, */ + SK_COMMUTE /* swapping a and b */ + }; + +/* if you only have "contains", how do you determine equality? */ +static uint16 RTContainsTermData[] = { + 2, /* make two comparisons */ + RTContainsStrategyNumber, /* use "a contains b" */ + 0x0, /* without any magic */ + RTContainsStrategyNumber, /* then use contains again, */ + SK_COMMUTE /* swapping a and b */ + }; + +/* now put all that together in one place for the planner */ +static StrategyTerm RTEqualExpressionData[] = { + (StrategyTerm) RTContainedByTermData, + (StrategyTerm) RTContainsTermData, + NULL + }; + +/* + * If you were sufficiently attentive to detail, you would go through + * the ExpressionData pain above for every one of the seven strategies + * we defined. I am not. Now we declare the StrategyEvaluationData + * structure that gets shipped around to help the planner and the access + * method decide what sort of scan it should do, based on (a) what the + * user asked for, (b) what operators are defined for a particular opclass, + * and (c) the reams of information we supplied above. + * + * The idea of all of this initialized data is to make life easier on the + * user when he defines a new operator class to use this access method. + * By filling in all the data, we let him get away with leaving holes in his + * operator class, and still let him use the index. The added complexity + * in the access methods just isn't worth the trouble, though. + */ + +static StrategyEvaluationData RTEvaluationData = { + RTNStrategies, /* # of strategies */ + (StrategyTransformMap) RTNegate, /* how to do (not qual) */ + (StrategyTransformMap) RTCommute, /* how to swap operands */ + (StrategyTransformMap) RTNegateCommute, /* how to do both */ + { + NULL, /* express left */ + NULL, /* express overleft */ + NULL, /* express over */ + NULL, /* express overright */ + NULL, /* express right */ + (StrategyExpression) RTEqualExpressionData, /* express same */ + NULL, /* express contains */ + NULL, /* express contained-by */ + NULL, + NULL, + NULL + } +}; + +/* + * Okay, now something peculiar to rtrees that doesn't apply to most other + * indexing structures: When we're searching a tree for a given value, we + * can't do the same sorts of comparisons on internal node entries as we + * do at leaves. The reason is that if we're looking for (say) all boxes + * that are the same as (0,0,10,10), then we need to find all leaf pages + * that overlap that region. So internally we search for overlap, and at + * the leaf we search for equality. + * + * This array maps leaf search operators to the internal search operators. + * We assume the normal ordering on operators: + * + * left, left-or-overlap, overlap, right-or-overlap, right, same, + * contains, contained-by + */ +static StrategyNumber RTOperMap[RTNStrategies] = { + RTOverLeftStrategyNumber, + RTOverLeftStrategyNumber, + RTOverlapStrategyNumber, + RTOverRightStrategyNumber, + RTOverRightStrategyNumber, + RTContainsStrategyNumber, + RTContainsStrategyNumber, + RTOverlapStrategyNumber + }; + +StrategyNumber +RelationGetRTStrategy(Relation r, + AttrNumber attnum, + RegProcedure proc) +{ + return (RelationGetStrategy(r, attnum, &RTEvaluationData, proc)); +} + +bool +RelationInvokeRTStrategy(Relation r, + AttrNumber attnum, + StrategyNumber s, + Datum left, + Datum right) +{ + return (RelationInvokeStrategy(r, &RTEvaluationData, attnum, s, + left, right)); +} + +RegProcedure +RTMapOperator(Relation r, + AttrNumber attnum, + RegProcedure proc) +{ + StrategyNumber procstrat; + StrategyMap strategyMap; + + procstrat = RelationGetRTStrategy(r, attnum, proc); + strategyMap = IndexStrategyGetStrategyMap(RelationGetIndexStrategy(r), + RTNStrategies, + attnum); + + return (strategyMap->entry[RTOperMap[procstrat - 1] - 1].sk_procedure); +} |