diff options
Diffstat (limited to 'src/include/access/spgist_private.h')
-rw-r--r-- | src/include/access/spgist_private.h | 609 |
1 files changed, 609 insertions, 0 deletions
diff --git a/src/include/access/spgist_private.h b/src/include/access/spgist_private.h new file mode 100644 index 00000000000..5c57799f09c --- /dev/null +++ b/src/include/access/spgist_private.h @@ -0,0 +1,609 @@ +/*------------------------------------------------------------------------- + * + * spgist_private.h + * Private declarations for SP-GiST access method. + * + * + * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/access/spgist_private.h + * + *------------------------------------------------------------------------- + */ +#ifndef SPGIST_PRIVATE_H +#define SPGIST_PRIVATE_H + +#include "access/itup.h" +#include "access/spgist.h" +#include "nodes/tidbitmap.h" +#include "utils/rel.h" + + +/* Page numbers of fixed-location pages */ +#define SPGIST_METAPAGE_BLKNO (0) +#define SPGIST_HEAD_BLKNO (1) + +/* + * Contents of page special space on SPGiST index pages + */ +typedef struct SpGistPageOpaqueData +{ + uint16 flags; /* see bit definitions below */ + uint16 nRedirection; /* number of redirection tuples on page */ + uint16 nPlaceholder; /* number of placeholder tuples on page */ + /* note there's no count of either LIVE or DEAD tuples ... */ + uint16 spgist_page_id; /* for identification of SP-GiST indexes */ +} SpGistPageOpaqueData; + +typedef SpGistPageOpaqueData *SpGistPageOpaque; + +/* Flag bits in page special space */ +#define SPGIST_META (1<<0) +#define SPGIST_DELETED (1<<1) +#define SPGIST_LEAF (1<<2) + +#define SpGistPageGetOpaque(page) ((SpGistPageOpaque) PageGetSpecialPointer(page)) +#define SpGistPageIsMeta(page) (SpGistPageGetOpaque(page)->flags & SPGIST_META) +#define SpGistPageIsDeleted(page) (SpGistPageGetOpaque(page)->flags & SPGIST_DELETED) +#define SpGistPageSetDeleted(page) (SpGistPageGetOpaque(page)->flags |= SPGIST_DELETED) +#define SpGistPageSetNonDeleted(page) (SpGistPageGetOpaque(page)->flags &= ~SPGIST_DELETED) +#define SpGistPageIsLeaf(page) (SpGistPageGetOpaque(page)->flags & SPGIST_LEAF) +#define SpGistPageSetLeaf(page) (SpGistPageGetOpaque(page)->flags |= SPGIST_LEAF) +#define SpGistPageSetInner(page) (SpGistPageGetOpaque(page)->flags &= ~SPGIST_LEAF) + +/* + * The page ID is for the convenience of pg_filedump and similar utilities, + * which otherwise would have a hard time telling pages of different index + * types apart. It should be the last 2 bytes on the page. This is more or + * less "free" due to alignment considerations. + */ +#define SPGIST_PAGE_ID 0xFF82 + +/* + * Each backend keeps a cache of last-used page info in its index->rd_amcache + * area. This is initialized from, and occasionally written back to, + * shared storage in the index metapage. + */ +typedef struct SpGistLastUsedPage +{ + BlockNumber blkno; /* block number of described page */ + int freeSpace; /* its free space (could be obsolete!) */ +} SpGistLastUsedPage; + +typedef struct SpGistCache +{ + SpGistLastUsedPage innerPage[3]; /* one per triple-parity group */ + SpGistLastUsedPage leafPage; +} SpGistCache; + +/* + * metapage + */ +typedef struct SpGistMetaPageData +{ + uint32 magicNumber; /* for identity cross-check */ + SpGistCache lastUsedPages; /* shared storage of last-used info */ +} SpGistMetaPageData; + +#define SPGIST_MAGIC_NUMBER (0xBA0BABED) + +#define SpGistPageGetMeta(p) \ + ((SpGistMetaPageData *) PageGetContents(p)) + +/* + * Private state of index AM. SpGistState is common to both insert and + * search code; SpGistScanOpaque is for searches only. + */ + +/* Per-datatype info needed in SpGistState */ +typedef struct SpGistTypeDesc +{ + Oid type; + bool attbyval; + int16 attlen; +} SpGistTypeDesc; + +typedef struct SpGistState +{ + spgConfigOut config; /* filled in by opclass config method */ + + SpGistTypeDesc attType; /* type of input data and leaf values */ + SpGistTypeDesc attPrefixType; /* type of inner-tuple prefix values */ + SpGistTypeDesc attLabelType; /* type of node label values */ + + /* lookup data for the opclass support functions, except config */ + FmgrInfo chooseFn; + FmgrInfo picksplitFn; + FmgrInfo innerConsistentFn; + FmgrInfo leafConsistentFn; + + char *deadTupleStorage; /* workspace for spgFormDeadTuple */ + + TransactionId myXid; /* XID to use when creating a redirect tuple */ + bool isBuild; /* true if doing index build */ +} SpGistState; + +/* + * Private state of an index scan + */ +typedef struct SpGistScanOpaqueData +{ + SpGistState state; /* see above */ + MemoryContext tempCxt; /* short-lived memory context */ + + /* Index quals for scan (copied from IndexScanDesc for convenience) */ + int numberOfKeys; /* number of index qualifier conditions */ + ScanKey keyData; /* array of index qualifier descriptors */ + + /* Stack of yet-to-be-visited pages */ + List *scanStack; /* List of ScanStackEntrys */ + + /* These fields are only used in amgetbitmap scans: */ + TIDBitmap *tbm; /* bitmap being filled */ + int64 ntids; /* number of TIDs passed to bitmap */ + + /* These fields are only used in amgettuple scans: */ + int nPtrs; /* number of TIDs found on current page */ + int iPtr; /* index for scanning through same */ + ItemPointerData heapPtrs[MaxIndexTuplesPerPage]; /* TIDs from cur page */ + bool recheck[MaxIndexTuplesPerPage]; /* their recheck flags */ + + /* + * Note: using MaxIndexTuplesPerPage above is a bit hokey since + * SpGistLeafTuples aren't exactly IndexTuples; however, they are + * larger, so this is safe. + */ +} SpGistScanOpaqueData; + +typedef SpGistScanOpaqueData *SpGistScanOpaque; + + +/* + * SPGiST tuple types. Note: inner, leaf, and dead tuple structs + * must have the same tupstate field in the same position! Real inner and + * leaf tuples always have tupstate = LIVE; if the state is something else, + * use the SpGistDeadTuple struct to inspect the tuple. + */ + +/* values of tupstate (see README for more info) */ +#define SPGIST_LIVE 0 /* normal live tuple (either inner or leaf) */ +#define SPGIST_REDIRECT 1 /* temporary redirection placeholder */ +#define SPGIST_DEAD 2 /* dead, cannot be removed because of links */ +#define SPGIST_PLACEHOLDER 3 /* placeholder, used to preserve offsets */ + +/* + * SPGiST inner tuple: list of "nodes" that subdivide a set of tuples + * + * Inner tuple layout: + * header/optional prefix/array of nodes, which are SpGistNodeTuples + * + * size and prefixSize must be multiples of MAXALIGN + */ +typedef struct SpGistInnerTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + allTheSame:1, /* all nodes in tuple are equivalent */ + nNodes:13, /* number of nodes within inner tuple */ + prefixSize:16; /* size of prefix, or 0 if none */ + uint16 size; /* total size of inner tuple */ + /* On most machines there will be a couple of wasted bytes here */ + /* prefix datum follows, then nodes */ +} SpGistInnerTupleData; + +typedef SpGistInnerTupleData *SpGistInnerTuple; + +/* these must match largest values that fit in bit fields declared above */ +#define SGITMAXNNODES 0x1FFF +#define SGITMAXPREFIXSIZE 0xFFFF +#define SGITMAXSIZE 0xFFFF + +#define SGITHDRSZ MAXALIGN(sizeof(SpGistInnerTupleData)) +#define _SGITDATA(x) (((char *) (x)) + SGITHDRSZ) +#define SGITDATAPTR(x) ((x)->prefixSize ? _SGITDATA(x) : NULL) +#define SGITDATUM(x, s) ((x)->prefixSize ? \ + ((s)->attPrefixType.attbyval ? \ + *(Datum *) _SGITDATA(x) : \ + PointerGetDatum(_SGITDATA(x))) \ + : (Datum) 0) +#define SGITNODEPTR(x) ((SpGistNodeTuple) (_SGITDATA(x) + (x)->prefixSize)) + +/* Macro for iterating through the nodes of an inner tuple */ +#define SGITITERATE(x, i, nt) \ + for ((i) = 0, (nt) = SGITNODEPTR(x); \ + (i) < (x)->nNodes; \ + (i)++, (nt) = (SpGistNodeTuple) (((char *) (nt)) + IndexTupleSize(nt))) + +/* + * SPGiST node tuple: one node within an inner tuple + * + * Node tuples use the same header as ordinary Postgres IndexTuples, but + * we do not use a null bitmap, because we know there is only one column + * so the INDEX_NULL_MASK bit suffices. Also, pass-by-value datums are + * stored as a full Datum, the same convention as for inner tuple prefixes + * and leaf tuple datums. + */ + +typedef IndexTupleData SpGistNodeTupleData; + +typedef SpGistNodeTupleData *SpGistNodeTuple; + +#define SGNTHDRSZ MAXALIGN(sizeof(SpGistNodeTupleData)) +#define SGNTDATAPTR(x) (((char *) (x)) + SGNTHDRSZ) +#define SGNTDATUM(x, s) ((s)->attLabelType.attbyval ? \ + *(Datum *) SGNTDATAPTR(x) : \ + PointerGetDatum(SGNTDATAPTR(x))) + +/* + * SPGiST leaf tuple: carries a datum and a heap tuple TID + * + * In the simplest case, the datum is the same as the indexed value; but + * it could also be a suffix or some other sort of delta that permits + * reconstruction given knowledge of the prefix path traversed to get here. + * + * The size field is wider than could possibly be needed for an on-disk leaf + * tuple, but this allows us to form leaf tuples even when the datum is too + * wide to be stored immediately, and it costs nothing because of alignment + * considerations. + * + * Normally, nextOffset links to the next tuple belonging to the same parent + * node (which must be on the same page). But when the root page is a leaf + * page, we don't chain its tuples, so nextOffset is always 0 on the root. + * + * size must be a multiple of MAXALIGN + */ +typedef struct SpGistLeafTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; /* large enough for any palloc'able value */ + OffsetNumber nextOffset; /* next tuple in chain, or InvalidOffset */ + ItemPointerData heapPtr; /* TID of represented heap tuple */ + /* leaf datum follows */ +} SpGistLeafTupleData; + +typedef SpGistLeafTupleData *SpGistLeafTuple; + +#define SGLTHDRSZ MAXALIGN(sizeof(SpGistLeafTupleData)) +#define SGLTDATAPTR(x) (((char *) (x)) + SGLTHDRSZ) +#define SGLTDATUM(x, s) ((s)->attType.attbyval ? \ + *(Datum *) SGLTDATAPTR(x) : \ + PointerGetDatum(SGLTDATAPTR(x))) + +/* + * SPGiST dead tuple: declaration for examining non-live tuples + * + * The tupstate field of this struct must match those of regular inner and + * leaf tuples, and its size field must match a leaf tuple's. + * Also, the pointer field must be in the same place as a leaf tuple's heapPtr + * field, to satisfy some Asserts that we make when replacing a leaf tuple + * with a dead tuple. + * We don't use nextOffset, but it's needed to align the pointer field. + * pointer and xid are only valid when tupstate = REDIRECT. + */ +typedef struct SpGistDeadTupleData +{ + unsigned int tupstate:2, /* LIVE/REDIRECT/DEAD/PLACEHOLDER */ + size:30; + OffsetNumber nextOffset; /* not used in dead tuples */ + ItemPointerData pointer; /* redirection inside index */ + TransactionId xid; /* ID of xact that inserted this tuple */ +} SpGistDeadTupleData; + +typedef SpGistDeadTupleData *SpGistDeadTuple; + +#define SGDTSIZE MAXALIGN(sizeof(SpGistDeadTupleData)) + +/* + * Macros for doing free-space calculations. Note that when adding up the + * space needed for tuples, we always consider each tuple to need the tuple's + * size plus sizeof(ItemIdData) (for the line pointer). This works correctly + * so long as tuple sizes are always maxaligned. + */ + +/* Page capacity after allowing for fixed header and special space */ +#define SPGIST_PAGE_CAPACITY \ + MAXALIGN_DOWN(BLCKSZ - \ + SizeOfPageHeaderData - \ + MAXALIGN(sizeof(SpGistPageOpaqueData))) + +/* + * Compute free space on page, assuming that up to n placeholders can be + * recycled if present (n should be the number of tuples to be inserted) + */ +#define SpGistPageGetFreeSpace(p, n) \ + (PageGetExactFreeSpace(p) + \ + Min(SpGistPageGetOpaque(p)->nPlaceholder, n) * \ + (SGDTSIZE + sizeof(ItemIdData))) + +/* + * XLOG stuff + * + * ACCEPT_RDATA_* can only use fixed-length rdata arrays, because of lengthof + */ + +#define ACCEPT_RDATA_DATA(p, s, i) \ + do { \ + Assert((i) < lengthof(rdata)); \ + rdata[i].data = (char *) (p); \ + rdata[i].len = (s); \ + rdata[i].buffer = InvalidBuffer; \ + rdata[i].buffer_std = true; \ + rdata[i].next = NULL; \ + if ((i) > 0) \ + rdata[(i) - 1].next = rdata + (i); \ + } while(0) + +#define ACCEPT_RDATA_BUFFER(b, i) \ + do { \ + Assert((i) < lengthof(rdata)); \ + rdata[i].data = NULL; \ + rdata[i].len = 0; \ + rdata[i].buffer = (b); \ + rdata[i].buffer_std = true; \ + rdata[i].next = NULL; \ + if ((i) > 0) \ + rdata[(i) - 1].next = rdata + (i); \ + } while(0) + + +/* XLOG record types for SPGiST */ +#define XLOG_SPGIST_CREATE_INDEX 0x00 +#define XLOG_SPGIST_ADD_LEAF 0x10 +#define XLOG_SPGIST_MOVE_LEAFS 0x20 +#define XLOG_SPGIST_ADD_NODE 0x30 +#define XLOG_SPGIST_SPLIT_TUPLE 0x40 +#define XLOG_SPGIST_PICKSPLIT 0x50 +#define XLOG_SPGIST_VACUUM_LEAF 0x60 +#define XLOG_SPGIST_VACUUM_ROOT 0x70 +#define XLOG_SPGIST_VACUUM_REDIRECT 0x80 + +/* + * Some redo functions need an SpGistState, although only a few of its fields + * need to be valid. spgxlogState carries the required info in xlog records. + * (See fillFakeState in spgxlog.c for more comments.) + */ +typedef struct spgxlogState +{ + TransactionId myXid; + bool isBuild; +} spgxlogState; + +#define STORE_STATE(s, d) \ + do { \ + (d).myXid = (s)->myXid; \ + (d).isBuild = (s)->isBuild; \ + } while(0) + + +typedef struct spgxlogAddLeaf +{ + RelFileNode node; + + BlockNumber blknoLeaf; /* destination page for leaf tuple */ + bool newPage; /* init dest page? */ + OffsetNumber offnumLeaf; /* offset where leaf tuple gets placed */ + OffsetNumber offnumHeadLeaf; /* offset of head tuple in chain, if any */ + + BlockNumber blknoParent; /* where the parent downlink is, if any */ + OffsetNumber offnumParent; + uint16 nodeI; + + /* + * new leaf tuple follows, on an intalign boundary (replay only needs to + * fetch its size field, so that should be enough alignment) + */ +} spgxlogAddLeaf; + +typedef struct spgxlogMoveLeafs +{ + RelFileNode node; + + BlockNumber blknoSrc; /* source leaf page */ + BlockNumber blknoDst; /* destination leaf page */ + uint16 nMoves; /* number of tuples moved from source page */ + bool newPage; /* init dest page? */ + bool replaceDead; /* are we replacing a DEAD source tuple? */ + + BlockNumber blknoParent; /* where the parent downlink is */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * array of deleted tuple numbers, length nMoves + * array of inserted tuple numbers, length nMoves + 1 or 1 + * list of leaf tuples, length nMoves + 1 or 1 (must be maxaligned) + * the tuple number arrays are padded to maxalign boundaries so that the + * leaf tuples will be suitably aligned + * + * Note: if replaceDead is true then there is only one inserted tuple + * number and only one leaf tuple in the data, because we are not copying + * the dead tuple from the source + * + * Buffer references in the rdata array are: + * Src page + * Dest page + * Parent page + *---------- + */ +} spgxlogMoveLeafs; + +typedef struct spgxlogAddNode +{ + RelFileNode node; + + BlockNumber blkno; /* block number of original inner tuple */ + OffsetNumber offnum; /* offset of original inner tuple */ + + BlockNumber blknoParent; /* where parent downlink is, if updated */ + OffsetNumber offnumParent; + uint16 nodeI; + + BlockNumber blknoNew; /* where new tuple goes, if not same place */ + OffsetNumber offnumNew; + bool newPage; /* init new page? */ + + spgxlogState stateSrc; + + /* + * updated inner tuple follows, on an intalign boundary (replay only needs + * to fetch its size field, so that should be enough alignment) + */ +} spgxlogAddNode; + +typedef struct spgxlogSplitTuple +{ + RelFileNode node; + + BlockNumber blknoPrefix; /* where the prefix tuple goes */ + OffsetNumber offnumPrefix; + + BlockNumber blknoPostfix; /* where the postfix tuple goes */ + OffsetNumber offnumPostfix; + bool newPage; /* need to init that page? */ + + /* + * new prefix inner tuple follows, then new postfix inner tuple, on + * intalign boundaries (replay only needs to fetch size fields, so that + * should be enough alignment) + */ +} spgxlogSplitTuple; + +typedef struct spgxlogPickSplit +{ + RelFileNode node; + + BlockNumber blknoSrc; /* original leaf page */ + BlockNumber blknoDest; /* other leaf page, if any */ + uint16 nDelete; /* n to delete from Src */ + uint16 nInsert; /* n to insert on Src and/or Dest */ + bool initSrc; /* re-init the Src page? */ + bool initDest; /* re-init the Dest page? */ + + BlockNumber blknoInner; /* where to put new inner tuple */ + OffsetNumber offnumInner; + bool initInner; /* re-init the Inner page? */ + + BlockNumber blknoParent; /* where the parent downlink is, if any */ + OffsetNumber offnumParent; + uint16 nodeI; + + spgxlogState stateSrc; + + /*---------- + * data follows: + * new inner tuple (assumed to have a maxaligned length) + * array of deleted tuple numbers, length nDelete + * array of inserted tuple numbers, length nInsert + * array of page selector bytes for inserted tuples, length nInsert + * list of leaf tuples, length nInsert (must be maxaligned) + * the tuple number and page selector arrays are padded to maxalign + * boundaries so that the leaf tuples will be suitably aligned + * + * Buffer references in the rdata array are: + * Src page (only if not root and not being init'd) + * Dest page (if used and not being init'd) + * Inner page (only if not being init'd) + * Parent page (if any; could be same as Inner) + *---------- + */ +} spgxlogPickSplit; + +typedef struct spgxlogVacuumLeaf +{ + RelFileNode node; + + BlockNumber blkno; /* block number to clean */ + uint16 nDead; /* number of tuples to become DEAD */ + uint16 nPlaceholder; /* number of tuples to become PLACEHOLDER */ + uint16 nMove; /* number of tuples to move */ + uint16 nChain; /* number of tuples to re-chain */ + + spgxlogState stateSrc; + + /*---------- + * data follows: + * tuple numbers to become DEAD + * tuple numbers to become PLACEHOLDER + * tuple numbers to move from (and replace with PLACEHOLDER) + * tuple numbers to move to (replacing what is there) + * tuple numbers to update nextOffset links of + * tuple numbers to insert in nextOffset links + *---------- + */ +} spgxlogVacuumLeaf; + +typedef struct spgxlogVacuumRoot +{ + /* vacuum root page when it is a leaf */ + RelFileNode node; + + uint16 nDelete; /* number of tuples to delete */ + + spgxlogState stateSrc; + + /* offsets of tuples to delete follow */ +} spgxlogVacuumRoot; + +typedef struct spgxlogVacuumRedirect +{ + RelFileNode node; + + BlockNumber blkno; /* block number to clean */ + uint16 nToPlaceholder; /* number of redirects to make placeholders */ + OffsetNumber firstPlaceholder; /* first placeholder tuple to remove */ + + /* offsets of redirect tuples to make placeholders follow */ +} spgxlogVacuumRedirect; + +/* + * The "flags" argument for SpGistGetBuffer should be either GBUF_LEAF to + * get a leaf page, or GBUF_INNER_PARITY(blockNumber) to get an inner + * page in the same triple-parity group as the specified block number. + * (Typically, this should be GBUF_INNER_PARITY(parentBlockNumber + 1) + * to follow the rule described in spgist/README.) + */ +#define GBUF_PARITY_MASK 0x03 +#define GBUF_LEAF 0x04 +#define GBUF_INNER_PARITY(x) ((x) % 3) + +/* spgutils.c */ +extern void initSpGistState(SpGistState *state, Relation index); +extern Buffer SpGistNewBuffer(Relation index); +extern void SpGistUpdateMetaPage(Relation index); +extern Buffer SpGistGetBuffer(Relation index, int flags, + int needSpace, bool *isNew); +extern void SpGistSetLastUsedPage(Relation index, Buffer buffer); +extern void SpGistInitPage(Page page, uint16 f); +extern void SpGistInitBuffer(Buffer b, uint16 f); +extern void SpGistInitMetapage(Page page); +extern unsigned int SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum); +extern SpGistLeafTuple spgFormLeafTuple(SpGistState *state, + ItemPointer heapPtr, Datum datum); +extern SpGistNodeTuple spgFormNodeTuple(SpGistState *state, + Datum label, bool isnull); +extern SpGistInnerTuple spgFormInnerTuple(SpGistState *state, + bool hasPrefix, Datum prefix, + int nNodes, SpGistNodeTuple *nodes); +extern SpGistDeadTuple spgFormDeadTuple(SpGistState *state, int tupstate, + BlockNumber blkno, OffsetNumber offnum); +extern Datum *spgExtractNodeLabels(SpGistState *state, + SpGistInnerTuple innerTuple); +extern OffsetNumber SpGistPageAddNewItem(SpGistState *state, Page page, + Item item, Size size, + OffsetNumber *startOffset, + bool errorOK); + +/* spgdoinsert.c */ +extern void updateNodeLink(SpGistInnerTuple tup, int nodeN, + BlockNumber blkno, OffsetNumber offset); +extern void spgPageIndexMultiDelete(SpGistState *state, Page page, + OffsetNumber *itemnos, int nitems, + int firststate, int reststate, + BlockNumber blkno, OffsetNumber offnum); +extern void spgdoinsert(Relation index, SpGistState *state, + ItemPointer heapPtr, Datum datum); + +#endif /* SPGIST_PRIVATE_H */ |