diff options
Diffstat (limited to 'contrib/pageinspect/btreefuncs.c')
-rw-r--r-- | contrib/pageinspect/btreefuncs.c | 499 |
1 files changed, 499 insertions, 0 deletions
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c new file mode 100644 index 00000000000..9b392d047bb --- /dev/null +++ b/contrib/pageinspect/btreefuncs.c @@ -0,0 +1,499 @@ +/* + * btreefuncs.c + * + * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp> + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "funcapi.h" +#include "access/heapam.h" +#include "access/itup.h" +#include "access/nbtree.h" +#include "access/transam.h" +#include "catalog/namespace.h" +#include "catalog/pg_type.h" +#include "utils/builtins.h" +#include "utils/inval.h" + +PG_FUNCTION_INFO_V1(bt_metap); +PG_FUNCTION_INFO_V1(bt_page_items); +PG_FUNCTION_INFO_V1(bt_page_stats); + +extern Datum bt_metap(PG_FUNCTION_ARGS); +extern Datum bt_page_items(PG_FUNCTION_ARGS); +extern Datum bt_page_stats(PG_FUNCTION_ARGS); + +#define BTMETAP_TYPE "public.bt_metap_type" +#define BTMETAP_NCOLUMNS 6 + +#define BTPAGEITEMS_TYPE "public.bt_page_items_type" +#define BTPAGEITEMS_NCOLUMNS 6 + +#define BTPAGESTATS_TYPE "public.bt_page_stats_type" +#define BTPAGESTATS_NCOLUMNS 11 + + +#define IS_INDEX(r) ((r)->rd_rel->relkind == 'i') +#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) + +#define CHECK_PAGE_OFFSET_RANGE(page, offset) { \ + if ( !(FirstOffsetNumber<=(offset) && \ + (offset)<=PageGetMaxOffsetNumber(page)) ) \ + elog(ERROR, "Page offset number out of range."); } + +#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \ + if ( (blkno)<0 && RelationGetNumberOfBlocks((rel))<=(blkno) ) \ + elog(ERROR, "Block number out of range."); } + +/* ------------------------------------------------ + * structure for single btree page statistics + * ------------------------------------------------ + */ +typedef struct BTPageStat +{ + uint32 blkno; + uint32 live_items; + uint32 dead_items; + uint32 page_size; + uint32 max_avail; + uint32 free_size; + uint32 avg_item_size; + char type; + + /* opaque data */ + BlockNumber btpo_prev; + BlockNumber btpo_next; + union + { + uint32 level; + TransactionId xact; + } btpo; + uint16 btpo_flags; + BTCycleId btpo_cycleid; +} BTPageStat; + +/* ------------------------------------------------ + * A structure for a whole btree index statistics + * used by pgstatindex(). + * ------------------------------------------------ + */ +typedef struct BTIndexStat +{ + uint32 magic; + uint32 version; + BlockNumber root_blkno; + uint32 level; + + BlockNumber fastroot; + uint32 fastlevel; + + uint32 live_items; + uint32 dead_items; + + uint32 root_pages; + uint32 internal_pages; + uint32 leaf_pages; + uint32 empty_pages; + uint32 deleted_pages; + + uint32 page_size; + uint32 avg_item_size; + + uint32 max_avail; + uint32 free_space; +} BTIndexStat; + + +/* ------------------------------------------------- + * GetBTPageStatistics() + * + * Collect statistics of single b-tree leaf page + * ------------------------------------------------- + */ +static void +GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat) +{ + Page page = BufferGetPage(buffer); + PageHeader phdr = (PageHeader) page; + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page); + int item_size = 0; + int off; + + stat->blkno = blkno; + + stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData); + + stat->dead_items = stat->live_items = 0; + + stat->page_size = PageGetPageSize(page); + + /* page type (flags) */ + if (P_ISDELETED(opaque)) + { + stat->type = 'd'; + stat->btpo.xact = opaque->btpo.xact; + return; + } + else if (P_IGNORE(opaque)) + stat->type = 'e'; + else if (P_ISLEAF(opaque)) + stat->type = 'l'; + else if (P_ISROOT(opaque)) + stat->type = 'r'; + else + stat->type = 'i'; + + /* btpage opaque data */ + stat->btpo_prev = opaque->btpo_prev; + stat->btpo_next = opaque->btpo_next; + stat->btpo.level = opaque->btpo.level; + stat->btpo_flags = opaque->btpo_flags; + stat->btpo_cycleid = opaque->btpo_cycleid; + + /* count live and dead tuples, and free space */ + for (off = FirstOffsetNumber; off <= maxoff; off++) + { + IndexTuple itup; + + ItemId id = PageGetItemId(page, off); + + itup = (IndexTuple) PageGetItem(page, id); + + item_size += IndexTupleSize(itup); + + if (!ItemIdDeleted(id)) + stat->live_items++; + else + stat->dead_items++; + } + stat->free_size = PageGetFreeSpace(page); + + if ((stat->live_items + stat->dead_items) > 0) + stat->avg_item_size = item_size / (stat->live_items + stat->dead_items); + else + stat->avg_item_size = 0; +} + +/* ----------------------------------------------- + * bt_page() + * + * Usage: SELECT * FROM bt_page('t1_pkey', 0); + * ----------------------------------------------- + */ +Datum +bt_page_stats(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + Buffer buffer; + + Relation rel; + RangeVar *relrv; + Datum result; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + CHECK_RELATION_BLOCK_RANGE(rel, blkno); + + buffer = ReadBuffer(rel, blkno); + + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + elog(ERROR, "bt_page_stats() can be used only on b-tree index."); + + if (blkno == 0) + elog(ERROR, "Block 0 is a meta page."); + + { + HeapTuple tuple; + TupleDesc tupleDesc; + int j; + char *values[BTPAGESTATS_NCOLUMNS]; + + BTPageStat stat; + + /* keep compiler quiet */ + stat.btpo_prev = stat.btpo_next = InvalidBlockNumber; + stat.btpo_flags = stat.free_size = stat.avg_item_size = 0; + + GetBTPageStatistics(blkno, buffer, &stat); + + tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE); + + j = 0; + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.blkno); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", stat.type); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.live_items); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.dead_items); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.avg_item_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.page_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.free_size); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_prev); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_next); + + values[j] = palloc(32); + if (stat.type == 'd') + snprintf(values[j++], 32, "%d", stat.btpo.xact); + else + snprintf(values[j++], 32, "%d", stat.btpo.level); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", stat.btpo_flags); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} + +/*------------------------------------------------------- + * bt_page_items() + * + * Get IndexTupleData set in a leaf page + * + * Usage: SELECT * FROM bt_page_items('t1_pkey', 0); + *------------------------------------------------------- + */ +/* --------------------------------------------------- + * data structure for SRF to hold a scan information + * --------------------------------------------------- + */ +struct user_args +{ + TupleDesc tupd; + Relation rel; + Buffer buffer; + Page page; + uint16 offset; +}; + +Datum +bt_page_items(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + uint32 blkno = PG_GETARG_UINT32(1); + + RangeVar *relrv; + Datum result; + char *values[BTPAGEITEMS_NCOLUMNS]; + BTPageOpaque opaque; + HeapTuple tuple; + ItemId id; + + FuncCallContext *fctx; + MemoryContext mctx; + struct user_args *uargs = NULL; + + if (blkno == 0) + elog(ERROR, "Block 0 is a meta page."); + + if (SRF_IS_FIRSTCALL()) + { + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + uargs = palloc(sizeof(struct user_args)); + + uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE); + uargs->offset = FirstOffsetNumber; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + uargs->rel = relation_openrv(relrv, AccessShareLock); + + CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno); + + uargs->buffer = ReadBuffer(uargs->rel, blkno); + + if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel)) + elog(ERROR, "bt_page_items() can be used only on b-tree index."); + + uargs->page = BufferGetPage(uargs->buffer); + + opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); + + if (P_ISDELETED(opaque)) + elog(NOTICE, "bt_page_items(): this page is deleted."); + + fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); + fctx->user_fctx = uargs; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + uargs = fctx->user_fctx; + + if (fctx->call_cntr < fctx->max_calls) + { + IndexTuple itup; + + id = PageGetItemId(uargs->page, uargs->offset); + + if (!ItemIdIsValid(id)) + elog(ERROR, "Invalid ItemId."); + + itup = (IndexTuple) PageGetItem(uargs->page, id); + + { + int j = 0; + + BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)); + + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", uargs->offset); + values[j] = palloc(32); + snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup)); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f'); + values[j] = palloc(32); + snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f'); + + { + int off; + char *dump; + char *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info); + + dump = palloc(IndexTupleSize(itup) * 3); + memset(dump, 0, IndexTupleSize(itup) * 3); + + for (off = 0; + off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info); + off++) + { + if (dump[0] == '\0') + sprintf(dump, "%02x", *(ptr + off) & 0xff); + else + { + char buf[4]; + + sprintf(buf, " %02x", *(ptr + off) & 0xff); + strcat(dump, buf); + } + } + values[j] = dump; + } + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values); + result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple); + } + + uargs->offset = uargs->offset + 1; + + SRF_RETURN_NEXT(fctx, result); + } + else + { + ReleaseBuffer(uargs->buffer); + relation_close(uargs->rel, AccessShareLock); + + SRF_RETURN_DONE(fctx); + } +} + + +/* ------------------------------------------------ + * bt_metap() + * + * Get a btree meta-page information + * + * Usage: SELECT * FROM bt_metap('t1_pkey') + * ------------------------------------------------ + */ +Datum +bt_metap(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_P(0); + Buffer buffer; + + Relation rel; + RangeVar *relrv; + Datum result; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + elog(ERROR, "bt_metap() can be used only on b-tree index."); + + buffer = ReadBuffer(rel, 0); + + { + BTMetaPageData *metad; + + TupleDesc tupleDesc; + int j; + char *values[BTMETAP_NCOLUMNS]; + HeapTuple tuple; + + Page page = BufferGetPage(buffer); + + metad = BTPageGetMeta(page); + + tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE); + + j = 0; + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_magic); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_version); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_root); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_level); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastroot); + values[j] = palloc(32); + snprintf(values[j++], 32, "%d", metad->btm_fastlevel); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); + } + + ReleaseBuffer(buffer); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(result); +} |