diff options
Diffstat (limited to 'src/backend/access/hash/hashinsert.c')
-rw-r--r-- | src/backend/access/hash/hashinsert.c | 239 |
1 files changed, 239 insertions, 0 deletions
diff --git a/src/backend/access/hash/hashinsert.c b/src/backend/access/hash/hashinsert.c new file mode 100644 index 00000000000..c514cc614d8 --- /dev/null +++ b/src/backend/access/hash/hashinsert.c @@ -0,0 +1,239 @@ +/*------------------------------------------------------------------------- + * + * hashinsert.c-- + * Item insertion in hash tables for Postgres. + * + * Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * $Header: /cvsroot/pgsql/src/backend/access/hash/hashinsert.c,v 1.1.1.1 1996/07/09 06:21:10 scrappy Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "storage/bufmgr.h" +#include "storage/bufpage.h" + +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/rel.h" +#include "utils/excid.h" + +#include "access/heapam.h" +#include "access/genam.h" +#include "access/hash.h" + +static InsertIndexResult _hash_insertonpg(Relation rel, Buffer buf, int keysz, ScanKey scankey, HashItem hitem, Buffer metabuf); +static OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, int keysz, ScanKey itup_scankey, Size itemsize, HashItem hitem); + +/* + * _hash_doinsert() -- Handle insertion of a single HashItem in the table. + * + * This routine is called by the public interface routines, hashbuild + * and hashinsert. By here, hashitem is filled in, and has a unique + * (xid, seqno) pair. The datum to be used as a "key" is in the + * hashitem. + */ +InsertIndexResult +_hash_doinsert(Relation rel, HashItem hitem) +{ + Buffer buf; + Buffer metabuf; + BlockNumber blkno; + HashMetaPage metap; + IndexTuple itup; + InsertIndexResult res; + ScanKey itup_scankey; + int natts; + Page page; + + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + /* we need a scan key to do our search, so build one */ + itup = &(hitem->hash_itup); + if ((natts = rel->rd_rel->relnatts) != 1) + elog(WARN, "Hash indices valid for only one index key."); + itup_scankey = _hash_mkscankey(rel, itup, metap); + + /* + * find the first page in the bucket chain containing this key and + * place it in buf. _hash_search obtains a read lock for us. + */ + _hash_search(rel, natts, itup_scankey, &buf, metap); + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE); + + /* + * trade in our read lock for a write lock so that we can do the + * insertion. + */ + blkno = BufferGetBlockNumber(buf); + _hash_relbuf(rel, buf, HASH_READ); + buf = _hash_getbuf(rel, blkno, HASH_WRITE); + + + /* + * XXX btree comment (haven't decided what to do in hash): don't + * think the bucket can be split while we're reading the metapage. + * + * If the page was split between the time that we surrendered our + * read lock and acquired our write lock, then this page may no + * longer be the right place for the key we want to insert. + */ + + /* do the insertion */ + res = _hash_insertonpg(rel, buf, natts, itup_scankey, + hitem, metabuf); + + /* be tidy */ + _hash_freeskey(itup_scankey); + + return (res); +} + +/* + * _hash_insertonpg() -- Insert a tuple on a particular page in the table. + * + * This recursive procedure does the following things: + * + * + if necessary, splits the target page. + * + inserts the tuple. + * + * On entry, we must have the right buffer on which to do the + * insertion, and the buffer must be pinned and locked. On return, + * we will have dropped both the pin and the write lock on the buffer. + * + */ +static InsertIndexResult +_hash_insertonpg(Relation rel, + Buffer buf, + int keysz, + ScanKey scankey, + HashItem hitem, + Buffer metabuf) +{ + InsertIndexResult res; + Page page; + BlockNumber itup_blkno; + OffsetNumber itup_off; + int itemsz; + HashPageOpaque pageopaque; + bool do_expand = false; + Buffer ovflbuf; + HashMetaPage metap; + Bucket bucket; + + metap = (HashMetaPage) BufferGetPage(metabuf); + _hash_checkpage((Page) metap, LH_META_PAGE); + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + bucket = pageopaque->hasho_bucket; + + itemsz = IndexTupleDSize(hitem->hash_itup) + + (sizeof(HashItemData) - sizeof(IndexTupleData)); + itemsz = DOUBLEALIGN(itemsz); + + while (PageGetFreeSpace(page) < itemsz) { + /* + * no space on this page; check for an overflow page + */ + if (BlockNumberIsValid(pageopaque->hasho_nextblkno)) { + /* + * ovfl page exists; go get it. if it doesn't have room, + * we'll find out next pass through the loop test above. + */ + ovflbuf = _hash_getbuf(rel, pageopaque->hasho_nextblkno, + HASH_WRITE); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + } else { + /* + * we're at the end of the bucket chain and we haven't + * found a page with enough room. allocate a new overflow + * page. + */ + do_expand = true; + ovflbuf = _hash_addovflpage(rel, &metabuf, buf); + _hash_relbuf(rel, buf, HASH_WRITE); + buf = ovflbuf; + page = BufferGetPage(buf); + + if (PageGetFreeSpace(page) < itemsz) { + /* it doesn't fit on an empty page -- give up */ + elog(WARN, "hash item too large"); + } + } + _hash_checkpage(page, LH_OVERFLOW_PAGE); + pageopaque = (HashPageOpaque) PageGetSpecialPointer(page); + Assert(pageopaque->hasho_bucket == bucket); + } + + itup_off = _hash_pgaddtup(rel, buf, keysz, scankey, itemsz, hitem); + itup_blkno = BufferGetBlockNumber(buf); + + /* by here, the new tuple is inserted */ + res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData)); + + ItemPointerSet(&(res->pointerData), itup_blkno, itup_off); + + if (res != NULL) { + /* + * Increment the number of keys in the table. + * We switch lock access type just for a moment + * to allow greater accessibility to the metapage. + */ + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_READ, HASH_WRITE); + metap->hashm_nkeys += 1; + metap = (HashMetaPage) _hash_chgbufaccess(rel, &metabuf, + HASH_WRITE, HASH_READ); + + } + + _hash_wrtbuf(rel, buf); + + if (do_expand || + (metap->hashm_nkeys / (metap->hashm_maxbucket + 1)) + > metap->hashm_ffactor) { + _hash_expandtable(rel, metabuf); + } + _hash_relbuf(rel, metabuf, HASH_READ); + return (res); +} + +/* + * _hash_pgaddtup() -- add a tuple to a particular page in the index. + * + * This routine adds the tuple to the page as requested, and keeps the + * write lock and reference associated with the page's buffer. It is + * an error to call pgaddtup() without a write lock and reference. + */ +static OffsetNumber +_hash_pgaddtup(Relation rel, + Buffer buf, + int keysz, + ScanKey itup_scankey, + Size itemsize, + HashItem hitem) +{ + OffsetNumber itup_off; + Page page; + + page = BufferGetPage(buf); + _hash_checkpage(page, LH_BUCKET_PAGE|LH_OVERFLOW_PAGE); + + itup_off = OffsetNumberNext(PageGetMaxOffsetNumber(page)); + (void) PageAddItem(page, (Item) hitem, itemsize, itup_off, LP_USED); + + /* write the buffer, but hold our lock */ + _hash_wrtnorelbuf(rel, buf); + + return (itup_off); +} |