 src/backend/access/hash/hash.c       | 178
 src/backend/access/hash/hashovfl.c   |  15
 src/backend/access/hash/hashpage.c   |  16
 src/backend/access/hash/hashsearch.c |  25
 src/backend/access/hash/hashutil.c   |  60
 src/include/access/hash.h            |  13
 6 files changed, 231 insertions(+), 76 deletions(-)
diff --git a/src/backend/access/hash/hash.c b/src/backend/access/hash/hash.c
index fbdf0dc04c8..0d2f8b61995 100644
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.65 2003/08/04 02:39:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.66 2003/09/02 02:18:38 tgl Exp $
*
* NOTES
* This file contains only the public interface routines.
@@ -449,40 +449,178 @@ hashbulkdelete(PG_FUNCTION_ARGS)
BlockNumber num_pages;
double tuples_removed;
double num_index_tuples;
- IndexScanDesc iscan;
+ uint32 deleted_tuples;
+ uint32 tuples_remaining;
+ uint32 orig_ntuples;
+ Bucket orig_maxbucket;
+ Bucket cur_maxbucket;
+ Bucket cur_bucket;
+ Buffer metabuf;
+ HashMetaPage metap;
+ HashMetaPageData local_metapage;
+ /*
+ * keep track of counts in both float form (to return) and integer form
+ * (to update hashm_ntuples). It'd be better to make hashm_ntuples a
+ * double, but that will have to wait for an initdb.
+ */
tuples_removed = 0;
num_index_tuples = 0;
+ deleted_tuples = 0;
+ tuples_remaining = 0;
/*
- * XXX generic implementation --- should be improved!
+ * Read the metapage to fetch original bucket and tuple counts. Also,
+ * we keep a copy of the last-seen metapage so that we can use its
+ * hashm_spares[] values to compute bucket page addresses. This is a
+ * bit hokey but perfectly safe, since the interesting entries in the
+ * spares array cannot change under us; and it beats rereading the
+ * metapage for each bucket.
*/
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+ orig_maxbucket = metap->hashm_maxbucket;
+ orig_ntuples = metap->hashm_ntuples;
+ memcpy(&local_metapage, metap, sizeof(local_metapage));
+ _hash_relbuf(rel, metabuf, HASH_READ);
+
+ /* Scan the buckets that we know exist */
+ cur_bucket = 0;
+ cur_maxbucket = orig_maxbucket;
+
+loop_top:
+ while (cur_bucket <= cur_maxbucket)
+ {
+ BlockNumber bucket_blkno;
+ BlockNumber blkno;
+ bool bucket_dirty = false;
- /* walk through the entire index */
- iscan = index_beginscan(NULL, rel, SnapshotAny, 0, (ScanKey) NULL);
- /* including killed tuples */
- iscan->ignore_killed_tuples = false;
+ /* Get address of bucket's start page */
+ bucket_blkno = BUCKET_TO_BLKNO(&local_metapage, cur_bucket);
- while (index_getnext_indexitem(iscan, ForwardScanDirection))
- {
- if (callback(&iscan->xs_ctup.t_self, callback_state))
+ /* XXX lock bucket here */
+
+ /* Scan each page in bucket */
+ blkno = bucket_blkno;
+ while (BlockNumberIsValid(blkno))
{
- ItemPointerData indextup = iscan->currentItemData;
+ Buffer buf;
+ Page page;
+ HashPageOpaque opaque;
+ OffsetNumber offno;
+ OffsetNumber maxoffno;
+ bool page_dirty = false;
+
+ buf = _hash_getbuf(rel, blkno, HASH_WRITE);
+ page = BufferGetPage(buf);
+ _hash_checkpage(page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+ opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ Assert(opaque->hasho_bucket == cur_bucket);
+
+ /* Scan each tuple in page */
+ offno = FirstOffsetNumber;
+ maxoffno = PageGetMaxOffsetNumber(page);
+ while (offno <= maxoffno)
+ {
+ HashItem hitem;
+ ItemPointer htup;
+
+ hitem = (HashItem) PageGetItem(page,
+ PageGetItemId(page, offno));
+ htup = &(hitem->hash_itup.t_tid);
+ if (callback(htup, callback_state))
+ {
+ ItemPointerData indextup;
+
+ /* adjust any active scans that will be affected */
+ /* (this should be unnecessary) */
+ ItemPointerSet(&indextup, blkno, offno);
+ _hash_adjscans(rel, &indextup);
+
+ /* delete the item from the page */
+ PageIndexTupleDelete(page, offno);
+ bucket_dirty = page_dirty = true;
+
+ /* don't increment offno, instead decrement maxoffno */
+ maxoffno = OffsetNumberPrev(maxoffno);
+
+ tuples_removed += 1;
+ deleted_tuples += 1;
+ }
+ else
+ {
+ offno = OffsetNumberNext(offno);
+
+ num_index_tuples += 1;
+ tuples_remaining += 1;
+ }
+ }
- /* adjust any active scans that will be affected by deletion */
- /* (namely, my own scan) */
- _hash_adjscans(rel, &indextup);
+ /*
+ * Write or free page if needed, advance to next page. We want
+ * to preserve the invariant that overflow pages are nonempty.
+ */
+ blkno = opaque->hasho_nextblkno;
+
+ if (PageIsEmpty(page) && (opaque->hasho_flag & LH_OVERFLOW_PAGE))
+ _hash_freeovflpage(rel, buf);
+ else if (page_dirty)
+ _hash_wrtbuf(rel, buf);
+ else
+ _hash_relbuf(rel, buf, HASH_WRITE);
+ }
- /* delete the data from the page */
- _hash_pagedel(rel, &indextup);
+ /* If we deleted anything, try to compact free space */
+ if (bucket_dirty)
+ _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
- tuples_removed += 1;
- }
+ /* XXX unlock bucket here */
+
+ /* Advance to next bucket */
+ cur_bucket++;
+ }
+
+ /* Write-lock metapage and check for split since we started */
+ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_WRITE);
+ metap = (HashMetaPage) BufferGetPage(metabuf);
+ _hash_checkpage((Page) metap, LH_META_PAGE);
+
+ if (cur_maxbucket != metap->hashm_maxbucket)
+ {
+ /* There's been a split, so process the additional bucket(s) */
+ cur_maxbucket = metap->hashm_maxbucket;
+ memcpy(&local_metapage, metap, sizeof(local_metapage));
+ _hash_relbuf(rel, metabuf, HASH_WRITE);
+ goto loop_top;
+ }
+
+ /* Okay, we're really done. Update tuple count in metapage. */
+
+ if (orig_maxbucket == metap->hashm_maxbucket &&
+ orig_ntuples == metap->hashm_ntuples)
+ {
+ /*
+ * No one has split or inserted anything since start of scan,
+ * so believe our count as gospel.
+ */
+ metap->hashm_ntuples = tuples_remaining;
+ }
+ else
+ {
+ /*
+ * Otherwise, our count is untrustworthy since we may have
+ * double-scanned tuples in split buckets. Proceed by
+ * dead-reckoning.
+ */
+ if (metap->hashm_ntuples > deleted_tuples)
+ metap->hashm_ntuples -= deleted_tuples;
else
- num_index_tuples += 1;
+ metap->hashm_ntuples = 0;
+ num_index_tuples = metap->hashm_ntuples;
}
- index_endscan(iscan);
+ _hash_wrtbuf(rel, metabuf);
/* return statistics */
num_pages = RelationGetNumberOfBlocks(rel);
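
The core idiom in the rewritten hashbulkdelete() is the page-local deletion loop: PageIndexTupleDelete() compacts the page's item array, so whatever followed the deleted tuple slides into the offset just examined. The loop therefore stays on the same offset after a delete and shrinks maxoffno instead of advancing, exactly as the "don't increment offno, instead decrement maxoffno" comment says. A minimal standalone sketch of that pattern, with a plain int array standing in for the page (everything here is illustrative, not PostgreSQL API):

    #include <stdio.h>

    /* Stand-in for PageIndexTupleDelete(): removing slot "off" shifts
     * every later item down by one, as on a real PostgreSQL page. */
    static void delete_item(int *items, int *nitems, int off)
    {
        for (int i = off; i < *nitems - 1; i++)
            items[i] = items[i + 1];
        (*nitems)--;
    }

    int main(void)
    {
        int items[] = {10, 11, 12, 13, 14, 15};
        int nitems  = 6;
        int off     = 0;        /* FirstOffsetNumber analogue (0-based here) */
        int maxoff  = nitems;   /* PageGetMaxOffsetNumber analogue */

        while (off < maxoff)
        {
            if (items[off] % 2 == 0)        /* "callback says delete" */
            {
                delete_item(items, &nitems, off);
                maxoff--;       /* don't advance off; the next item slid in */
            }
            else
                off++;          /* tuple survives, move to the next slot */
        }

        for (int i = 0; i < nitems; i++)
            printf("%d ", items[i]);        /* prints: 11 13 15 */
        printf("\n");
        return 0;
    }
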
diff --git a/src/backend/access/hash/hashovfl.c b/src/backend/access/hash/hashovfl.c
index aa74c547da4..f3f120e47fc 100644
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.38 2003/09/01 20:26:34 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashovfl.c,v 1.39 2003/09/02 02:18:38 tgl Exp $
*
* NOTES
* Overflow pages look like ordinary relation pages.
@@ -444,11 +444,13 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
* first page in the bucket chain. The read page works backward and
* the write page works forward; the procedure terminates when the
* read page and write page are the same page.
+ *
+ * Caller must hold exclusive lock on the target bucket.
*/
void
_hash_squeezebucket(Relation rel,
- HashMetaPage metap,
- Bucket bucket)
+ Bucket bucket,
+ BlockNumber bucket_blkno)
{
Buffer wbuf;
Buffer rbuf = 0;
@@ -466,7 +468,7 @@ _hash_squeezebucket(Relation rel,
/*
* start squeezing into the base bucket page.
*/
- wblkno = BUCKET_TO_BLKNO(bucket);
+ wblkno = bucket_blkno;
wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE);
wpage = BufferGetPage(wbuf);
_hash_checkpage(wpage, LH_BUCKET_PAGE);
@@ -484,11 +486,6 @@ _hash_squeezebucket(Relation rel,
/*
* find the last page in the bucket chain by starting at the base
* bucket page and working forward.
- *
- * XXX if chains tend to be long, we should probably move forward using
- * HASH_READ and then _hash_chgbufaccess to HASH_WRITE when we reach
- * the end. if they are short we probably don't care very much. if
- * the hash function is working at all, they had better be short..
*/
ropaque = wopaque;
do
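
As the header comment above says, _hash_squeezebucket()'s read page works backward from the end of the bucket chain while its write page works forward from the primary page, moving tuples into earlier free space until the two meet. A toy model of that motion, assuming fixed-capacity "pages" held in plain arrays (no buffers, locks, or real page layout):

    #include <stdio.h>

    #define NPAGES  4
    #define PAGECAP 4

    /* Toy bucket chain: pages[i] is the i'th page of the chain (page 0
     * is the primary bucket page), nitems[i] how many tuples it holds. */
    static int pages[NPAGES][PAGECAP] = {
        {1, 2},         /* primary page, half full */
        {3},            /* overflow page           */
        {4, 5, 6},      /* overflow page           */
        {7, 8},         /* last overflow page      */
    };
    static int nitems[NPAGES] = {2, 1, 3, 2};

    int main(void)
    {
        int w = 0;              /* write page: front of the chain */
        int r = NPAGES - 1;     /* read page: back of the chain   */

        /* Move tuples from the tail into free space at the front until
         * the read and write pages meet; an emptied read page is what
         * would be handed back via _hash_freeovflpage(). */
        while (w < r)
        {
            if (nitems[w] == PAGECAP)
                w++;                            /* write page full   */
            else if (nitems[r] == 0)
                r--;                            /* read page drained */
            else
                pages[w][nitems[w]++] = pages[r][--nitems[r]];
        }

        for (int i = 0; i < NPAGES; i++)
        {
            printf("page %d:", i);
            for (int j = 0; j < nitems[i]; j++)
                printf(" %d", pages[i][j]);
            printf("\n");
        }
        return 0;
    }
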
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c
index e5e77c94b6b..23d8e0bdf6c 100644
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.39 2003/09/01 20:26:34 tgl Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashpage.c,v 1.40 2003/09/02 02:18:38 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
@@ -143,7 +143,7 @@ _hash_metapinit(Relation rel)
*/
for (i = 0; i <= 1; i++)
{
- buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(i), HASH_WRITE);
+ buf = _hash_getbuf(rel, BUCKET_TO_BLKNO(metap, i), HASH_WRITE);
pg = BufferGetPage(buf);
_hash_pageinit(pg, BufferGetPageSize(buf));
pageopaque = (HashPageOpaque) PageGetSpecialPointer(pg);
@@ -456,6 +456,8 @@ _hash_splitbucket(Relation rel,
Buffer ovflbuf;
BlockNumber oblkno;
BlockNumber nblkno;
+ BlockNumber start_oblkno;
+ BlockNumber start_nblkno;
bool null;
Datum datum;
HashItem hitem;
@@ -475,8 +477,10 @@ _hash_splitbucket(Relation rel,
_hash_checkpage((Page) metap, LH_META_PAGE);
/* get the buffers & pages */
- oblkno = BUCKET_TO_BLKNO(obucket);
- nblkno = BUCKET_TO_BLKNO(nbucket);
+ start_oblkno = BUCKET_TO_BLKNO(metap, obucket);
+ start_nblkno = BUCKET_TO_BLKNO(metap, nbucket);
+ oblkno = start_oblkno;
+ nblkno = start_nblkno;
obuf = _hash_getbuf(rel, oblkno, HASH_WRITE);
nbuf = _hash_getbuf(rel, nblkno, HASH_WRITE);
opage = BufferGetPage(obuf);
@@ -571,7 +575,7 @@ _hash_splitbucket(Relation rel,
*/
_hash_wrtbuf(rel, obuf);
_hash_wrtbuf(rel, nbuf);
- _hash_squeezebucket(rel, metap, obucket);
+ _hash_squeezebucket(rel, obucket, start_oblkno);
return;
}
}
@@ -639,7 +643,7 @@ _hash_splitbucket(Relation rel,
if (!BlockNumberIsValid(oblkno))
{
_hash_wrtbuf(rel, nbuf);
- _hash_squeezebucket(rel, metap, obucket);
+ _hash_squeezebucket(rel, obucket, start_oblkno);
return;
}
diff --git a/src/backend/access/hash/hashsearch.c b/src/backend/access/hash/hashsearch.c
index 53796c7e765..3237e7a8fd3 100644
--- a/src/backend/access/hash/hashsearch.c
+++ b/src/backend/access/hash/hashsearch.c
@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.31 2003/08/04 02:39:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashsearch.c,v 1.32 2003/09/02 02:18:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,8 +19,10 @@
/*
- * _hash_search() -- Finds the page/bucket that the contains the
- * scankey and loads it into *bufP. the buffer has a read lock.
+ * _hash_search() -- Find the bucket that contains the scankey
+ * and fetch its primary bucket page into *bufP.
+ *
+ * the buffer has a read lock.
*/
void
_hash_search(Relation rel,
@@ -30,22 +32,23 @@ _hash_search(Relation rel,
HashMetaPage metap)
{
BlockNumber blkno;
- Datum keyDatum;
Bucket bucket;
- if (scankey == (ScanKey) NULL ||
- (keyDatum = scankey[0].sk_argument) == (Datum) NULL)
+ if (scankey == NULL)
{
/*
- * If the scankey argument is NULL, all tuples will satisfy the
+ * If the scankey is empty, all tuples will satisfy the
* scan so we start the scan at the first bucket (bucket 0).
*/
bucket = 0;
}
else
- bucket = _hash_call(rel, metap, keyDatum);
+ {
+ Assert(!(scankey[0].sk_flags & SK_ISNULL));
+ bucket = _hash_call(rel, metap, scankey[0].sk_argument);
+ }
- blkno = BUCKET_TO_BLKNO(bucket);
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
*bufP = _hash_getbuf(rel, blkno, HASH_READ);
}
@@ -330,7 +333,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
if (allbuckets && bucket < metap->hashm_maxbucket)
{
++bucket;
- blkno = BUCKET_TO_BLKNO(bucket);
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
buf = _hash_getbuf(rel, blkno, HASH_READ);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
@@ -380,7 +383,7 @@ _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir, Buffer metabuf)
if (allbuckets && bucket > 0)
{
--bucket;
- blkno = BUCKET_TO_BLKNO(bucket);
+ blkno = BUCKET_TO_BLKNO(metap, bucket);
buf = _hash_getbuf(rel, blkno, HASH_READ);
page = BufferGetPage(buf);
_hash_checkpage(page, LH_BUCKET_PAGE);
diff --git a/src/backend/access/hash/hashutil.c b/src/backend/access/hash/hashutil.c
index 83126fccb42..76d9bc5f4ea 100644
--- a/src/backend/access/hash/hashutil.c
+++ b/src/backend/access/hash/hashutil.c
@@ -8,11 +8,10 @@
*
*
* IDENTIFICATION
- * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.33 2003/08/04 02:39:57 momjian Exp $
+ * $Header: /cvsroot/pgsql/src/backend/access/hash/hashutil.c,v 1.34 2003/09/02 02:18:38 tgl Exp $
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
#include "access/genam.h"
@@ -20,20 +19,23 @@
#include "access/iqual.h"
+/*
+ * _hash_mkscankey -- build a scan key matching the given indextuple
+ *
+ * Note: this is prepared for multiple index columns, but very little
+ * else in access/hash is ...
+ */
ScanKey
_hash_mkscankey(Relation rel, IndexTuple itup)
{
ScanKey skey;
- TupleDesc itupdesc;
- int natts;
+ TupleDesc itupdesc = RelationGetDescr(rel);
+ int natts = rel->rd_rel->relnatts;
AttrNumber i;
Datum arg;
FmgrInfo *procinfo;
bool isnull;
- natts = rel->rd_rel->relnatts;
- itupdesc = RelationGetDescr(rel);
-
skey = (ScanKey) palloc(natts * sizeof(ScanKeyData));
for (i = 0; i < natts; i++)
@@ -41,7 +43,7 @@ _hash_mkscankey(Relation rel, IndexTuple itup)
arg = index_getattr(itup, i + 1, itupdesc, &isnull);
procinfo = index_getprocinfo(rel, i + 1, HASHPROC);
ScanKeyEntryInitializeWithInfo(&skey[i],
- 0x0,
+ isnull ? SK_ISNULL : 0x0,
(AttrNumber) (i + 1),
procinfo,
CurrentMemoryContext,
@@ -57,18 +59,19 @@ _hash_freeskey(ScanKey skey)
pfree(skey);
}
-
+/*
+ * _hash_checkqual -- does the index tuple satisfy the scan conditions?
+ */
bool
_hash_checkqual(IndexScanDesc scan, IndexTuple itup)
{
- if (scan->numberOfKeys > 0)
- return (index_keytest(itup,
- RelationGetDescr(scan->indexRelation),
- scan->numberOfKeys, scan->keyData));
- else
- return true;
+ return index_keytest(itup, RelationGetDescr(scan->indexRelation),
+ scan->numberOfKeys, scan->keyData);
}
+/*
+ * _hash_formitem -- construct a hash index entry
+ */
HashItem
_hash_formitem(IndexTuple itup)
{
@@ -82,17 +85,27 @@ _hash_formitem(IndexTuple itup)
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("hash indexes cannot include null keys")));
- /* make a copy of the index tuple with room for the sequence number */
+ /*
+ * make a copy of the index tuple (XXX do we still need to copy?)
+ *
+ * HashItemData used to have more fields than IndexTupleData, but no
+ * longer...
+ */
tuplen = IndexTupleSize(itup);
nbytes_hitem = tuplen +
(sizeof(HashItemData) - sizeof(IndexTupleData));
hitem = (HashItem) palloc(nbytes_hitem);
- memmove((char *) &(hitem->hash_itup), (char *) itup, tuplen);
+ memcpy((char *) &(hitem->hash_itup), (char *) itup, tuplen);
return hitem;
}
+/*
+ * _hash_call -- given a Datum, call the index's hash procedure
+ *
+ * Returns the bucket number that the hash key maps to.
+ */
Bucket
_hash_call(Relation rel, HashMetaPage metap, Datum key)
{
@@ -103,9 +116,11 @@ _hash_call(Relation rel, HashMetaPage metap, Datum key)
/* XXX assumes index has only one attribute */
procinfo = index_getprocinfo(rel, 1, HASHPROC);
n = DatumGetUInt32(FunctionCall1(procinfo, key));
+
bucket = n & metap->hashm_highmask;
if (bucket > metap->hashm_maxbucket)
bucket = bucket & metap->hashm_lowmask;
+
return bucket;
}
@@ -119,7 +134,7 @@ _hash_log2(uint32 num)
limit;
limit = 1;
- for (i = 0; limit < num; limit = limit << 1, i++)
+ for (i = 0; limit < num; limit <<= 1, i++)
;
return i;
}
@@ -130,20 +145,19 @@ _hash_log2(uint32 num)
void
_hash_checkpage(Page page, int flags)
{
- HashPageOpaque opaque;
-
+#ifdef USE_ASSERT_CHECKING
Assert(page);
Assert(((PageHeader) (page))->pd_lower >= SizeOfPageHeaderData);
-#if 1
Assert(((PageHeader) (page))->pd_upper <=
(BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
Assert(((PageHeader) (page))->pd_special ==
(BLCKSZ - MAXALIGN(sizeof(HashPageOpaqueData))));
Assert(PageGetPageSize(page) == BLCKSZ);
-#endif
if (flags)
{
- opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+ HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
+
Assert(opaque->hasho_flag & flags);
}
+#endif /* USE_ASSERT_CHECKING */
}
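
_hash_call() shows the standard linear-hashing bucket choice: mask the hash value with the larger (high) mask, and if that names a bucket beyond hashm_maxbucket, i.e. one whose splitpoint has not been reached yet, remask with the smaller (low) mask. A self-contained sketch with hypothetical mask values (plain unsigned standing in for uint32):

    #include <stdio.h>

    /* Linear-hashing bucket choice, after _hash_call(): try the high
     * mask first; if that bucket has not been created yet, fall back
     * to the low mask. */
    static unsigned choose_bucket(unsigned hash, unsigned maxbucket,
                                  unsigned highmask, unsigned lowmask)
    {
        unsigned bucket = hash & highmask;

        if (bucket > maxbucket)
            bucket &= lowmask;
        return bucket;
    }

    int main(void)
    {
        /* Hypothetical state: buckets 0..4 exist, so lowmask = 0x3
         * (4 buckets' worth) and highmask = 0x7 (8 buckets' worth). */
        unsigned maxbucket = 4, highmask = 0x7, lowmask = 0x3;

        for (unsigned h = 0; h < 8; h++)
            printf("hash %u -> bucket %u\n",
                   h, choose_bucket(h, maxbucket, highmask, lowmask));
        return 0;
    }

Hash values 5..7 wrap back to buckets 1..3, so the table behaves as if it still had only four buckets until those splits actually happen.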
diff --git a/src/include/access/hash.h b/src/include/access/hash.h
index 83aae20c1ca..045fb40c40a 100644
--- a/src/include/access/hash.h
+++ b/src/include/access/hash.h
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $Id: hash.h,v 1.50 2003/09/01 20:26:34 tgl Exp $
+ * $Id: hash.h,v 1.51 2003/09/02 02:18:38 tgl Exp $
*
* NOTES
* modeled after Margo Seltzer's hash implementation for unix.
@@ -25,13 +25,12 @@
/*
* Mapping from hash bucket number to physical block number of bucket's
- * starting page. Beware of multiple evaluations of argument! Also notice
- * macro's implicit dependency on "metap".
+ * starting page. Beware of multiple evaluations of argument!
*/
typedef uint32 Bucket;
-#define BUCKET_TO_BLKNO(B) \
- ((BlockNumber) ((B) + ((B) ? metap->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
+#define BUCKET_TO_BLKNO(metap,B) \
+ ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
/*
* Special space for hash index pages.
@@ -243,8 +242,8 @@ extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf);
extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
BlockNumber blkno);
-extern void _hash_squeezebucket(Relation rel, HashMetaPage metap,
- Bucket bucket);
+extern void _hash_squeezebucket(Relation rel,
+ Bucket bucket, BlockNumber bucket_blkno);
/* hashpage.c */
extern void _hash_metapinit(Relation rel);
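
The reworked two-argument BUCKET_TO_BLKNO(metap, B) adds to the bucket number the count of overflow pages allocated before the bucket's splitpoint (taken from hashm_spares[]), plus one for the metapage at block 0. A runnable approximation, with a made-up spares[] array standing in for metap->hashm_spares:

    #include <stdio.h>

    /* _hash_log2(num): smallest i such that (1 << i) >= num,
     * matching the loop in hashutil.c. */
    static unsigned hash_log2(unsigned num)
    {
        unsigned i, limit;

        for (i = 0, limit = 1; limit < num; limit <<= 1, i++)
            ;
        return i;
    }

    /* The two-argument BUCKET_TO_BLKNO as a function: buckets past the
     * first splitpoint are shifted right by the overflow pages
     * allocated before them, and the trailing +1 skips the metapage. */
    static unsigned bucket_to_blkno(const unsigned *spares, unsigned b)
    {
        return b + (b ? spares[hash_log2(b + 1) - 1] : 0) + 1;
    }

    int main(void)
    {
        /* Hypothetical spares[]: two overflow pages allocated before
         * splitpoint 2, three before splitpoint 3. */
        unsigned spares[] = {0, 0, 2, 3};

        for (unsigned b = 0; b < 8; b++)
            printf("bucket %u -> block %u\n",
                   b, bucket_to_blkno(spares, b));
        return 0;
    }

With these numbers, buckets 0..3 map to blocks 1..4, while buckets 4..7 map to blocks 7..10: the two overflow pages from splitpoint 2 sit between them, which is why a caller such as the new hashbulkdelete() can compute stable bucket addresses from a cached copy of the spares array.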