diff options
Diffstat (limited to 'src/backend/access/nbtree/nbtpage.c')
-rw-r--r-- | src/backend/access/nbtree/nbtpage.c | 194 |
1 files changed, 94 insertions, 100 deletions
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c index 52d60abaec0..927860030c8 100644 --- a/src/backend/access/nbtree/nbtpage.c +++ b/src/backend/access/nbtree/nbtpage.c @@ -9,7 +9,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.87 2005/08/12 14:34:14 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.88 2005/10/15 02:49:09 momjian Exp $ * * NOTES * Postgres btree pages look like ordinary relation pages. The opaque @@ -115,8 +115,8 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level) metaopaque->btpo_flags = BTP_META; /* - * Set pd_lower just past the end of the metadata. This is not - * essential but it makes the page look compressible to xlog.c. + * Set pd_lower just past the end of the metadata. This is not essential + * but it makes the page look compressible to xlog.c. */ ((PageHeader) page)->pd_lower = ((char *) metad + sizeof(BTMetaPageData)) - (char *) page; @@ -198,26 +198,26 @@ _bt_getroot(Relation rel, int access) LockBuffer(metabuf, BT_WRITE); /* - * Race condition: if someone else initialized the metadata - * between the time we released the read lock and acquired the - * write lock, we must avoid doing it again. + * Race condition: if someone else initialized the metadata between + * the time we released the read lock and acquired the write lock, we + * must avoid doing it again. */ if (metad->btm_root != P_NONE) { /* - * Metadata initialized by someone else. In order to - * guarantee no deadlocks, we have to release the metadata - * page and start all over again. (Is that really true? But - * it's hardly worth trying to optimize this case.) + * Metadata initialized by someone else. In order to guarantee no + * deadlocks, we have to release the metadata page and start all + * over again. (Is that really true? But it's hardly worth trying + * to optimize this case.) */ _bt_relbuf(rel, metabuf); return _bt_getroot(rel, access); } /* - * Get, initialize, write, and leave a lock of the appropriate - * type on the new root page. Since this is the first page in the - * tree, it's a leaf as well as the root. + * Get, initialize, write, and leave a lock of the appropriate type on + * the new root page. Since this is the first page in the tree, it's + * a leaf as well as the root. */ rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootblkno = BufferGetBlockNumber(rootbuf); @@ -266,9 +266,9 @@ _bt_getroot(Relation rel, int access) _bt_wrtnorelbuf(rel, rootbuf); /* - * swap root write lock for read lock. There is no danger of - * anyone else accessing the new root page while it's unlocked, - * since no one else knows where it is yet. + * swap root write lock for read lock. There is no danger of anyone + * else accessing the new root page while it's unlocked, since no one + * else knows where it is yet. */ LockBuffer(rootbuf, BUFFER_LOCK_UNLOCK); LockBuffer(rootbuf, BT_READ); @@ -312,8 +312,8 @@ _bt_getroot(Relation rel, int access) } /* - * By here, we have a pin and read lock on the root page, and no lock - * set on the metadata page. Return the root page's buffer. + * By here, we have a pin and read lock on the root page, and no lock set + * on the metadata page. Return the root page's buffer. */ return rootbuf; } @@ -435,27 +435,26 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * First see if the FSM knows of any free pages. * - * We can't trust the FSM's report unreservedly; we have to check - * that the page is still free. (For example, an already-free - * page could have been re-used between the time the last VACUUM - * scanned it and the time the VACUUM made its FSM updates.) + * We can't trust the FSM's report unreservedly; we have to check that + * the page is still free. (For example, an already-free page could + * have been re-used between the time the last VACUUM scanned it and + * the time the VACUUM made its FSM updates.) * - * In fact, it's worse than that: we can't even assume that it's safe - * to take a lock on the reported page. If somebody else has a - * lock on it, or even worse our own caller does, we could - * deadlock. (The own-caller scenario is actually not improbable. - * Consider an index on a serial or timestamp column. Nearly all - * splits will be at the rightmost page, so it's entirely likely - * that _bt_split will call us while holding a lock on the page - * most recently acquired from FSM. A VACUUM running concurrently - * with the previous split could well have placed that page back - * in FSM.) + * In fact, it's worse than that: we can't even assume that it's safe to + * take a lock on the reported page. If somebody else has a lock on + * it, or even worse our own caller does, we could deadlock. (The + * own-caller scenario is actually not improbable. Consider an index + * on a serial or timestamp column. Nearly all splits will be at the + * rightmost page, so it's entirely likely that _bt_split will call us + * while holding a lock on the page most recently acquired from FSM. + * A VACUUM running concurrently with the previous split could well + * have placed that page back in FSM.) * - * To get around that, we ask for only a conditional lock on the - * reported page. If we fail, then someone else is using the - * page, and we may reasonably assume it's not free. (If we - * happen to be wrong, the worst consequence is the page will be - * lost to use till the next VACUUM, which is no big problem.) + * To get around that, we ask for only a conditional lock on the reported + * page. If we fail, then someone else is using the page, and we may + * reasonably assume it's not free. (If we happen to be wrong, the + * worst consequence is the page will be lost to use till the next + * VACUUM, which is no big problem.) */ for (;;) { @@ -486,10 +485,10 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * Extend the relation by one page. * - * We have to use a lock to ensure no one else is extending the rel - * at the same time, else we will both try to initialize the same - * new page. We can skip locking for new or temp relations, - * however, since no one else could be accessing them. + * We have to use a lock to ensure no one else is extending the rel at + * the same time, else we will both try to initialize the same new + * page. We can skip locking for new or temp relations, however, + * since no one else could be accessing them. */ needLock = !RELATION_IS_LOCAL(rel); @@ -504,8 +503,8 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access) /* * Release the file-extension lock; it's now OK for someone else to * extend the relation some more. Note that we cannot release this - * lock before we have buffer lock on the new page, or we risk a - * race condition against btvacuumcleanup --- see comments therein. + * lock before we have buffer lock on the new page, or we risk a race + * condition against btvacuumcleanup --- see comments therein. */ if (needLock) UnlockRelationForExtension(rel, ExclusiveLock); @@ -614,10 +613,10 @@ _bt_page_recyclable(Page page) BTPageOpaque opaque; /* - * It's possible to find an all-zeroes page in an index --- for - * example, a backend might successfully extend the relation one page - * and then crash before it is able to make a WAL entry for adding the - * page. If we find a zeroed page then reclaim it. + * It's possible to find an all-zeroes page in an index --- for example, a + * backend might successfully extend the relation one page and then crash + * before it is able to make a WAL entry for adding the page. If we find a + * zeroed page then reclaim it. */ if (PageIsNew(page)) return true; @@ -672,9 +671,9 @@ _bt_delitems(Relation rel, Buffer buf, rdata[0].next = &(rdata[1]); /* - * The target-offsets array is not in the buffer, but pretend that - * it is. When XLogInsert stores the whole buffer, the offsets - * array need not be stored too. + * The target-offsets array is not in the buffer, but pretend that it + * is. When XLogInsert stores the whole buffer, the offsets array + * need not be stored too. */ if (nitems > 0) { @@ -747,8 +746,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) BTPageOpaque opaque; /* - * We can never delete rightmost pages nor root pages. While at it, - * check that page is not already deleted and is empty. + * We can never delete rightmost pages nor root pages. While at it, check + * that page is not already deleted and is empty. */ page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -760,8 +759,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * Save info about page, including a copy of its high key (it must - * have one, being non-rightmost). + * Save info about page, including a copy of its high key (it must have + * one, being non-rightmost). */ target = BufferGetBlockNumber(buf); targetlevel = opaque->btpo.level; @@ -770,11 +769,11 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) targetkey = CopyBTItem((BTItem) PageGetItem(page, itemid)); /* - * We need to get an approximate pointer to the page's parent page. - * Use the standard search mechanism to search for the page's high - * key; this will give us a link to either the current parent or - * someplace to its left (if there are multiple equal high keys). To - * avoid deadlocks, we'd better drop the target page lock first. + * We need to get an approximate pointer to the page's parent page. Use + * the standard search mechanism to search for the page's high key; this + * will give us a link to either the current parent or someplace to its + * left (if there are multiple equal high keys). To avoid deadlocks, we'd + * better drop the target page lock first. */ _bt_relbuf(rel, buf); /* we need a scan key to do our search, so build one */ @@ -786,9 +785,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) _bt_relbuf(rel, lbuf); /* - * If we are trying to delete an interior page, _bt_search did more - * than we needed. Locate the stack item pointing to our parent - * level. + * If we are trying to delete an interior page, _bt_search did more than + * we needed. Locate the stack item pointing to our parent level. */ ilevel = 0; for (;;) @@ -803,16 +801,15 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * We have to lock the pages we need to modify in the standard order: - * moving right, then up. Else we will deadlock against other - * writers. + * moving right, then up. Else we will deadlock against other writers. * - * So, we need to find and write-lock the current left sibling of the - * target page. The sibling that was current a moment ago could have - * split, so we may have to move right. This search could fail if - * either the sibling or the target page was deleted by someone else - * meanwhile; if so, give up. (Right now, that should never happen, - * since page deletion is only done in VACUUM and there shouldn't be - * multiple VACUUMs concurrently on the same table.) + * So, we need to find and write-lock the current left sibling of the target + * page. The sibling that was current a moment ago could have split, so + * we may have to move right. This search could fail if either the + * sibling or the target page was deleted by someone else meanwhile; if + * so, give up. (Right now, that should never happen, since page deletion + * is only done in VACUUM and there shouldn't be multiple VACUUMs + * concurrently on the same table.) */ if (leftsib != P_NONE) { @@ -839,19 +836,18 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) lbuf = InvalidBuffer; /* - * Next write-lock the target page itself. It should be okay to take - * just a write lock not a superexclusive lock, since no scans would - * stop on an empty page. + * Next write-lock the target page itself. It should be okay to take just + * a write lock not a superexclusive lock, since no scans would stop on an + * empty page. */ buf = _bt_getbuf(rel, target, BT_WRITE); page = BufferGetPage(buf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* - * Check page is still empty etc, else abandon deletion. The empty - * check is necessary since someone else might have inserted into it - * while we didn't have it locked; the others are just for paranoia's - * sake. + * Check page is still empty etc, else abandon deletion. The empty check + * is necessary since someone else might have inserted into it while we + * didn't have it locked; the others are just for paranoia's sake. */ if (P_RIGHTMOST(opaque) || P_ISROOT(opaque) || P_ISDELETED(opaque) || P_FIRSTDATAKEY(opaque) <= PageGetMaxOffsetNumber(page)) @@ -872,9 +868,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) rbuf = _bt_getbuf(rel, rightsib, BT_WRITE); /* - * Next find and write-lock the current parent of the target page. - * This is essentially the same as the corresponding step of - * splitting. + * Next find and write-lock the current parent of the target page. This is + * essentially the same as the corresponding step of splitting. */ ItemPointerSet(&(stack->bts_btitem.bti_itup.t_tid), target, P_HIKEY); @@ -887,8 +882,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * If the target is the rightmost child of its parent, then we can't - * delete, unless it's also the only child --- in which case the - * parent changes to half-dead status. + * delete, unless it's also the only child --- in which case the parent + * changes to half-dead status. */ page = BufferGetPage(pbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -917,11 +912,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * If we are deleting the next-to-last page on the target's level, - * then the rightsib is a candidate to become the new fast root. (In - * theory, it might be possible to push the fast root even further - * down, but the odds of doing so are slim, and the locking - * considerations daunting.) + * If we are deleting the next-to-last page on the target's level, then + * the rightsib is a candidate to become the new fast root. (In theory, it + * might be possible to push the fast root even further down, but the odds + * of doing so are slim, and the locking considerations daunting.) * * We can safely acquire a lock on the metapage here --- see comments for * _bt_newroot(). @@ -939,9 +933,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) metad = BTPageGetMeta(metapg); /* - * The expected case here is btm_fastlevel == targetlevel+1; - * if the fastlevel is <= targetlevel, something is wrong, and - * we choose to overwrite it to fix it. + * The expected case here is btm_fastlevel == targetlevel+1; if + * the fastlevel is <= targetlevel, something is wrong, and we + * choose to overwrite it to fix it. */ if (metad->btm_fastlevel > targetlevel + 1) { @@ -961,9 +955,9 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) /* * Update parent. The normal case is a tad tricky because we want to - * delete the target's downlink and the *following* key. Easiest way - * is to copy the right sibling's downlink over the target downlink, - * and then delete the following item. + * delete the target's downlink and the *following* key. Easiest way is + * to copy the right sibling's downlink over the target downlink, and then + * delete the following item. */ page = BufferGetPage(pbuf); opaque = (BTPageOpaque) PageGetSpecialPointer(page); @@ -992,8 +986,8 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) } /* - * Update siblings' side-links. Note the target page's side-links - * will continue to point to the siblings. + * Update siblings' side-links. Note the target page's side-links will + * continue to point to the siblings. */ if (BufferIsValid(lbuf)) { @@ -1123,10 +1117,10 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) _bt_wrtbuf(rel, lbuf); /* - * If parent became half dead, recurse to try to delete it. Otherwise, - * if right sibling is empty and is now the last child of the parent, - * recurse to try to delete it. (These cases cannot apply at the same - * time, though the second case might itself recurse to the first.) + * If parent became half dead, recurse to try to delete it. Otherwise, if + * right sibling is empty and is now the last child of the parent, recurse + * to try to delete it. (These cases cannot apply at the same time, + * though the second case might itself recurse to the first.) */ if (parent_half_dead) { |