about | summary | refs | log | tree | commit | diff
path: root/src/backend/commands
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/commands')
-rw-r--r-- src/backend/commands/async.c 8
-rw-r--r-- src/backend/commands/trigger.c 32
-rw-r--r-- src/backend/commands/vacuum.c 223
3 files changed, 161 insertions, 102 deletions
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index 142b02dfaf8..e2c6203891d 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.123 2005/06/17 22:32:43 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/async.c,v 1.124 2005/08/20 00:39:53 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -550,8 +550,9 @@ AtCommit_Notify(void)
}
else if (listener->notification == 0)
{
- ItemPointerData ctid;
HTSU_Result result;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
rTuple = heap_modifytuple(lTuple, tdesc,
value, nulls, repl);
@@ -573,7 +574,7 @@ AtCommit_Notify(void)
* heap_update calls.
*/
result = heap_update(lRel, &lTuple->t_self, rTuple,
- &ctid,
+ &update_ctid, &update_xmax,
GetCurrentCommandId(), InvalidSnapshot,
false /* no wait for commit */ );
switch (result)
@@ -585,7 +586,6 @@ AtCommit_Notify(void)
case HeapTupleMayBeUpdated:
/* done successfully */
-
#ifdef NOT_USED /* currently there are no indexes */
CatalogUpdateIndexes(lRel, rTuple);
#endif
diff --git a/src/backend/commands/trigger.c b/src/backend/commands/trigger.c
index 4ea973ae7fa..562f676f4b8 100644
--- a/src/backend/commands/trigger.c
+++ b/src/backend/commands/trigger.c
@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.191 2005/08/12 01:35:57 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.192 2005/08/20 00:39:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1592,14 +1592,18 @@ GetTupleForTrigger(EState *estate, ResultRelInfo *relinfo,
if (newSlot != NULL)
{
HTSU_Result test;
+ ItemPointerData update_ctid;
+ TransactionId update_xmax;
+
+ *newSlot = NULL;
/*
* lock tuple for update
*/
- *newSlot = NULL;
- tuple.t_self = *tid;
ltrmark:;
- test = heap_lock_tuple(relation, &tuple, &buffer, cid,
+ tuple.t_self = *tid;
+ test = heap_lock_tuple(relation, &tuple, &buffer,
+ &update_ctid, &update_xmax, cid,
LockTupleExclusive, false);
switch (test)
{
@@ -1617,15 +1621,18 @@ ltrmark:;
ereport(ERROR,
(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
errmsg("could not serialize access due to concurrent update")));
- else if (!(ItemPointerEquals(&(tuple.t_self), tid)))
+ else if (!ItemPointerEquals(&update_ctid, &tuple.t_self))
{
- TupleTableSlot *epqslot = EvalPlanQual(estate,
- relinfo->ri_RangeTableIndex,
- &(tuple.t_self));
-
- if (!(TupIsNull(epqslot)))
+ /* it was updated, so look at the updated version */
+ TupleTableSlot *epqslot;
+
+ epqslot = EvalPlanQual(estate,
+ relinfo->ri_RangeTableIndex,
+ &update_ctid,
+ update_xmax);
+ if (!TupIsNull(epqslot))
{
- *tid = tuple.t_self;
+ *tid = update_ctid;
*newSlot = epqslot;
goto ltrmark;
}
@@ -1639,7 +1646,7 @@ ltrmark:;
default:
ReleaseBuffer(buffer);
- elog(ERROR, "invalid heap_lock_tuple status: %d", test);
+ elog(ERROR, "unrecognized heap_lock_tuple status: %u", test);
return NULL; /* keep compiler quiet */
}
}
@@ -1659,6 +1666,7 @@ ltrmark:;
tuple.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
tuple.t_len = ItemIdGetLength(lp);
tuple.t_self = *tid;
+ tuple.t_tableOid = RelationGetRelid(relation);
}
result = heap_copytuple(&tuple);
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 9db91209448..ef199c5f073 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.312 2005/07/29 19:30:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.313 2005/08/20 00:39:54 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -51,6 +51,10 @@
#include "pgstat.h"
+/*
+ * VacPage structures keep track of each page on which we find useful
+ * amounts of free space.
+ */
typedef struct VacPageData
{
BlockNumber blkno; /* BlockNumber of this Page */
@@ -73,30 +77,54 @@ typedef struct VacPageListData
typedef VacPageListData *VacPageList;
+/*
+ * The "vtlinks" array keeps information about each recently-updated tuple
+ * ("recent" meaning its XMAX is too new to let us recycle the tuple).
+ * We store the tuple's own TID as well as its t_ctid (its link to the next
+ * newer tuple version). Searching in this array allows us to follow update
+ * chains backwards from newer to older tuples. When we move a member of an
+ * update chain, we must move *all* the live members of the chain, so that we
+ * can maintain their t_ctid link relationships (we must not just overwrite
+ * t_ctid in an existing tuple).
+ *
+ * Note: because t_ctid links can be stale (this would only occur if a prior
+ * VACUUM crashed partway through), it is possible that new_tid points to an
+ * empty slot or unrelated tuple. We have to check the linkage as we follow
+ * it, just as is done in EvalPlanQual.
+ */
typedef struct VTupleLinkData
{
- ItemPointerData new_tid;
- ItemPointerData this_tid;
+ ItemPointerData new_tid; /* t_ctid of an updated tuple */
+ ItemPointerData this_tid; /* t_self of the tuple */
} VTupleLinkData;
typedef VTupleLinkData *VTupleLink;
+/*
+ * We use an array of VTupleMoveData to plan a chain tuple move fully
+ * before we do it.
+ */
typedef struct VTupleMoveData
{
ItemPointerData tid; /* tuple ID */
- VacPage vacpage; /* where to move */
- bool cleanVpd; /* clean vacpage before using */
+ VacPage vacpage; /* where to move it to */
+ bool cleanVpd; /* clean vacpage before using? */
} VTupleMoveData;
typedef VTupleMoveData *VTupleMove;
+/*
+ * VRelStats contains the data acquired by scan_heap for use later
+ */
typedef struct VRelStats
{
+ /* miscellaneous statistics */
BlockNumber rel_pages;
double rel_tuples;
Size min_tlen;
Size max_tlen;
bool hasindex;
+ /* vtlinks array for tuple chain following - sorted by new_tid */
int num_vtlinks;
VTupleLink vtlinks;
} VRelStats;
@@ -117,6 +145,7 @@ typedef struct ExecContextData
EState *estate;
TupleTableSlot *slot;
} ExecContextData;
+
typedef ExecContextData *ExecContext;
static void
@@ -1802,18 +1831,25 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
}
/*
- * If this tuple is in the chain of tuples created in updates
- * by "recent" transactions then we have to move all chain of
- * tuples to another places.
+ * If this tuple is in a chain of tuples created in updates
+ * by "recent" transactions then we have to move the whole chain
+ * of tuples to other places, so that we can write new t_ctid
+ * links that preserve the chain relationship.
+ *
+ * This test is complicated. Read it as "if tuple is a recently
+ * created updated version, OR if it is an obsoleted version".
+ * (In the second half of the test, we needn't make any check
+ * on XMAX --- it must be recently obsoleted, else scan_heap
+ * would have deemed it removable.)
*
* NOTE: this test is not 100% accurate: it is possible for a
* tuple to be an updated one with recent xmin, and yet not
- * have a corresponding tuple in the vtlinks list. Presumably
+ * match any new_tid entry in the vtlinks list. Presumably
* there was once a parent tuple with xmax matching the xmin,
* but it's possible that that tuple has been removed --- for
- * example, if it had xmin = xmax then
- * HeapTupleSatisfiesVacuum would deem it removable as soon as
- * the xmin xact completes.
+ * example, if it had xmin = xmax and wasn't itself an updated
+ * version, then HeapTupleSatisfiesVacuum would deem it removable
+ * as soon as the xmin xact completes.
*
* To be on the safe side, we abandon the repair_frag process if
* we cannot find the parent tuple in vtlinks. This may be
@@ -1854,72 +1890,85 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
break; /* out of walk-along-page loop */
}
- vtmove = (VTupleMove) palloc(100 * sizeof(VTupleMoveData));
- num_vtmove = 0;
- free_vtmove = 100;
-
/*
* If this tuple is in the begin/middle of the chain then
- * we have to move to the end of chain.
+ * we have to move to the end of chain. As with any
+ * t_ctid chase, we have to verify that each new tuple
+ * is really the descendant of the tuple we came from.
*/
while (!(tp.t_data->t_infomask & (HEAP_XMAX_INVALID |
HEAP_IS_LOCKED)) &&
!(ItemPointerEquals(&(tp.t_self),
&(tp.t_data->t_ctid))))
{
- Page Cpage;
- ItemId Citemid;
- ItemPointerData Ctid;
-
- Ctid = tp.t_data->t_ctid;
- if (freeCbuf)
- ReleaseBuffer(Cbuf);
- freeCbuf = true;
- Cbuf = ReadBuffer(onerel,
- ItemPointerGetBlockNumber(&Ctid));
- Cpage = BufferGetPage(Cbuf);
- Citemid = PageGetItemId(Cpage,
- ItemPointerGetOffsetNumber(&Ctid));
- if (!ItemIdIsUsed(Citemid))
+ ItemPointerData nextTid;
+ TransactionId priorXmax;
+ Buffer nextBuf;
+ Page nextPage;
+ OffsetNumber nextOffnum;
+ ItemId nextItemid;
+ HeapTupleHeader nextTdata;
+
+ nextTid = tp.t_data->t_ctid;
+ priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
+ /* assume block# is OK (see heap_fetch comments) */
+ nextBuf = ReadBuffer(onerel,
+ ItemPointerGetBlockNumber(&nextTid));
+ nextPage = BufferGetPage(nextBuf);
+ /* If bogus or unused slot, assume tp is end of chain */
+ nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
+ if (nextOffnum < FirstOffsetNumber ||
+ nextOffnum > PageGetMaxOffsetNumber(nextPage))
{
- /*
- * This means that in the middle of chain there
- * was tuple updated by older (than OldestXmin)
- * xaction and this tuple is already deleted by
- * me. Actually, upper part of chain should be
- * removed and seems that this should be handled
- * in scan_heap(), but it's not implemented at the
- * moment and so we just stop shrinking here.
- */
- elog(DEBUG2, "child itemid in update-chain marked as unused --- can't continue repair_frag");
- chain_move_failed = true;
- break; /* out of loop to move to chain end */
+ ReleaseBuffer(nextBuf);
+ break;
+ }
+ nextItemid = PageGetItemId(nextPage, nextOffnum);
+ if (!ItemIdIsUsed(nextItemid))
+ {
+ ReleaseBuffer(nextBuf);
+ break;
}
+ /* if not matching XMIN, assume tp is end of chain */
+ nextTdata = (HeapTupleHeader) PageGetItem(nextPage,
+ nextItemid);
+ if (!TransactionIdEquals(HeapTupleHeaderGetXmin(nextTdata),
+ priorXmax))
+ {
+ ReleaseBuffer(nextBuf);
+ break;
+ }
+ /* OK, switch our attention to the next tuple in chain */
tp.t_datamcxt = NULL;
- tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
- tp.t_self = Ctid;
- tlen = tp.t_len = ItemIdGetLength(Citemid);
- }
- if (chain_move_failed)
- {
+ tp.t_data = nextTdata;
+ tp.t_self = nextTid;
+ tlen = tp.t_len = ItemIdGetLength(nextItemid);
if (freeCbuf)
ReleaseBuffer(Cbuf);
- pfree(vtmove);
- break; /* out of walk-along-page loop */
+ Cbuf = nextBuf;
+ freeCbuf = true;
}
+ /* Set up workspace for planning the chain move */
+ vtmove = (VTupleMove) palloc(100 * sizeof(VTupleMoveData));
+ num_vtmove = 0;
+ free_vtmove = 100;
+
/*
- * Check if all items in chain can be moved
+ * Now, walk backwards up the chain (towards older tuples)
+ * and check if all items in chain can be moved. We record
+ * all the moves that need to be made in the vtmove array.
*/
for (;;)
{
Buffer Pbuf;
Page Ppage;
ItemId Pitemid;
- HeapTupleData Ptp;
+ HeapTupleHeader PTdata;
VTupleLinkData vtld,
*vtlp;
+ /* Identify a target page to move this tuple to */
if (to_vacpage == NULL ||
!enough_space(to_vacpage, tlen))
{
@@ -1942,6 +1991,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
if (to_vacpage->offsets_used >= to_vacpage->offsets_free)
to_vacpage->free -= sizeof(ItemIdData);
(to_vacpage->offsets_used)++;
+
+ /* Add an entry to vtmove list */
if (free_vtmove == 0)
{
free_vtmove = 1000;
@@ -1959,13 +2010,13 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
free_vtmove--;
num_vtmove++;
- /* At beginning of chain? */
+ /* Done if at beginning of chain */
if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
TransactionIdPrecedes(HeapTupleHeaderGetXmin(tp.t_data),
OldestXmin))
- break;
+ break; /* out of check-all-items loop */
- /* No, move to tuple with prior row version */
+ /* Move to tuple with prior row version */
vtld.new_tid = tp.t_self;
vtlp = (VTupleLink)
vac_bsearch((void *) &vtld,
@@ -1989,18 +2040,17 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
/* this can't happen since we saw tuple earlier: */
if (!ItemIdIsUsed(Pitemid))
elog(ERROR, "parent itemid marked as unused");
- Ptp.t_datamcxt = NULL;
- Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
+ PTdata = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
/* ctid should not have changed since we saved it */
Assert(ItemPointerEquals(&(vtld.new_tid),
- &(Ptp.t_data->t_ctid)));
+ &(PTdata->t_ctid)));
/*
- * Read above about cases when !ItemIdIsUsed(Citemid)
+ * Read above about cases when !ItemIdIsUsed(nextItemid)
* (child item is removed)... Due to the fact that at
* the moment we don't remove unuseful part of
- * update-chain, it's possible to get too old parent
+ * update-chain, it's possible to get non-matching parent
* row here. Like as in the case which caused this
* problem, we stop shrinking here. I could try to
* find real parent row but want not to do it because
@@ -2008,8 +2058,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
* and we are too close to 6.5 release. - vadim
* 06/11/99
*/
- if (Ptp.t_data->t_infomask & HEAP_XMAX_IS_MULTI ||
- !(TransactionIdEquals(HeapTupleHeaderGetXmax(Ptp.t_data),
+ if ((PTdata->t_infomask & HEAP_XMAX_IS_MULTI) ||
+ !(TransactionIdEquals(HeapTupleHeaderGetXmax(PTdata),
HeapTupleHeaderGetXmin(tp.t_data))))
{
ReleaseBuffer(Pbuf);
@@ -2017,8 +2067,8 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
chain_move_failed = true;
break; /* out of check-all-items loop */
}
- tp.t_datamcxt = Ptp.t_datamcxt;
- tp.t_data = Ptp.t_data;
+ tp.t_datamcxt = NULL;
+ tp.t_data = PTdata;
tlen = tp.t_len = ItemIdGetLength(Pitemid);
if (freeCbuf)
ReleaseBuffer(Cbuf);
@@ -2047,7 +2097,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
}
/*
- * Okay, move the whole tuple chain
+ * Okay, move the whole tuple chain in reverse order.
+ *
+ * Ctid tracks the new location of the previously-moved tuple.
*/
ItemPointerSetInvalid(&Ctid);
for (ti = 0; ti < num_vtmove; ti++)
@@ -2077,10 +2129,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
- /*
- * make a copy of the source tuple, and then mark the
- * source tuple MOVED_OFF.
- */
move_chain_tuple(onerel, Cbuf, Cpage, &tuple,
dst_buffer, dst_page, destvacpage,
&ec, &Ctid, vtmove[ti].cleanVpd);
@@ -2143,7 +2191,6 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
move_plain_tuple(onerel, buf, page, &tuple,
dst_buffer, dst_page, dst_vacpage, &ec);
-
num_moved++;
if (dst_vacpage->blkno > last_move_dest_block)
last_move_dest_block = dst_vacpage->blkno;
@@ -2474,6 +2521,9 @@ move_chain_tuple(Relation rel,
ItemId newitemid;
Size tuple_len = old_tup->t_len;
+ /*
+ * make a modifiable copy of the source tuple.
+ */
heap_copytuple_with_tuple(old_tup, &newtup);
/*
@@ -2484,6 +2534,9 @@ move_chain_tuple(Relation rel,
/* NO EREPORT(ERROR) TILL CHANGES ARE LOGGED */
START_CRIT_SECTION();
+ /*
+ * mark the source tuple MOVED_OFF.
+ */
old_tup->t_data->t_infomask &= ~(HEAP_XMIN_COMMITTED |
HEAP_XMIN_INVALID |
HEAP_MOVED_IN);
@@ -2529,16 +2582,27 @@ move_chain_tuple(Relation rel,
newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
InvalidOffsetNumber, LP_USED);
if (newoff == InvalidOffsetNumber)
- {
elog(PANIC, "failed to add item with len = %lu to page %u while moving tuple chain",
(unsigned long) tuple_len, dst_vacpage->blkno);
- }
newitemid = PageGetItemId(dst_page, newoff);
+ /* drop temporary copy, and point to the version on the dest page */
pfree(newtup.t_data);
newtup.t_datamcxt = NULL;
newtup.t_data = (HeapTupleHeader) PageGetItem(dst_page, newitemid);
+
ItemPointerSet(&(newtup.t_self), dst_vacpage->blkno, newoff);
+ /*
+ * Set new tuple's t_ctid pointing to itself if last tuple in chain,
+ * and to next tuple in chain otherwise. (Since we move the chain
+ * in reverse order, this is actually the previously processed tuple.)
+ */
+ if (!ItemPointerIsValid(ctid))
+ newtup.t_data->t_ctid = newtup.t_self;
+ else
+ newtup.t_data->t_ctid = *ctid;
+ *ctid = newtup.t_self;
+
/* XLOG stuff */
if (!rel->rd_istemp)
{
@@ -2563,17 +2627,6 @@ move_chain_tuple(Relation rel,
END_CRIT_SECTION();
- /*
- * Set new tuple's t_ctid pointing to itself for last tuple in chain,
- * and to next tuple in chain otherwise.
- */
- /* Is this ok after log_heap_move() and END_CRIT_SECTION()? */
- if (!ItemPointerIsValid(ctid))
- newtup.t_data->t_ctid = newtup.t_self;
- else
- newtup.t_data->t_ctid = *ctid;
- *ctid = newtup.t_self;
-
LockBuffer(dst_buf, BUFFER_LOCK_UNLOCK);
if (dst_buf != old_buf)
LockBuffer(old_buf, BUFFER_LOCK_UNLOCK);
@@ -2638,12 +2691,10 @@ move_plain_tuple(Relation rel,
newoff = PageAddItem(dst_page, (Item) newtup.t_data, tuple_len,
InvalidOffsetNumber, LP_USED);
if (newoff == InvalidOffsetNumber)
- {
elog(PANIC, "failed to add item with len = %lu to page %u (free space %lu, nusd %u, noff %u)",
(unsigned long) tuple_len,
dst_vacpage->blkno, (unsigned long) dst_vacpage->free,
dst_vacpage->offsets_used, dst_vacpage->offsets_free);
- }
newitemid = PageGetItemId(dst_page, newoff);
pfree(newtup.t_data);
newtup.t_datamcxt = NULL;