diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-04-08 01:26:33 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-04-08 01:26:33 +0000 |
commit | 7b78474da35604152805b2fdd939326e1bc364da (patch) | |
tree | 6d8d52106c1b10a596f761344b1cfa683a227b0e /src/backend/commands/cluster.c | |
parent | 2fca2c05e7d22dfa1dd2cacf048243adfdf519ce (diff) | |
download | postgresql-7b78474da35604152805b2fdd939326e1bc364da.tar.gz postgresql-7b78474da35604152805b2fdd939326e1bc364da.zip |
Make CLUSTER MVCC-safe. Heikki Linnakangas
Diffstat (limited to 'src/backend/commands/cluster.c')
-rw-r--r-- | src/backend/commands/cluster.c | 130 |
1 files changed, 93 insertions, 37 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index ac771b77a60..656d7d710f0 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.158 2007/03/29 00:15:37 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.159 2007/04/08 01:26:28 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,7 @@ #include "access/genam.h" #include "access/heapam.h" +#include "access/rewriteheap.h" #include "access/xact.h" #include "catalog/catalog.h" #include "catalog/dependency.h" @@ -29,13 +30,14 @@ #include "catalog/toasting.h" #include "commands/cluster.h" #include "miscadmin.h" +#include "storage/procarray.h" #include "utils/acl.h" #include "utils/fmgroids.h" #include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" -#include "utils/syscache.h" #include "utils/relcache.h" +#include "utils/syscache.h" /* @@ -76,7 +78,7 @@ static List *get_tables_to_cluster(MemoryContext cluster_context); * * The single-relation case does not have any such overhead. * - * We also allow a relation being specified without index. In that case, + * We also allow a relation to be specified without index. In that case, * the indisclustered bit will be looked up, and an ERROR will be thrown * if there is no index with the bit set. *--------------------------------------------------------------------------- @@ -650,11 +652,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex) TupleDesc newTupDesc; int natts; Datum *values; - char *nulls; + bool *isnull; IndexScanDesc scan; HeapTuple tuple; - CommandId mycid = GetCurrentCommandId(); bool use_wal; + TransactionId OldestXmin; + RewriteState rwstate; /* * Open the relations we need. @@ -671,84 +674,137 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex) newTupDesc = RelationGetDescr(NewHeap); Assert(newTupDesc->natts == oldTupDesc->natts); - /* Preallocate values/nulls arrays */ + /* Preallocate values/isnull arrays */ natts = newTupDesc->natts; - values = (Datum *) palloc0(natts * sizeof(Datum)); - nulls = (char *) palloc(natts * sizeof(char)); - memset(nulls, 'n', natts * sizeof(char)); + values = (Datum *) palloc(natts * sizeof(Datum)); + isnull = (bool *) palloc(natts * sizeof(bool)); /* * We need to log the copied data in WAL iff WAL archiving is enabled AND - * it's not a temp rel. (Since we know the target relation is new and - * can't have any FSM data, we can always tell heap_insert to ignore FSM, - * even when using WAL.) + * it's not a temp rel. */ use_wal = XLogArchivingActive() && !NewHeap->rd_istemp; /* use_wal off requires rd_targblock be initially invalid */ Assert(NewHeap->rd_targblock == InvalidBlockNumber); + /* Get the cutoff xmin we'll use to weed out dead tuples */ + OldestXmin = GetOldestXmin(OldHeap->rd_rel->relisshared, true); + + /* Initialize the rewrite operation */ + rwstate = begin_heap_rewrite(NewHeap, OldestXmin, use_wal); + /* - * Scan through the OldHeap on the OldIndex and copy each tuple into the - * NewHeap. + * Scan through the OldHeap in OldIndex order and copy each tuple into the + * NewHeap. To ensure we see recently-dead tuples that still need to be + * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum + * for the visibility test. */ scan = index_beginscan(OldHeap, OldIndex, - SnapshotNow, 0, (ScanKey) NULL); + SnapshotAny, 0, (ScanKey) NULL); while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL) { + HeapTuple copiedTuple; + bool isdead; + int i; + + CHECK_FOR_INTERRUPTS(); + + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); + + switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin, + scan->xs_cbuf)) + { + case HEAPTUPLE_DEAD: + /* Definitely dead */ + isdead = true; + break; + case HEAPTUPLE_LIVE: + case HEAPTUPLE_RECENTLY_DEAD: + /* Live or recently dead, must copy it */ + isdead = false; + break; + case HEAPTUPLE_INSERT_IN_PROGRESS: + /* + * We should not see this unless it's been inserted earlier + * in our own transaction. + */ + if (!TransactionIdIsCurrentTransactionId( + HeapTupleHeaderGetXmin(tuple->t_data))) + elog(ERROR, "concurrent insert in progress"); + /* treat as live */ + isdead = false; + break; + case HEAPTUPLE_DELETE_IN_PROGRESS: + /* + * We should not see this unless it's been deleted earlier + * in our own transaction. + */ + Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI)); + if (!TransactionIdIsCurrentTransactionId( + HeapTupleHeaderGetXmax(tuple->t_data))) + elog(ERROR, "concurrent delete in progress"); + /* treat as recently dead */ + isdead = false; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + isdead = false; /* keep compiler quiet */ + break; + } + + LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); + + if (isdead) + { + /* heap rewrite module still needs to see it... */ + rewrite_heap_dead_tuple(rwstate, tuple); + continue; + } + /* - * We cannot simply pass the tuple to heap_insert(), for several - * reasons: - * - * 1. heap_insert() will overwrite the commit-status fields of the - * tuple it's handed. This would trash the source relation, which is - * bad news if we abort later on. (This was a bug in releases thru - * 7.0) + * We cannot simply copy the tuple as-is, for several reasons: * - * 2. We'd like to squeeze out the values of any dropped columns, both + * 1. We'd like to squeeze out the values of any dropped columns, both * to save space and to ensure we have no corner-case failures. (It's * possible for example that the new table hasn't got a TOAST table * and so is unable to store any large values of dropped cols.) * - * 3. The tuple might not even be legal for the new table; this is + * 2. The tuple might not even be legal for the new table; this is * currently only known to happen as an after-effect of ALTER TABLE * SET WITHOUT OIDS. * * So, we must reconstruct the tuple from component Datums. */ - HeapTuple copiedTuple; - int i; - - heap_deformtuple(tuple, oldTupDesc, values, nulls); + heap_deform_tuple(tuple, oldTupDesc, values, isnull); /* Be sure to null out any dropped columns */ for (i = 0; i < natts; i++) { if (newTupDesc->attrs[i]->attisdropped) - nulls[i] = 'n'; + isnull[i] = true; } - copiedTuple = heap_formtuple(newTupDesc, values, nulls); + copiedTuple = heap_form_tuple(newTupDesc, values, isnull); /* Preserve OID, if any */ if (NewHeap->rd_rel->relhasoids) HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple)); - heap_insert(NewHeap, copiedTuple, mycid, use_wal, false); + /* The heap rewrite module does the rest */ + rewrite_heap_tuple(rwstate, tuple, copiedTuple); heap_freetuple(copiedTuple); - - CHECK_FOR_INTERRUPTS(); } index_endscan(scan); - pfree(values); - pfree(nulls); + /* Write out any remaining tuples, and fsync if needed */ + end_heap_rewrite(rwstate); - if (!use_wal) - heap_sync(NewHeap); + pfree(values); + pfree(isnull); index_close(OldIndex, NoLock); heap_close(OldHeap, NoLock); |