aboutsummaryrefslogtreecommitdiff
path: root/src/backend/commands/cluster.c
diff options
context:
space:
mode:
authorTom Lane <tgl@sss.pgh.pa.us>2007-04-08 01:26:33 +0000
committerTom Lane <tgl@sss.pgh.pa.us>2007-04-08 01:26:33 +0000
commit7b78474da35604152805b2fdd939326e1bc364da (patch)
tree6d8d52106c1b10a596f761344b1cfa683a227b0e /src/backend/commands/cluster.c
parent2fca2c05e7d22dfa1dd2cacf048243adfdf519ce (diff)
downloadpostgresql-7b78474da35604152805b2fdd939326e1bc364da.tar.gz
postgresql-7b78474da35604152805b2fdd939326e1bc364da.zip
Make CLUSTER MVCC-safe. Heikki Linnakangas
Diffstat (limited to 'src/backend/commands/cluster.c')
-rw-r--r--src/backend/commands/cluster.c130
1 files changed, 93 insertions, 37 deletions
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index ac771b77a60..656d7d710f0 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.158 2007/03/29 00:15:37 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.159 2007/04/08 01:26:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -19,6 +19,7 @@
#include "access/genam.h"
#include "access/heapam.h"
+#include "access/rewriteheap.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/dependency.h"
@@ -29,13 +30,14 @@
#include "catalog/toasting.h"
#include "commands/cluster.h"
#include "miscadmin.h"
+#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
-#include "utils/syscache.h"
#include "utils/relcache.h"
+#include "utils/syscache.h"
/*
@@ -76,7 +78,7 @@ static List *get_tables_to_cluster(MemoryContext cluster_context);
*
* The single-relation case does not have any such overhead.
*
- * We also allow a relation being specified without index. In that case,
+ * We also allow a relation to be specified without index. In that case,
* the indisclustered bit will be looked up, and an ERROR will be thrown
* if there is no index with the bit set.
*---------------------------------------------------------------------------
@@ -650,11 +652,12 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
TupleDesc newTupDesc;
int natts;
Datum *values;
- char *nulls;
+ bool *isnull;
IndexScanDesc scan;
HeapTuple tuple;
- CommandId mycid = GetCurrentCommandId();
bool use_wal;
+ TransactionId OldestXmin;
+ RewriteState rwstate;
/*
* Open the relations we need.
@@ -671,84 +674,137 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex)
newTupDesc = RelationGetDescr(NewHeap);
Assert(newTupDesc->natts == oldTupDesc->natts);
- /* Preallocate values/nulls arrays */
+ /* Preallocate values/isnull arrays */
natts = newTupDesc->natts;
- values = (Datum *) palloc0(natts * sizeof(Datum));
- nulls = (char *) palloc(natts * sizeof(char));
- memset(nulls, 'n', natts * sizeof(char));
+ values = (Datum *) palloc(natts * sizeof(Datum));
+ isnull = (bool *) palloc(natts * sizeof(bool));
/*
* We need to log the copied data in WAL iff WAL archiving is enabled AND
- * it's not a temp rel. (Since we know the target relation is new and
- * can't have any FSM data, we can always tell heap_insert to ignore FSM,
- * even when using WAL.)
+ * it's not a temp rel.
*/
use_wal = XLogArchivingActive() && !NewHeap->rd_istemp;
/* use_wal off requires rd_targblock be initially invalid */
Assert(NewHeap->rd_targblock == InvalidBlockNumber);
+ /* Get the cutoff xmin we'll use to weed out dead tuples */
+ OldestXmin = GetOldestXmin(OldHeap->rd_rel->relisshared, true);
+
+ /* Initialize the rewrite operation */
+ rwstate = begin_heap_rewrite(NewHeap, OldestXmin, use_wal);
+
/*
- * Scan through the OldHeap on the OldIndex and copy each tuple into the
- * NewHeap.
+ * Scan through the OldHeap in OldIndex order and copy each tuple into the
+ * NewHeap. To ensure we see recently-dead tuples that still need to be
+ * copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum
+ * for the visibility test.
*/
scan = index_beginscan(OldHeap, OldIndex,
- SnapshotNow, 0, (ScanKey) NULL);
+ SnapshotAny, 0, (ScanKey) NULL);
while ((tuple = index_getnext(scan, ForwardScanDirection)) != NULL)
{
+ HeapTuple copiedTuple;
+ bool isdead;
+ int i;
+
+ CHECK_FOR_INTERRUPTS();
+
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
+
+ switch (HeapTupleSatisfiesVacuum(tuple->t_data, OldestXmin,
+ scan->xs_cbuf))
+ {
+ case HEAPTUPLE_DEAD:
+ /* Definitely dead */
+ isdead = true;
+ break;
+ case HEAPTUPLE_LIVE:
+ case HEAPTUPLE_RECENTLY_DEAD:
+ /* Live or recently dead, must copy it */
+ isdead = false;
+ break;
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+ /*
+ * We should not see this unless it's been inserted earlier
+ * in our own transaction.
+ */
+ if (!TransactionIdIsCurrentTransactionId(
+ HeapTupleHeaderGetXmin(tuple->t_data)))
+ elog(ERROR, "concurrent insert in progress");
+ /* treat as live */
+ isdead = false;
+ break;
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+ /*
+ * We should not see this unless it's been deleted earlier
+ * in our own transaction.
+ */
+ Assert(!(tuple->t_data->t_infomask & HEAP_XMAX_IS_MULTI));
+ if (!TransactionIdIsCurrentTransactionId(
+ HeapTupleHeaderGetXmax(tuple->t_data)))
+ elog(ERROR, "concurrent delete in progress");
+ /* treat as recently dead */
+ isdead = false;
+ break;
+ default:
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ isdead = false; /* keep compiler quiet */
+ break;
+ }
+
+ LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
+
+ if (isdead)
+ {
+ /* heap rewrite module still needs to see it... */
+ rewrite_heap_dead_tuple(rwstate, tuple);
+ continue;
+ }
+
/*
- * We cannot simply pass the tuple to heap_insert(), for several
- * reasons:
- *
- * 1. heap_insert() will overwrite the commit-status fields of the
- * tuple it's handed. This would trash the source relation, which is
- * bad news if we abort later on. (This was a bug in releases thru
- * 7.0)
+ * We cannot simply copy the tuple as-is, for several reasons:
*
- * 2. We'd like to squeeze out the values of any dropped columns, both
+ * 1. We'd like to squeeze out the values of any dropped columns, both
* to save space and to ensure we have no corner-case failures. (It's
* possible for example that the new table hasn't got a TOAST table
* and so is unable to store any large values of dropped cols.)
*
- * 3. The tuple might not even be legal for the new table; this is
+ * 2. The tuple might not even be legal for the new table; this is
* currently only known to happen as an after-effect of ALTER TABLE
* SET WITHOUT OIDS.
*
* So, we must reconstruct the tuple from component Datums.
*/
- HeapTuple copiedTuple;
- int i;
-
- heap_deformtuple(tuple, oldTupDesc, values, nulls);
+ heap_deform_tuple(tuple, oldTupDesc, values, isnull);
/* Be sure to null out any dropped columns */
for (i = 0; i < natts; i++)
{
if (newTupDesc->attrs[i]->attisdropped)
- nulls[i] = 'n';
+ isnull[i] = true;
}
- copiedTuple = heap_formtuple(newTupDesc, values, nulls);
+ copiedTuple = heap_form_tuple(newTupDesc, values, isnull);
/* Preserve OID, if any */
if (NewHeap->rd_rel->relhasoids)
HeapTupleSetOid(copiedTuple, HeapTupleGetOid(tuple));
- heap_insert(NewHeap, copiedTuple, mycid, use_wal, false);
+ /* The heap rewrite module does the rest */
+ rewrite_heap_tuple(rwstate, tuple, copiedTuple);
heap_freetuple(copiedTuple);
-
- CHECK_FOR_INTERRUPTS();
}
index_endscan(scan);
- pfree(values);
- pfree(nulls);
+ /* Write out any remaining tuples, and fsync if needed */
+ end_heap_rewrite(rwstate);
- if (!use_wal)
- heap_sync(NewHeap);
+ pfree(values);
+ pfree(isnull);
index_close(OldIndex, NoLock);
heap_close(OldHeap, NoLock);