aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/clog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/clog.c')
-rw-r--r--src/backend/access/transam/clog.c229
1 files changed, 214 insertions, 15 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index f30ef3a2262..fcff3ea3cfd 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -26,7 +26,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.47 2008/08/01 13:16:08 alvherre Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.48 2008/10/20 19:18:18 alvherre Exp $
*
*-------------------------------------------------------------------------
*/
@@ -80,32 +80,182 @@ static int ZeroCLOGPage(int pageno, bool writeXlog);
static bool CLOGPagePrecedes(int page1, int page2);
static void WriteZeroPageXlogRec(int pageno);
static void WriteTruncateXlogRec(int pageno);
+static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
+ TransactionId *subxids, XidStatus status,
+ XLogRecPtr lsn, int pageno);
+static void TransactionIdSetStatusBit(TransactionId xid, XidStatus status,
+ XLogRecPtr lsn, int slotno);
+static void set_status_by_pages(int nsubxids, TransactionId *subxids,
+ XidStatus status, XLogRecPtr lsn);
/*
- * Record the final state of a transaction in the commit log.
+ * TransactionIdSetTreeStatus
+ *
+ * Record the final state of transaction entries in the commit log for
+ * a transaction and its subtransaction tree. Take care to ensure this is
+ * efficient, and as atomic as possible.
+ *
+ * xid is a single xid to set status for. This will typically be
+ * the top level transactionid for a top level commit or abort. It can
+ * also be a subtransaction when we record transaction aborts.
+ *
+ * subxids is an array of xids of length nsubxids, representing subtransactions
+ * in the tree of xid. In various cases nsubxids may be zero.
*
* lsn must be the WAL location of the commit record when recording an async
* commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
* caller guarantees the commit record is already flushed in that case. It
* should be InvalidXLogRecPtr for abort cases, too.
*
+ * In the commit case, atomicity is limited by whether all the subxids are in
+ * the same CLOG page as xid. If they all are, then the lock will be grabbed
+ * only once, and the status will be set to committed directly. Otherwise
+ * we must
+ * 1. set sub-committed all subxids that are not on the same page as the
+ * main xid
+ * 2. atomically set committed the main xid and the subxids on the same page
+ * 3. go over the first bunch again and set them committed
+ * Note that as far as concurrent checkers are concerned, main transaction
+ * commit as a whole is still atomic.
+ *
+ * Example:
+ * TransactionId t commits and has subxids t1, t2, t3, t4
+ * t is on page p1, t1 is also on p1, t2 and t3 are on p2, t4 is on p3
+ * 1. update pages2-3:
+ * page2: set t2,t3 as sub-committed
+ * page3: set t4 as sub-committed
+ * 2. update page1:
+ * set t1 as sub-committed,
+ * then set t as committed,
+ then set t1 as committed
+ * 3. update pages2-3:
+ * page2: set t2,t3 as committed
+ * page3: set t4 as committed
+ *
* NB: this is a low-level routine and is NOT the preferred entry point
- * for most uses; TransactionLogUpdate() in transam.c is the intended caller.
+ * for most uses; functions in transam.c are the intended callers.
+ *
+ * XXX Think about issuing FADVISE_WILLNEED on pages that we will need,
+ * but aren't yet in cache, as well as hinting pages not to fall out of
+ * cache yet.
*/
void
-TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
+TransactionIdSetTreeStatus(TransactionId xid, int nsubxids,
+ TransactionId *subxids, XidStatus status, XLogRecPtr lsn)
+{
+ int pageno = TransactionIdToPage(xid); /* get page of parent */
+ int i;
+
+ Assert(status == TRANSACTION_STATUS_COMMITTED ||
+ status == TRANSACTION_STATUS_ABORTED);
+
+ /*
+ * See how many subxids, if any, are on the same page as the parent, if any.
+ */
+ for (i = 0; i < nsubxids; i++)
+ {
+ if (TransactionIdToPage(subxids[i]) != pageno)
+ break;
+ }
+
+ /*
+ * Do all items fit on a single page?
+ */
+ if (i == nsubxids)
+ {
+ /*
+ * Set the parent and all subtransactions in a single call
+ */
+ TransactionIdSetPageStatus(xid, nsubxids, subxids, status, lsn,
+ pageno);
+ }
+ else
+ {
+ int nsubxids_on_first_page = i;
+
+ /*
+ * If this is a commit then we care about doing this correctly (i.e.
+ * using the subcommitted intermediate status). By here, we know we're
+ * updating more than one page of clog, so we must mark entries that
+ * are *not* on the first page so that they show as subcommitted before
+ * we then return to update the status to fully committed.
+ *
+ * To avoid touching the first page twice, skip marking subcommitted
+ * for the subxids on that first page.
+ */
+ if (status == TRANSACTION_STATUS_COMMITTED)
+ set_status_by_pages(nsubxids - nsubxids_on_first_page,
+ subxids + nsubxids_on_first_page,
+ TRANSACTION_STATUS_SUB_COMMITTED, lsn);
+
+ /*
+ * Now set the parent and subtransactions on same page as the parent,
+ * if any
+ */
+ pageno = TransactionIdToPage(xid);
+ TransactionIdSetPageStatus(xid, nsubxids_on_first_page, subxids, status,
+ lsn, pageno);
+
+ /*
+ * Now work through the rest of the subxids one clog page at a time,
+ * starting from the second page onwards, like we did above.
+ */
+ set_status_by_pages(nsubxids - nsubxids_on_first_page,
+ subxids + nsubxids_on_first_page,
+ status, lsn);
+ }
+}
+
+/*
+ * Helper for TransactionIdSetTreeStatus: set the status for a bunch of
+ * transactions, chunking in the separate CLOG pages involved. We never
+ * pass the whole transaction tree to this function, only subtransactions
+ * that are on different pages to the top level transaction id.
+ */
+static void
+set_status_by_pages(int nsubxids, TransactionId *subxids,
+ XidStatus status, XLogRecPtr lsn)
+{
+ int pageno = TransactionIdToPage(subxids[0]);
+ int offset = 0;
+ int i = 0;
+
+ while (i < nsubxids)
+ {
+ int num_on_page = 0;
+
+ while (TransactionIdToPage(subxids[i]) == pageno && i < nsubxids)
+ {
+ num_on_page++;
+ i++;
+ }
+
+ TransactionIdSetPageStatus(InvalidTransactionId,
+ num_on_page, subxids + offset,
+ status, lsn, pageno);
+ offset = i;
+ pageno = TransactionIdToPage(subxids[offset]);
+ }
+}
+
+/*
+ * Record the final state of transaction entries in the commit log for
+ * all entries on a single page. Atomic only on this page.
+ *
+ * Otherwise API is same as TransactionIdSetTreeStatus()
+ */
+static void
+TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
+ TransactionId *subxids, XidStatus status,
+ XLogRecPtr lsn, int pageno)
{
- int pageno = TransactionIdToPage(xid);
- int byteno = TransactionIdToByte(xid);
- int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
int slotno;
- char *byteptr;
- char byteval;
+ int i;
Assert(status == TRANSACTION_STATUS_COMMITTED ||
status == TRANSACTION_STATUS_ABORTED ||
- status == TRANSACTION_STATUS_SUB_COMMITTED);
+ (status == TRANSACTION_STATUS_SUB_COMMITTED && !TransactionIdIsValid(xid)));
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
@@ -116,9 +266,62 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
* mustn't let it reach disk until we've done the appropriate WAL flush.
* But when lsn is invalid, it's OK to scribble on a page while it is
* write-busy, since we don't care if the update reaches disk sooner than
- * we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn).
+ * we think.
*/
slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
+
+ /*
+ * Set the main transaction id, if any.
+ *
+ * If we update more than one xid on this page while it is being written
+ * out, we might find that some of the bits go to disk and others don't.
+ * If we are updating commits on the page with the top-level xid that could
+ * break atomicity, so we subcommit the subxids first before we mark the
+ * top-level commit.
+ */
+ if (TransactionIdIsValid(xid))
+ {
+ /* Subtransactions first, if needed ... */
+ if (status == TRANSACTION_STATUS_COMMITTED)
+ {
+ for (i = 0; i < nsubxids; i++)
+ {
+ Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
+ TransactionIdSetStatusBit(subxids[i],
+ TRANSACTION_STATUS_SUB_COMMITTED,
+ lsn, slotno);
+ }
+ }
+
+ /* ... then the main transaction */
+ TransactionIdSetStatusBit(xid, status, lsn, slotno);
+ }
+
+ /* Set the subtransactions */
+ for (i = 0; i < nsubxids; i++)
+ {
+ Assert(ClogCtl->shared->page_number[slotno] == TransactionIdToPage(subxids[i]));
+ TransactionIdSetStatusBit(subxids[i], status, lsn, slotno);
+ }
+
+ ClogCtl->shared->page_dirty[slotno] = true;
+
+ LWLockRelease(CLogControlLock);
+}
+
+/*
+ * Sets the commit status of a single transaction.
+ *
+ * Must be called with CLogControlLock held
+ */
+static void
+TransactionIdSetStatusBit(TransactionId xid, XidStatus status, XLogRecPtr lsn, int slotno)
+{
+ int byteno = TransactionIdToByte(xid);
+ int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
+ char *byteptr;
+ char byteval;
+
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Current state should be 0, subcommitted or target state */
@@ -132,8 +335,6 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
byteval |= (status << bshift);
*byteptr = byteval;
- ClogCtl->shared->page_dirty[slotno] = true;
-
/*
* Update the group LSN if the transaction completion LSN is higher.
*
@@ -149,8 +350,6 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn))
ClogCtl->shared->group_lsn[lsnindex] = lsn;
}
-
- LWLockRelease(CLogControlLock);
}
/*