aboutsummaryrefslogtreecommitdiff
path: root/src/backend/access/transam/clog.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/access/transam/clog.c')
-rw-r--r--src/backend/access/transam/clog.c78
1 files changed, 65 insertions, 13 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c
index 5bafef1be34..9665d129541 100644
--- a/src/backend/access/transam/clog.c
+++ b/src/backend/access/transam/clog.c
@@ -14,17 +14,19 @@
* CLOG page is initialized to zeroes. Other writes of CLOG come from
* recording of transaction commit or abort in xact.c, which generates its
* own XLOG records for these events and will re-perform the status update
- * on redo; so we need make no additional XLOG entry here. Also, the XLOG
- * is guaranteed flushed through the XLOG commit record before we are called
- * to log a commit, so the WAL rule "write xlog before data" is satisfied
- * automatically for commits, and we don't really care for aborts. Therefore,
- * we don't need to mark CLOG pages with LSN information; we have enough
- * synchronization already.
+ * on redo; so we need make no additional XLOG entry here. For synchronous
+ * transaction commits, the XLOG is guaranteed flushed through the XLOG commit
+ * record before we are called to log a commit, so the WAL rule "write xlog
+ * before data" is satisfied automatically. However, for async commits we
+ * must track the latest LSN affecting each CLOG page, so that we can flush
+ * XLOG that far and satisfy the WAL rule. We don't have to worry about this
+ * for aborts (whether sync or async), since the post-crash assumption would
+ * be that such transactions failed anyway.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.42 2007/01/05 22:19:23 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -57,6 +59,13 @@
#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
+/* We store the latest async LSN for each group of transactions */
+#define CLOG_XACTS_PER_LSN_GROUP 32 /* keep this a power of 2 */
+#define CLOG_LSNS_PER_PAGE (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)
+
+#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \
+ ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
+
/*
* Link to shared-memory data structures for CLOG control
@@ -75,11 +84,16 @@ static void WriteTruncateXlogRec(int pageno);
/*
* Record the final state of a transaction in the commit log.
*
+ * lsn must be the WAL location of the commit record when recording an async
+ * commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
+ * caller guarantees the commit record is already flushed in that case. It
+ * should be InvalidXLogRecPtr for abort cases, too.
+ *
* NB: this is a low-level routine and is NOT the preferred entry point
* for most uses; TransactionLogUpdate() in transam.c is the intended caller.
*/
void
-TransactionIdSetStatus(TransactionId xid, XidStatus status)
+TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(xid);
int byteno = TransactionIdToByte(xid);
@@ -94,7 +108,16 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
- slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
+ /*
+ * If we're doing an async commit (ie, lsn is valid), then we must wait
+ * for any active write on the page slot to complete. Otherwise our
+ * update could reach disk in that write, which will not do since we
+ * mustn't let it reach disk until we've done the appropriate WAL flush.
+ * But when lsn is invalid, it's OK to scribble on a page while it is
+ * write-busy, since we don't care if the update reaches disk sooner than
+ * we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn).
+ */
+ slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Current state should be 0, subcommitted or target state */
@@ -110,22 +133,48 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
ClogCtl->shared->page_dirty[slotno] = true;
+ /*
+ * Update the group LSN if the transaction completion LSN is higher.
+ *
+ * Note: lsn will be invalid when supplied during InRecovery processing,
+ * so we don't need to do anything special to avoid LSN updates during
+ * recovery. After recovery completes the next clog change will set the
+ * LSN correctly.
+ */
+ if (!XLogRecPtrIsInvalid(lsn))
+ {
+ int lsnindex = GetLSNIndex(slotno, xid);
+
+ if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn))
+ ClogCtl->shared->group_lsn[lsnindex] = lsn;
+ }
+
LWLockRelease(CLogControlLock);
}
/*
* Interrogate the state of a transaction in the commit log.
*
+ * Aside from the actual commit status, this function returns (into *lsn)
+ * an LSN that is late enough to be able to guarantee that if we flush up to
+ * that LSN then we will have flushed the transaction's commit record to disk.
+ * The result is not necessarily the exact LSN of the transaction's commit
+ * record! For example, for long-past transactions (those whose clog pages
+ * already migrated to disk), we'll return InvalidXLogRecPtr. Also, because
+ * we group transactions on the same clog page to conserve storage, we might
+ * return the LSN of a later transaction that falls into the same group.
+ *
* NB: this is a low-level routine and is NOT the preferred entry point
* for most uses; TransactionLogFetch() in transam.c is the intended caller.
*/
XidStatus
-TransactionIdGetStatus(TransactionId xid)
+TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
{
int pageno = TransactionIdToPage(xid);
int byteno = TransactionIdToByte(xid);
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
int slotno;
+ int lsnindex;
char *byteptr;
XidStatus status;
@@ -136,6 +185,9 @@ TransactionIdGetStatus(TransactionId xid)
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
+ lsnindex = GetLSNIndex(slotno, xid);
+ *lsn = ClogCtl->shared->group_lsn[lsnindex];
+
LWLockRelease(CLogControlLock);
return status;
@@ -148,14 +200,14 @@ TransactionIdGetStatus(TransactionId xid)
Size
CLOGShmemSize(void)
{
- return SimpleLruShmemSize(NUM_CLOG_BUFFERS);
+ return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
}
void
CLOGShmemInit(void)
{
ClogCtl->PagePrecedes = CLOGPagePrecedes;
- SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS,
+ SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
CLogControlLock, "pg_clog");
}
@@ -240,7 +292,7 @@ StartupCLOG(void)
int slotno;
char *byteptr;
- slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
+ slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid);
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Zero so-far-unused positions in the current byte */