diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2007-08-01 22:45:09 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2007-08-01 22:45:09 +0000 |
commit | 4a78cdeb6b598940e9d9adb92deca6494628802a (patch) | |
tree | 0c8ad45eea297dcbc647705265eab8188fd4d8b1 /src/backend/access/transam/clog.c | |
parent | c722628a430f347ff4a30419004cddc9795a3bb6 (diff) | |
download | postgresql-4a78cdeb6b598940e9d9adb92deca6494628802a.tar.gz postgresql-4a78cdeb6b598940e9d9adb92deca6494628802a.zip |
Support an optional asynchronous commit mode, in which we don't flush WAL
before reporting a transaction committed. Data consistency is still
guaranteed (unlike setting fsync = off), but a crash may lose the effects
of the last few transactions. Patch by Simon, some editorialization by Tom.
Diffstat (limited to 'src/backend/access/transam/clog.c')
-rw-r--r-- | src/backend/access/transam/clog.c | 78 |
1 files changed, 65 insertions, 13 deletions
diff --git a/src/backend/access/transam/clog.c b/src/backend/access/transam/clog.c index 5bafef1be34..9665d129541 100644 --- a/src/backend/access/transam/clog.c +++ b/src/backend/access/transam/clog.c @@ -14,17 +14,19 @@ * CLOG page is initialized to zeroes. Other writes of CLOG come from * recording of transaction commit or abort in xact.c, which generates its * own XLOG records for these events and will re-perform the status update - * on redo; so we need make no additional XLOG entry here. Also, the XLOG - * is guaranteed flushed through the XLOG commit record before we are called - * to log a commit, so the WAL rule "write xlog before data" is satisfied - * automatically for commits, and we don't really care for aborts. Therefore, - * we don't need to mark CLOG pages with LSN information; we have enough - * synchronization already. + * on redo; so we need make no additional XLOG entry here. For synchronous + * transaction commits, the XLOG is guaranteed flushed through the XLOG commit + * record before we are called to log a commit, so the WAL rule "write xlog + * before data" is satisfied automatically. However, for async commits we + * must track the latest LSN affecting each CLOG page, so that we can flush + * XLOG that far and satisfy the WAL rule. We don't have to worry about this + * for aborts (whether sync or async), since the post-crash assumption would + * be that such transactions failed anyway. * * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.42 2007/01/05 22:19:23 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -57,6 +59,13 @@ #define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE) #define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE) +/* We store the latest async LSN for each group of transactions */ +#define CLOG_XACTS_PER_LSN_GROUP 32 /* keep this a power of 2 */ +#define CLOG_LSNS_PER_PAGE (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP) + +#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \ + ((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP) + /* * Link to shared-memory data structures for CLOG control @@ -75,11 +84,16 @@ static void WriteTruncateXlogRec(int pageno); /* * Record the final state of a transaction in the commit log. * + * lsn must be the WAL location of the commit record when recording an async + * commit. For a synchronous commit it can be InvalidXLogRecPtr, since the + * caller guarantees the commit record is already flushed in that case. It + * should be InvalidXLogRecPtr for abort cases, too. + * * NB: this is a low-level routine and is NOT the preferred entry point * for most uses; TransactionLogUpdate() in transam.c is the intended caller. */ void -TransactionIdSetStatus(TransactionId xid, XidStatus status) +TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn) { int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); @@ -94,7 +108,16 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) LWLockAcquire(CLogControlLock, LW_EXCLUSIVE); - slotno = SimpleLruReadPage(ClogCtl, pageno, xid); + /* + * If we're doing an async commit (ie, lsn is valid), then we must wait + * for any active write on the page slot to complete. Otherwise our + * update could reach disk in that write, which will not do since we + * mustn't let it reach disk until we've done the appropriate WAL flush. + * But when lsn is invalid, it's OK to scribble on a page while it is + * write-busy, since we don't care if the update reaches disk sooner than + * we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn). + */ + slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid); byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; /* Current state should be 0, subcommitted or target state */ @@ -110,22 +133,48 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status) ClogCtl->shared->page_dirty[slotno] = true; + /* + * Update the group LSN if the transaction completion LSN is higher. + * + * Note: lsn will be invalid when supplied during InRecovery processing, + * so we don't need to do anything special to avoid LSN updates during + * recovery. After recovery completes the next clog change will set the + * LSN correctly. + */ + if (!XLogRecPtrIsInvalid(lsn)) + { + int lsnindex = GetLSNIndex(slotno, xid); + + if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn)) + ClogCtl->shared->group_lsn[lsnindex] = lsn; + } + LWLockRelease(CLogControlLock); } /* * Interrogate the state of a transaction in the commit log. * + * Aside from the actual commit status, this function returns (into *lsn) + * an LSN that is late enough to be able to guarantee that if we flush up to + * that LSN then we will have flushed the transaction's commit record to disk. + * The result is not necessarily the exact LSN of the transaction's commit + * record! For example, for long-past transactions (those whose clog pages + * already migrated to disk), we'll return InvalidXLogRecPtr. Also, because + * we group transactions on the same clog page to conserve storage, we might + * return the LSN of a later transaction that falls into the same group. + * * NB: this is a low-level routine and is NOT the preferred entry point * for most uses; TransactionLogFetch() in transam.c is the intended caller. */ XidStatus -TransactionIdGetStatus(TransactionId xid) +TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn) { int pageno = TransactionIdToPage(xid); int byteno = TransactionIdToByte(xid); int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT; int slotno; + int lsnindex; char *byteptr; XidStatus status; @@ -136,6 +185,9 @@ TransactionIdGetStatus(TransactionId xid) status = (*byteptr >> bshift) & CLOG_XACT_BITMASK; + lsnindex = GetLSNIndex(slotno, xid); + *lsn = ClogCtl->shared->group_lsn[lsnindex]; + LWLockRelease(CLogControlLock); return status; @@ -148,14 +200,14 @@ TransactionIdGetStatus(TransactionId xid) Size CLOGShmemSize(void) { - return SimpleLruShmemSize(NUM_CLOG_BUFFERS); + return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE); } void CLOGShmemInit(void) { ClogCtl->PagePrecedes = CLOGPagePrecedes; - SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, + SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE, CLogControlLock, "pg_clog"); } @@ -240,7 +292,7 @@ StartupCLOG(void) int slotno; char *byteptr; - slotno = SimpleLruReadPage(ClogCtl, pageno, xid); + slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid); byteptr = ClogCtl->shared->page_buffer[slotno] + byteno; /* Zero so-far-unused positions in the current byte */ |