aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authordanielk1977 <danielk1977@noemail.net>2004-06-25 02:38:54 +0000
committerdanielk1977 <danielk1977@noemail.net>2004-06-25 02:38:54 +0000
commit7657240a21c3a8d461e2fcf89a94a30a3bc9657c (patch)
tree52f54e92d1a4362a4e3951d77bf7f182e383d591 /src
parente9707671447d0061f13ec9fdf990c47ca0ea085c (diff)
downloadsqlite-7657240a21c3a8d461e2fcf89a94a30a3bc9657c.tar.gz
sqlite-7657240a21c3a8d461e2fcf89a94a30a3bc9657c.zip
Modifications to the journal format to make it more robust. (CVS 1686)
FossilOrigin-Name: 504246a18daca794473b17a7874096f1ec8648ee
Diffstat (limited to 'src')
-rw-r--r--src/os_test.c17
-rw-r--r--src/pager.c638
-rw-r--r--src/test5.c4
3 files changed, 466 insertions, 193 deletions
diff --git a/src/os_test.c b/src/os_test.c
index ce4055277..b36cc8fa9 100644
--- a/src/os_test.c
+++ b/src/os_test.c
@@ -207,14 +207,24 @@ static int writeCache2(OsTestFile *pFile, int crash){
u8 *p = pFile->apBlk[i];
if( p ){
int skip = 0;
+ int trash = 0;
if( crash ){
char random;
sqlite3Randomness(1, &random);
if( random & 0x01 ){
- skip = 1;
+ if( 0 && random & 0x02 ){
+ trash = 1;
#ifdef TRACE_WRITECACHE
-printf("Not writing block %d of %s\n", i, pFile->zName);
+printf("Trashing block %d of %s\n", i, pFile->zName);
+#endif
+ }else{
+ skip = 1;
+#ifdef TRACE_WRITECACHE
+printf("Skiping block %d of %s\n", i, pFile->zName);
+#endif
+ }
}else{
+#ifdef TRACE_WRITECACHE
printf("Writing block %d of %s\n", i, pFile->zName);
#endif
}
@@ -227,6 +237,9 @@ printf("Writing block %d of %s\n", i, pFile->zName);
if( BLOCK_OFFSET(i+1)>nMax ){
len = nMax-BLOCK_OFFSET(i);
}
+ if( trash ){
+ sqlite3Randomness(len, p);
+ }
rc = sqlite3RealWrite(&pFile->fd, p, len);
}
sqliteFree(p);
diff --git a/src/pager.c b/src/pager.c
index 85e09e3ef..d766f1b69 100644
--- a/src/pager.c
+++ b/src/pager.c
@@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
-** @(#) $Id: pager.c,v 1.137 2004/06/23 10:43:10 danielk1977 Exp $
+** @(#) $Id: pager.c,v 1.138 2004/06/25 02:38:55 danielk1977 Exp $
*/
#include "os.h" /* Must be first to enable large file support */
#include "sqliteInt.h"
@@ -225,13 +225,17 @@ struct Pager {
u8 memDb; /* True to inhibit all file I/O */
u8 *aInJournal; /* One bit for each page in the database file */
u8 *aInStmt; /* One bit for each page in the database */
- int nMaster; /* Number of bytes to reserve for master j.p */
BusyHandler *pBusyHandler; /* Pointer to sqlite.busyHandler */
PgHdr *pFirst, *pLast; /* List of free pages */
PgHdr *pFirstSynced; /* First free page with PgHdr.needSync==0 */
PgHdr *pAll; /* List of all pages */
PgHdr *pStmt; /* List of pages in the statement subjournal */
PgHdr *aHash[N_PG_HASH]; /* Hash table to map page number to PgHdr */
+ off_t journalOff; /* Current byte offset in the journal file */
+ off_t journalHdr; /* Byte offset to previous journal header */
+ off_t stmtHdrOff; /* First journal header written this statement */
+ int sectorSize; /* Assumed sector size during rollback */
+ u8 setMaster; /* True if a m-j name has been written to jrnl */
};
/*
@@ -274,9 +278,25 @@ static const unsigned char aJournalMagic[] = {
** The size of the header and of each page in the journal is determined
** by the following macros.
*/
-#define JOURNAL_HDR_SZ(pPager) (24 + (pPager)->nMaster)
#define JOURNAL_PG_SZ(pPager) ((pPager->pageSize) + 8)
+/*
+** The journal header size for this pager. In the future, this could be
+** set to some value read from the disk controller. The important
+** characteristic is that it is the same size as a disk sector.
+*/
+#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
+
+#define PAGER_SECTOR_SIZE 512
+
+/*
+** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
+** reserved for working around a windows/posix incompatibility). It is
+** used in the journal to signify that the remainder of the journal file
+** is devoted to storing a master journal name - there are no more pages to
+** roll back. See comments for function writeMasterJournal() for details.
+*/
+#define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize))
/*
** Enable reference count tracking (for debugging) here:
@@ -369,6 +389,244 @@ static int pager_errcode(Pager *pPager){
}
/*
+** When this is called the journal file pJrnl must be open.
+** The master journal file name is read from the end of the file and
+** written into memory obtained from sqliteMalloc(). *pzMaster is
+** set to point at the memory and SQLITE_OK returned. The caller must
+** sqliteFree() *pzMaster.
+**
+** The trailer written by writeMasterJournal() ends with
+**
+**     <name: len bytes, no terminator> <len: 4 bytes> <magic: 8 bytes>
+**
+** so the last 12 bytes of the file are examined first and the name is
+** then read from the len bytes that precede them. The string returned
+** in *pzMaster is always nul-terminated.
+**
+** If no master journal file name is present (the file is shorter than
+** 12 bytes, the trailing magic does not match, or the stored length is
+** zero or larger than the space in front of the trailer) *pzMaster is
+** set to 0 and SQLITE_OK returned.
+**
+** If an I/O error occurs the error code is returned and *pzMaster is 0.
+** SQLITE_NOMEM is returned if the buffer cannot be allocated.
+*/
+static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
+  int rc;
+  u32 len;
+  off_t szJ;
+  unsigned char aMagic[8]; /* A buffer to hold the magic header */
+
+  *pzMaster = 0;
+
+  rc = sqlite3OsFileSize(pJrnl, &szJ);
+  if( rc!=SQLITE_OK || szJ<12 ) return rc;
+
+  rc = sqlite3OsSeek(pJrnl, szJ-12);
+  if( rc!=SQLITE_OK ) return rc;
+
+  rc = read32bits(pJrnl, &len);
+  if( rc!=SQLITE_OK ) return rc;
+
+  rc = sqlite3OsRead(pJrnl, aMagic, 8);
+  if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
+
+  /* The length comes straight from disk. A value of zero names nothing,
+  ** and a value larger than the bytes available in front of the trailer
+  ** would make the seek offset below negative. Treat both as "no master
+  ** journal name present".
+  */
+  if( len==0 || (off_t)len>szJ-12 ) return SQLITE_OK;
+
+  rc = sqlite3OsSeek(pJrnl, szJ-12-len);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Allocate one extra byte: the name is stored without a terminator but
+  ** callers hand the result to strcmp() and sqlite3OsFileExists().
+  */
+  *pzMaster = (char *)sqliteMalloc(len+1);
+  if( !*pzMaster ){
+    return SQLITE_NOMEM;
+  }
+  rc = sqlite3OsRead(pJrnl, *pzMaster, len);
+  if( rc!=SQLITE_OK ){
+    sqliteFree(*pzMaster);
+    *pzMaster = 0;
+    return rc;
+  }
+  (*pzMaster)[len] = '\0';
+
+  return SQLITE_OK;
+}
+
+/*
+** Seek the journal file descriptor to the next sector boundary where a
+** journal header may be read or written. Pager.journalOff is updated with
+** the new seek offset.
+**
+** The new offset is Pager.journalOff rounded up to a multiple of
+** JOURNAL_HDR_SZ(pPager). An offset that is already such a multiple
+** (including 0) is left unchanged.
+**
+** e.g. for a sector size of 512:
+**
+**   Input Offset              Output Offset
+**   ---------------------------------------
+**   0                         0
+**   512                       512
+**   100                       512
+**   2000                      2048
+**
+** Pager.journalOff is assigned before the seek is attempted, so it holds
+** the rounded offset even if the seek fails. The return value is whatever
+** sqlite3OsSeek() returns.
+*/
+static int seekJournalHdr(Pager *pPager){
+ off_t offset = 0;
+ off_t c = pPager->journalOff;
+ if( c ){
+ offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
+ }
+ assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
+ assert( offset>=c );
+ assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
+ pPager->journalOff = offset;
+ return sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
+}
+
+/*
+** The journal file must be open when this routine is called. A journal
+** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
+** first sector boundary at or after Pager.journalOff (the position is
+** chosen by seekJournalHdr()).
+**
+** The format for the journal header is as follows:
+** - 8 bytes: Magic identifying journal format.
+** - 4 bytes: Number of records in journal, or -1 if no-sync mode is on.
+** - 4 bytes: Random number used for page hash.
+** - 4 bytes: Initial database page count.
+** - 4 bytes: Sector size used by the process that wrote this journal.
+**
+** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
+**
+** Side effects on the Pager:
+** - journalHdr is set to the offset of the header just started.
+** - stmtHdrOff is set to that same offset if it was zero.
+** - journalOff is advanced to the first byte after the header.
+** - cksumInit is replaced with a fresh random value.
+**
+** Returns SQLITE_OK on success, or the error code of the first seek or
+** write that fails. journalHdr/journalOff are updated before the writes
+** are attempted and are not restored on failure.
+*/
+static int writeJournalHdr(Pager *pPager){
+
+  int rc = seekJournalHdr(pPager);
+  if( rc ) return rc;
+
+  pPager->journalHdr = pPager->journalOff;
+  if( pPager->stmtHdrOff==0 ){
+    pPager->stmtHdrOff = pPager->journalHdr;
+  }
+  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
+
+  /* FIX ME:
+  **
+  ** Possibly for a pager not in no-sync mode, the journal magic should not
+  ** be written until nRec is filled in as part of next syncJournal().
+  **
+  ** Actually maybe the whole journal header should be delayed until that
+  ** point. Think about this.
+  */
+  rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
+
+  if( rc==SQLITE_OK ){
+    /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
+    rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
+  }
+  if( rc==SQLITE_OK ){
+    /* The random check-hash initialiser */
+    sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
+    rc = write32bits(&pPager->jfd, pPager->cksumInit);
+  }
+  if( rc==SQLITE_OK ){
+    /* The initial database size */
+    rc = write32bits(&pPager->jfd, pPager->dbSize);
+  }
+  if( rc==SQLITE_OK ){
+    /* The assumed sector size for this process */
+    rc = write32bits(&pPager->jfd, pPager->sectorSize);
+  }
+
+  /* The header fields have been written successfully. Extend the file to
+  ** the end of the journal header sector by writing a single zero byte at
+  ** its last position; this also leaves the file pointer at journalOff.
+  ** A failed seek must be reported rather than ignored, otherwise the
+  ** padding byte would be written at the wrong offset.
+  */
+  if( rc==SQLITE_OK ){
+    rc = sqlite3OsSeek(&pPager->jfd, pPager->journalOff-1);
+  }
+  if( rc==SQLITE_OK ){
+    rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
+  }
+  return rc;
+}
+
+/*
+** The journal file must be open when this is called. A journal header
+** (JOURNAL_HDR_SZ bytes) is read from the first sector boundary at or
+** after Pager.journalOff. See comments above function writeJournalHdr()
+** for a description of the journal header format.
+**
+** If the header is read successfully, *pNRec is set to the number of
+** page records following this header and *pDbSize is set to the size of
+** the database before the transaction began, in pages. Also,
+** pPager->cksumInit and pPager->sectorSize are set to the values read
+** from the journal header, and Pager.journalOff is advanced past the
+** header. SQLITE_OK is returned in this case.
+**
+** SQLITE_DONE is returned if fewer than JOURNAL_HDR_SZ bytes remain in
+** the journal (journalSize is the total size of the journal file in
+** bytes), if the magic string does not match, or if the stored sector
+** size is unusable. In the first two cases none of the outputs are
+** modified; in the last case *pNRec, *pDbSize and pPager->cksumInit have
+** already been overwritten but pPager->sectorSize has not.
+**
+** If a seek or read fails, the error code is returned.
+*/
+static int readJournalHdr(
+  Pager *pPager,
+  off_t journalSize,
+  u32 *pNRec,
+  u32 *pDbSize
+){
+  int rc;
+  u32 sectorSize;          /* Sector size recorded in the header */
+  unsigned char aMagic[8]; /* A buffer to hold the magic header */
+
+  rc = seekJournalHdr(pPager);
+  if( rc ) return rc;
+
+  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
+    return SQLITE_DONE;
+  }
+
+  rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
+  if( rc ) return rc;
+
+  if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
+    return SQLITE_DONE;
+  }
+
+  rc = read32bits(&pPager->jfd, pNRec);
+  if( rc ) return rc;
+
+  rc = read32bits(&pPager->jfd, &pPager->cksumInit);
+  if( rc ) return rc;
+
+  rc = read32bits(&pPager->jfd, pDbSize);
+  if( rc ) return rc;
+
+  /* Update the assumed sector-size to match the value used by
+  ** the process that created this journal. If this journal was
+  ** created by a process other than this one, then this routine
+  ** is being called from within pager_playback(). The local value
+  ** of Pager.sectorSize is restored at the end of that routine.
+  **
+  ** The value is read into a local first. Pager.sectorSize is an int
+  ** that seekJournalHdr() divides by, so a zero (or a value that would
+  ** turn negative when stored) read from a damaged header must not be
+  ** installed; such a header is reported as corrupt instead.
+  */
+  rc = read32bits(&pPager->jfd, &sectorSize);
+  if( rc ) return rc;
+  if( sectorSize==0 || sectorSize>0x7fffffff ){
+    return SQLITE_DONE;
+  }
+  pPager->sectorSize = (int)sectorSize;
+
+  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
+  rc = sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
+  return rc;
+}
+
+
+/*
+** Write the supplied master journal name into the journal file for pager
+** pPager at the current location (or, in full-sync mode, at the next
+** sector boundary as computed by seekJournalHdr()).
+**
+** The record written is, in order:
+** - a 32-bit value PAGER_MJ_PGNO(pPager), marking the record as a
+**   master journal name rather than a page,
+** - the bytes of zMaster, without a terminating nul,
+** - a 32-bit value holding the length of zMaster in bytes,
+** - the journal magic string (aJournalMagic).
+**
+** Pager.journalOff is advanced by the name length plus 16; the 16 covers
+** the two 32-bit fields and the magic string (8 bytes, as read back by
+** readMasterJournal()).
+**
+** This is a no-op returning SQLITE_OK if zMaster is NULL or if
+** Pager.setMaster is already set. Pager.setMaster is set to 1 before any
+** I/O is attempted, so it remains set even if one of the writes fails.
+** Otherwise the return value is SQLITE_OK, or the error code of the
+** first seek or write that fails.
+*/
+static int writeMasterJournal(Pager *pPager, const char *zMaster){
+ int rc;
+ int len;
+
+ if( !zMaster || pPager->setMaster) return SQLITE_OK;
+ pPager->setMaster = 1;
+
+ len = strlen(zMaster);
+
+ /* If in full-sync mode, advance to the next disk sector before writing
+ ** the master journal name. This is in case the previous page written to
+ ** the journal has already been synced.
+ */
+ if( pPager->fullSync ){
+ rc = seekJournalHdr(pPager);
+ if( rc!=SQLITE_OK ) return rc;
+ }
+
+ pPager->journalOff += (len+16);
+
+ rc = write32bits(&pPager->jfd, PAGER_MJ_PGNO(pPager));
+ if( rc!=SQLITE_OK ) return rc;
+
+ rc = sqlite3OsWrite(&pPager->jfd, zMaster, len);
+ if( rc!=SQLITE_OK ) return rc;
+
+ rc = write32bits(&pPager->jfd, len);
+ if( rc!=SQLITE_OK ) return rc;
+
+ rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
+ return rc;
+}
+
+/*
** Add or remove a page from the list of all pages that are in the
** statement journal.
**
@@ -480,7 +738,6 @@ static int pager_unwritelock(Pager *pPager){
pPg->needSync = 0;
}
pPager->dirtyCache = 0;
- pPager->nMaster = 0;
pPager->nRec = 0;
}else{
assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
@@ -488,6 +745,7 @@ static int pager_unwritelock(Pager *pPager){
sqlite3OsUnlock(&pPager->fd, SHARED_LOCK);
pPager->state = PAGER_SHARED;
pPager->origDbSize = 0;
+ pPager->setMaster = 0;
return SQLITE_OK;
}
@@ -540,6 +798,7 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
if( rc!=SQLITE_OK ) return rc;
+ pPager->journalOff += pPager->pageSize + 4;
/* Sanity checking on the page. This is more important that I originally
** thought. If a power failure occurs while the journal is being written,
@@ -555,6 +814,7 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
if( useCksum ){
rc = read32bits(jfd, &cksum);
if( rc ) return rc;
+ pPager->journalOff += 4;
if( pager_cksum(pPager, pgno, aData)!=cksum ){
return SQLITE_DONE;
}
@@ -566,10 +826,6 @@ static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
** page in the pager cache. In this case just update the pager cache,
** not the database file. The page is left marked dirty in this case.
**
- ** FIX ME: Ideally the page would only be left marked dirty when the
- ** pager is in RESERVED state if it was dirty when this statement
- ** transaction was started.
- **
** If in EXCLUSIVE state, then we update the pager cache if it exists
** and the main file. The page is then marked not dirty.
*/
@@ -631,23 +887,16 @@ static int pager_delmaster(const char *zMaster){
if( nMasterJournal>0 ){
char *zJournal;
- char *zMasterPtr;
- int nMasterPtr; /* Number of bytes allocated at zMasterPtr */
+ char *zMasterPtr = 0;
/* Load the entire master journal file into space obtained from
** sqliteMalloc() and pointed to by zMasterJournal.
- **
- ** Also allocate an extra (strlen(zMaster)+1) bytes. This space is used
- ** to load a master-journal filename from some other journal file to
- ** check if it points at this master journal file.
*/
- nMasterPtr = strlen(zMaster) + 1;
- zMasterJournal = (char *)sqliteMalloc(nMasterJournal) + nMasterPtr;
+ zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
if( !zMasterJournal ){
rc = SQLITE_NOMEM;
goto delmaster_out;
}
- zMasterPtr = &zMasterJournal[nMasterJournal];
rc = sqlite3OsRead(&master, zMasterJournal, nMasterJournal);
if( rc!=SQLITE_OK ) goto delmaster_out;
@@ -659,46 +908,22 @@ static int pager_delmaster(const char *zMaster){
** so, return without deleting the master journal file.
*/
OsFile journal;
- int nMaster;
- off_t jsz;
memset(&journal, 0, sizeof(journal));
rc = sqlite3OsOpenReadOnly(zJournal, &journal);
if( rc!=SQLITE_OK ){
- sqlite3OsClose(&journal);
goto delmaster_out;
}
- /* Check if the file is big enough to be a journal file
- ** with the required master journal name. If not, ignore it.
- */
- rc = sqlite3OsFileSize(&journal, &jsz);
+ rc = readMasterJournal(&journal, &zMasterPtr);
+ sqlite3OsClose(&journal);
if( rc!=SQLITE_OK ){
- sqlite3OsClose(&journal);
goto delmaster_out;
}
- if( jsz<(25+strlen(zMaster)) ){
- sqlite3OsClose(&journal);
- continue;
- }
-
- /* Seek to the point in the journal where the master journal name
- ** is stored. Read the master journal name into memory obtained
- ** from malloc.
- */
- rc = sqlite3OsSeek(&journal, 20);
- if( rc!=SQLITE_OK ) goto delmaster_out;
- rc = read32bits(&journal, (u32*)&nMaster);
- if( rc!=SQLITE_OK ) goto delmaster_out;
- if( nMaster>=nMasterPtr ){
- rc = sqlite3OsRead(&journal, zMasterPtr, nMasterPtr);
- if( rc!=SQLITE_OK ){
- goto delmaster_out;
- }
- if( zMasterPtr[nMasterPtr-1]=='\0' && !strcmp(zMasterPtr, zMaster) ){
- /* We have a match. Do not delete the master journal file. */
- goto delmaster_out;
- }
+
+ if( zMasterPtr && !strcmp(zMasterPtr, zMaster) ){
+ /* We have a match. Do not delete the master journal file. */
+ goto delmaster_out;
}
}
zJournal += (strlen(zJournal)+1);
@@ -755,7 +980,6 @@ static int pager_reload_cache(Pager *pPager){
return rc;
}
-
/*
** Playback the journal and thus restore the database file to
** the state it was in before we started making changes.
@@ -809,97 +1033,94 @@ static int pager_reload_cache(Pager *pPager){
** If an I/O or malloc() error occurs, the journal-file is not deleted
** and an error code is returned.
*/
-static int pager_playback(Pager *pPager, int useJournalSize){
+static int pager_playback(Pager *pPager){
off_t szJ; /* Size of the journal file in bytes */
int nRec; /* Number of Records in the journal */
int i; /* Loop counter */
Pgno mxPg = 0; /* Size of the original file in pages */
- unsigned char aMagic[8]; /* A buffer to hold the magic header */
int rc; /* Result code of a subroutine */
- int nMaster; /* Number of bytes in the name of master journal */
char *zMaster = 0; /* Name of master journal file if any */
/* Figure out how many records are in the journal. Abort early if
** the journal is empty.
*/
assert( pPager->journalOpen );
- sqlite3OsSeek(&pPager->jfd, 0);
rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
if( rc!=SQLITE_OK ){
goto end_playback;
}
- /* If the journal file is too small to contain a complete header,
- ** it must mean that the process that created the journal was just
- ** beginning to write the journal file when it died. In that case,
- ** the database file should have still been completely unchanged.
- ** Nothing needs to be rolled back. We can safely ignore this journal.
+ /* Read the master journal name from the journal, if it is present.
+ ** If a master journal file name is specified, but the file is not
+ ** present on disk, then the journal is not hot and does not need to be
+ ** played back.
*/
- if( szJ < 24 ){
- goto end_playback;
- }
-
- /* (1) Read the beginning of the journal and verify the magic string
- ** at the beginning of the journal. */
- rc = sqlite3OsRead(&pPager->jfd, aMagic, sizeof(aMagic));
- if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
+ rc = readMasterJournal(&pPager->jfd, &zMaster);
+ assert( rc!=SQLITE_DONE );
+ if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
+ sqliteFree(zMaster);
+ zMaster = 0;
+ if( rc==SQLITE_DONE ) rc = SQLITE_OK;
goto end_playback;
}
+ sqlite3OsSeek(&pPager->jfd, 0);
+ pPager->journalOff = 0;
- /* (2) Read the number of pages stored in the journal. */
- rc = read32bits(&pPager->jfd, (u32*)&nRec);
- if( rc ) goto end_playback;
- if( nRec==0xffffffff || useJournalSize ||
- nRec>(szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager)
- ){
- nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
- }
-
- /* (3) Read the initial value for the sanity checksum */
- rc = read32bits(&pPager->jfd, &pPager->cksumInit);
- if( rc ) goto end_playback;
-
- /* (4) Read the number of pages in the database file prior to the
- ** start of the transaction */
- rc = read32bits(&pPager->jfd, &mxPg);
- if( rc!=SQLITE_OK ){
- goto end_playback;
- }
+ /* This loop terminates either when the readJournalHdr() call returns
+ ** SQLITE_DONE or an IO error occurs. */
+ while( 1 ){
- /* (5) and (6): Check if a master journal file is specified. If one is
- ** specified, only proceed with the playback if it still exists. */
- rc = read32bits(&pPager->jfd, &nMaster);
- if( rc ) goto end_playback;
- if( szJ < 24+nMaster ) goto end_playback;
- if( nMaster>0 ){
- zMaster = sqliteMalloc(nMaster);
- if( !zMaster ){
- rc = SQLITE_NOMEM;
+ /* Read the next journal header from the journal file. If there are
+ ** not enough bytes left in the journal file for a complete header, or
+ ** it is corrupted, then a process must have failed while writing it.
+ ** This indicates nothing more needs to be rolled back.
+ */
+ rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
+ if( rc!=SQLITE_OK ){
+ if( rc==SQLITE_DONE ){
+ rc = SQLITE_OK;
+ }
goto end_playback;
}
- rc = sqlite3OsRead(&pPager->jfd, zMaster, nMaster);
- if( rc!=SQLITE_OK || (zMaster[0] && !sqlite3OsFileExists(zMaster)) ){
- goto end_playback;
+
+ /* If nRec is 0xffffffff, then this journal was created by a process
+ ** working in no-sync mode. This means that the rest of the journal
+ ** file consists of pages, there are no more journal headers. Compute
+ ** the value of nRec based on this assumption.
+ */
+ if( nRec==0xffffffff ){
+ assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
+ nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
}
- }
- /* Truncate the database file back to it's original size */
- assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
- rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
- if( rc!=SQLITE_OK ){
- goto end_playback;
- }
- pPager->dbSize = mxPg;
+ /* If this is the first header read from the journal, truncate the
+ ** database file back to its original size.
+ */
+ if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
+ assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
+ rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)mxPg);
+ if( rc!=SQLITE_OK ){
+ goto end_playback;
+ }
+ pPager->dbSize = mxPg;
+ }
+
+ /* rc = sqlite3OsSeek(&pPager->jfd, JOURNAL_HDR_SZ(pPager)); */
+ if( rc!=SQLITE_OK ) goto end_playback;
- /* Copy original pages out of the journal and back into the database file.
- */
- for(i=0; i<nRec; i++){
- rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
- if( rc!=SQLITE_OK ){
- if( rc==SQLITE_DONE ){
- rc = SQLITE_OK;
+ /* Copy original pages out of the journal and back into the database file.
+ */
+ for(i=0; i<nRec; i++){
+ rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
+ if( rc!=SQLITE_OK ){
+ if( rc==SQLITE_DONE ){
+ rc = SQLITE_OK;
+ pPager->journalOff = szJ;
+ break;
+ }else{
+ goto end_playback;
+ }
}
- break;
}
}
@@ -907,9 +1128,8 @@ static int pager_playback(Pager *pPager, int useJournalSize){
** where not restored by the loop above. We have to restore those
** pages by reading them back from the original database.
*/
- if( rc==SQLITE_OK ){
- pager_reload_cache(pPager);
- }
+ assert( rc==SQLITE_OK );
+ pager_reload_cache(pPager);
end_playback:
if( zMaster ){
@@ -925,6 +1145,12 @@ end_playback:
if( rc==SQLITE_OK ){
rc = pager_unwritelock(pPager);
}
+
+ /* The Pager.sectorSize variable may have been updated while rolling
+ ** back a journal created by a process with a different PAGER_SECTOR_SIZE
+ ** value. Reset it to the correct value for this process.
+ */
+ pPager->sectorSize = PAGER_SECTOR_SIZE;
return rc;
}
@@ -944,10 +1170,32 @@ end_playback:
*/
static int pager_stmt_playback(Pager *pPager){
off_t szJ; /* Size of the full journal */
+ off_t hdrOff;
int nRec; /* Number of Records */
int i; /* Loop counter */
int rc;
+ szJ = pPager->journalOff;
+#ifndef NDEBUG
+ {
+ off_t os_szJ;
+ rc = sqlite3OsFileSize(&pPager->jfd, &os_szJ);
+ if( rc!=SQLITE_OK ) return rc;
+ assert( szJ==os_szJ );
+ }
+#endif
+
+ /* Set hdrOff to be the offset to the first journal header written
+ ** this statement transaction, or the end of the file if no journal
+ ** header was written.
+ */
+ hdrOff = pPager->stmtHdrOff;
+ assert( pPager->fullSync || !hdrOff );
+ if( !hdrOff ){
+ hdrOff = szJ;
+ }
+
+
/* Truncate the database back to its original size.
*/
rc = sqlite3OsTruncate(&pPager->fd, SQLITE_PAGE_SIZE*(off_t)pPager->stmtSize);
@@ -970,25 +1218,41 @@ static int pager_stmt_playback(Pager *pPager){
if( rc!=SQLITE_OK ) goto end_stmt_playback;
}
- /* Figure out how many pages need to be copied out of the transaction
- ** journal.
+ /* Now roll some pages back from the transaction journal. Pager.stmtJSize
+ ** was the size of the journal file when this statement was started, so
+ ** everything after that needs to be rolled back, either into the
+ ** database, the memory cache, or both.
+ **
+ ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
+ ** of the first journal header written during this statement transaction.
*/
rc = sqlite3OsSeek(&pPager->jfd, pPager->stmtJSize);
if( rc!=SQLITE_OK ){
goto end_stmt_playback;
}
- rc = sqlite3OsFileSize(&pPager->jfd, &szJ);
- if( rc!=SQLITE_OK ){
- goto end_stmt_playback;
- }
- nRec = (szJ - pPager->stmtJSize)/JOURNAL_PG_SZ(pPager);
- for(i=nRec-1; i>=0; i--){
+ pPager->journalOff = pPager->stmtJSize;
+ while( pPager->journalOff < hdrOff ){
rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
+ assert( rc!=SQLITE_DONE );
+ if( rc!=SQLITE_OK ) goto end_stmt_playback;
+ }
+
+ while( pPager->journalOff < szJ ){
+ u32 nRec;
+ u32 dummy;
+ rc = readJournalHdr(pPager, szJ, &nRec, &dummy);
if( rc!=SQLITE_OK ){
assert( rc!=SQLITE_DONE );
goto end_stmt_playback;
}
+ for(i=nRec-1; i>=0 && pPager->journalOff < szJ; i--){
+ rc = pager_playback_one_page(pPager, &pPager->jfd, 1);
+ assert( rc!=SQLITE_DONE );
+ if( rc!=SQLITE_OK ) goto end_stmt_playback;
+ }
}
+
+ pPager->journalOff = szJ;
end_stmt_playback:
if( rc!=SQLITE_OK ){
@@ -1177,6 +1441,7 @@ int sqlite3pager_open(
pPager->pFirstSynced = 0;
pPager->pLast = 0;
pPager->nExtra = nExtra;
+ pPager->sectorSize = PAGER_SECTOR_SIZE;
pPager->pBusyHandler = (BusyHandler *)pBusyHandler;
memset(pPager->aHash, 0, sizeof(pPager->aHash));
*ppPager = pPager;
@@ -1233,7 +1498,7 @@ int sqlite3pager_pagecount(Pager *pPager){
/*
** Forward declaration
*/
-static int syncJournal(Pager*, const char*);
+static int syncJournal(Pager*);
/*
@@ -1325,7 +1590,7 @@ int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
memoryTruncate(pPager);
return SQLITE_OK;
}
- rc = syncJournal(pPager, 0);
+ rc = syncJournal(pPager);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -1482,14 +1747,14 @@ int sqlite3pager_ref(void *pData){
** This routine clears the needSync field of every page current held in
** memory.
*/
-static int syncJournal(Pager *pPager, const char *zMaster){
+static int syncJournal(Pager *pPager){
PgHdr *pPg;
int rc = SQLITE_OK;
/* Sync the journal before modifying the main database
** (assuming there is a journal and it needs to be synced.)
*/
- if( pPager->needSync || zMaster ){
+ if( pPager->needSync ){
if( !pPager->tempFile ){
assert( pPager->journalOpen );
/* assert( !pPager->noSync ); // noSync might be set if synchronous
@@ -1499,37 +1764,28 @@ static int syncJournal(Pager *pPager, const char *zMaster){
/* Make sure the pPager->nRec counter we are keeping agrees
** with the nRec computed from the size of the journal file.
*/
- off_t hdrSz, pgSz, jSz;
- hdrSz = JOURNAL_HDR_SZ(pPager);
- pgSz = JOURNAL_PG_SZ(pPager);
+ off_t jSz;
rc = sqlite3OsFileSize(&pPager->jfd, &jSz);
if( rc!=0 ) return rc;
- assert( pPager->nRec*pgSz+hdrSz==jSz );
+ assert( pPager->journalOff==jSz );
}
#endif
{
- /* Write the nRec value into the journal file header */
- off_t szJ;
+ /* Write the nRec value into the journal file header. If in
+ ** full-synchronous mode, sync the journal first. This ensures that
+ ** all data has really hit the disk before nRec is updated to mark
+ ** it as a candidate for rollback.
+ */
if( pPager->fullSync ){
TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
if( rc!=0 ) return rc;
}
- sqlite3OsSeek(&pPager->jfd, sizeof(aJournalMagic));
+ sqlite3OsSeek(&pPager->jfd, pPager->journalHdr + sizeof(aJournalMagic));
rc = write32bits(&pPager->jfd, pPager->nRec);
if( rc ) return rc;
- /* Write the name of the master journal file if one is specified */
- if( zMaster ){
- assert( strlen(zMaster)<pPager->nMaster );
- rc = sqlite3OsSeek(&pPager->jfd, 20);
- if( rc ) return rc;
- rc = sqlite3OsWrite(&pPager->jfd, zMaster, strlen(zMaster)+1);
- if( rc ) return rc;
- }
-
- szJ = JOURNAL_HDR_SZ(pPager) + pPager->nRec*JOURNAL_PG_SZ(pPager);
- sqlite3OsSeek(&pPager->jfd, szJ);
+ sqlite3OsSeek(&pPager->jfd, pPager->journalOff);
}
TRACE2("SYNC journal of %d\n", pPager->fd.h);
rc = sqlite3OsSync(&pPager->jfd);
@@ -1721,11 +1977,14 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
}
pPager->journalOpen = 1;
pPager->journalStarted = 0;
+ pPager->journalOff = 0;
+ pPager->setMaster = 0;
+ pPager->journalHdr = 0;
/* Playback and delete the journal. Drop the database write
** lock and reacquire the read lock.
*/
- rc = pager_playback(pPager, 0);
+ rc = pager_playback(pPager);
if( rc!=SQLITE_OK ){
return rc;
}
@@ -1774,11 +2033,25 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
** it can't be helped.
*/
if( pPg==0 ){
- int rc = syncJournal(pPager, 0);
+ int rc = syncJournal(pPager);
if( rc!=0 ){
sqlite3pager_rollback(pPager);
return SQLITE_IOERR;
}
+ if( pPager->fullSync ){
+ /* If in full-sync mode, write a new journal header into the
+ ** journal file. This is done to avoid ever modifying a journal
+ ** header that is involved in the rollback of pages that have
+ ** already been written to the database (in case the header is
+ ** trashed when the nRec field is updated).
+ */
+ pPager->nRec = 0;
+ rc = writeJournalHdr(pPager);
+ if( rc!=0 ){
+ sqlite3pager_rollback(pPager);
+ return SQLITE_IOERR;
+ }
+ }
pPg = pPager->pFirst;
}
assert( pPg->nRef==0 );
@@ -1975,6 +2248,9 @@ static int pager_open_journal(Pager *pPager){
return SQLITE_NOMEM;
}
rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,pPager->tempFile);
+ pPager->journalOff = 0;
+ pPager->setMaster = 0;
+ pPager->journalHdr = 0;
if( rc!=SQLITE_OK ){
sqliteFree(pPager->aInJournal);
pPager->aInJournal = 0;
@@ -1994,32 +2270,8 @@ static int pager_open_journal(Pager *pPager){
}
pPager->origDbSize = pPager->dbSize;
- /* Create the header for the journal:
- ** - 8 bytes: Magic identifying journal format.
- ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
- ** - 4 bytes: Magic used for page checksums.
- ** - 4 bytes: Initial database page count.
- ** - 4 bytes: Number of bytes reserved for master journal ptr (nMaster)
- ** - nMaster bytes: Space for a master journal pointer.
- */
- rc = sqlite3OsWrite(&pPager->jfd, aJournalMagic, sizeof(aJournalMagic));
- if( rc==SQLITE_OK ){
- rc = write32bits(&pPager->jfd, pPager->noSync ? 0xffffffff : 0);
- }
- if( rc==SQLITE_OK ){
- sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
- rc = write32bits(&pPager->jfd, pPager->cksumInit);
- }
- if( rc==SQLITE_OK ){
- rc = write32bits(&pPager->jfd, pPager->dbSize);
- }
- if( rc==SQLITE_OK ){
- rc = write32bits(&pPager->jfd, pPager->nMaster);
- }
- if( rc==SQLITE_OK ){
- sqlite3OsSeek(&pPager->jfd, 24 + pPager->nMaster - 1);
- rc = sqlite3OsWrite(&pPager->jfd, "\000", 1);
- }
+ rc = writeJournalHdr(pPager);
+
if( pPager->stmtAutoopen && rc==SQLITE_OK ){
rc = sqlite3pager_stmt_begin(pPager);
}
@@ -2055,12 +2307,11 @@ static int pager_open_journal(Pager *pPager){
**
** If the database is already reserved for writing, this routine is a no-op.
*/
-int sqlite3pager_begin(void *pData, int nMaster){
+int sqlite3pager_begin(void *pData, int dummy_fixme){
PgHdr *pPg = DATA_TO_PGHDR(pData);
Pager *pPager = pPg->pPager;
int rc = SQLITE_OK;
assert( pPg->nRef>0 );
- assert( nMaster>=0 );
assert( pPager->state!=PAGER_UNLOCK );
if( pPager->state==PAGER_SHARED ){
assert( pPager->aInJournal==0 );
@@ -2079,10 +2330,9 @@ int sqlite3pager_begin(void *pData, int nMaster){
if( rc!=SQLITE_OK ){
return rc;
}
- pPager->nMaster = nMaster;
pPager->state = PAGER_RESERVED;
pPager->dirtyCache = 0;
- TRACE3("TRANSACTION %d nMaster=%d\n", pPager->fd.h, nMaster);
+ TRACE2("TRANSACTION %d\n", pPager->fd.h);
if( pPager->useJournal && !pPager->tempFile ){
rc = pager_open_journal(pPager);
}
@@ -2122,6 +2372,8 @@ int sqlite3pager_write(void *pData){
return SQLITE_PERM;
}
+ assert( !pPager->setMaster );
+
/* Mark the page as dirty. If the page has already been written
** to the journal then we can return right away.
*/
@@ -2176,6 +2428,7 @@ int sqlite3pager_write(void *pData){
store32bits(pPg->pgno, pPg, -4);
CODEC(pPager, pData, pPg->pgno, 7);
rc = sqlite3OsWrite(&pPager->jfd, &((char*)pData)[-4], szPg);
+ pPager->journalOff += szPg;
TRACE3("JOURNAL page %d needSync=%d\n", pPg->pgno, pPg->needSync);
CODEC(pPager, pData, pPg->pgno, 0);
*(u32*)PGHDR_TO_EXTRA(pPg) = saved;
@@ -2500,7 +2753,7 @@ int sqlite3pager_rollback(Pager *pPager){
if( pPager->errMask!=0 && pPager->errMask!=PAGER_ERR_FULL ){
if( pPager->state>=PAGER_EXCLUSIVE ){
- pager_playback(pPager, 1);
+ pager_playback(pPager);
}
return pager_errcode(pPager);
}
@@ -2512,7 +2765,7 @@ int sqlite3pager_rollback(Pager *pPager){
rc = rc2;
}
}else{
- rc = pager_playback(pPager, 1);
+ rc = pager_playback(pPager);
}
if( rc!=SQLITE_OK ){
rc = SQLITE_CORRUPT;
@@ -2577,12 +2830,11 @@ int sqlite3pager_stmt_begin(Pager *pPager){
#ifndef NDEBUG
rc = sqlite3OsFileSize(&pPager->jfd, &pPager->stmtJSize);
if( rc ) goto stmt_begin_failed;
- assert( pPager->stmtJSize ==
- pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager) );
+ assert( pPager->stmtJSize == pPager->journalOff );
#endif
- pPager->stmtJSize =
- pPager->nRec*JOURNAL_PG_SZ(pPager) + JOURNAL_HDR_SZ(pPager);
+ pPager->stmtJSize = pPager->journalOff;
pPager->stmtSize = pPager->dbSize;
+ pPager->stmtHdrOff = 0;
if( !pPager->stmtOpen ){
rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
if( rc ) goto stmt_begin_failed;
@@ -2749,12 +3001,20 @@ int sqlite3pager_sync(Pager *pPager, const char *zMaster){
PgHdr *pPg;
assert( pPager->journalOpen );
- rc = pager_incr_changecounter(pPager);
- if( rc!=SQLITE_OK ) goto sync_exit;
-
- /* Sync the journal file */
- rc = syncJournal(pPager, zMaster);
- if( rc!=SQLITE_OK ) goto sync_exit;
+ /* If a master journal file name has already been written to the
+ ** journal file, then no sync is required. This happens when it is
+ ** written, then the process fails to upgrade from a RESERVED to an
+ ** EXCLUSIVE lock. The next time the process tries to commit the
+ ** transaction the m-j name will have already been written.
+ */
+ if( !pPager->setMaster ){
+ rc = pager_incr_changecounter(pPager);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ rc = writeMasterJournal(pPager, zMaster);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ rc = syncJournal(pPager);
+ if( rc!=SQLITE_OK ) goto sync_exit;
+ }
/* Write all dirty pages to the database file */
pPg = pager_get_all_dirty_pages(pPager);
diff --git a/src/test5.c b/src/test5.c
index cd6d877f3..2b325bd99 100644
--- a/src/test5.c
+++ b/src/test5.c
@@ -15,7 +15,7 @@
** is used for testing the SQLite routines for converting between
** the various supported unicode encodings.
**
-** $Id: test5.c,v 1.12 2004/06/23 13:46:32 danielk1977 Exp $
+** $Id: test5.c,v 1.13 2004/06/25 02:38:55 danielk1977 Exp $
*/
#include "sqliteInt.h"
#include "vdbeInt.h"
@@ -168,7 +168,7 @@ static int test_translate(
sqlite3ValueSetStr(pVal, -1, z, enc_from, xDel);
}
- z = sqlite3ValueText(pVal, enc_to);
+ z = (char *)sqlite3ValueText(pVal, enc_to);
len = sqlite3ValueBytes(pVal, enc_to) + (enc_to==SQLITE_UTF8?1:2);
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(z, len));