persistentstorage/sql/SQLite364/pager.c
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 /*
       
     2 ** 2001 September 15
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** This is the implementation of the page cache subsystem or "pager".
       
    13 ** 
       
    14 ** The pager is used to access a database disk file.  It implements
       
    15 ** atomic commit and rollback through the use of a journal file that
       
    16 ** is separate from the database file.  The pager also implements file
       
    17 ** locking to prevent two processes from writing the same database
       
    18 ** file simultaneously, or one process from reading the database while
       
    19 ** another is writing.
       
    20 **
       
    21 ** @(#) $Id: pager.c,v 1.497 2008/10/07 11:51:20 danielk1977 Exp $
       
    22 */
       
    23 #ifndef SQLITE_OMIT_DISKIO
       
    24 #include "sqliteInt.h"
       
    25 
       
    26 /*
       
    27 ** Macros for troubleshooting.  Normally turned off
       
    28 */
       
    29 #if 0
       
    30 #define sqlite3DebugPrintf printf
       
    31 #define PAGERTRACE1(X)       sqlite3DebugPrintf(X)
       
    32 #define PAGERTRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
       
    33 #define PAGERTRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
       
    34 #define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
       
    35 #define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
       
    36 #else
       
    37 #define PAGERTRACE1(X)
       
    38 #define PAGERTRACE2(X,Y)
       
    39 #define PAGERTRACE3(X,Y,Z)
       
    40 #define PAGERTRACE4(X,Y,Z,W)
       
    41 #define PAGERTRACE5(X,Y,Z,W,V)
       
    42 #endif
       
    43 
       
    44 /*
       
    45 ** The following two macros are used within the PAGERTRACEX() macros above
       
    46 ** to print out file-descriptors. 
       
    47 **
       
    48 ** PAGERID() takes a pointer to a Pager struct as its argument. The
       
    49 ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
       
    50 ** struct as its argument.
       
    51 */
       
    52 #define PAGERID(p) ((int)(p->fd))
       
    53 #define FILEHANDLEID(fd) ((int)fd)
       
    54 
       
    55 /*
       
    56 ** The page cache as a whole is always in one of the following
       
    57 ** states:
       
    58 **
       
    59 **   PAGER_UNLOCK        The page cache is not currently reading or 
       
    60 **                       writing the database file.  There is no
       
    61 **                       data held in memory.  This is the initial
       
    62 **                       state.
       
    63 **
       
    64 **   PAGER_SHARED        The page cache is reading the database.
       
    65 **                       Writing is not permitted.  There can be
       
    66 **                       multiple readers accessing the same database
       
    67 **                       file at the same time.
       
    68 **
       
    69 **   PAGER_RESERVED      This process has reserved the database for writing
       
    70 **                       but has not yet made any changes.  Only one process
       
    71 **                       at a time can reserve the database.  The original
       
    72 **                       database file has not been modified so other
       
    73 **                       processes may still be reading the on-disk
       
    74 **                       database file.
       
    75 **
       
    76 **   PAGER_EXCLUSIVE     The page cache is writing the database.
       
    77 **                       Access is exclusive.  No other processes or
       
    78 **                       threads can be reading or writing while one
       
    79 **                       process is writing.
       
    80 **
       
    81 **   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
       
    82 **                       after all dirty pages have been written to the
       
    83 **                       database file and the file has been synced to
       
    84 **                       disk. All that remains to do is to remove or
       
    85 **                       truncate the journal file and the transaction 
       
    86 **                       will be committed.
       
    87 **
       
    88 ** The page cache comes up in PAGER_UNLOCK.  The first time a
       
    89 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
       
    90 ** After all pages have been released using sqlite_page_unref(),
       
    91 ** the state transitions back to PAGER_UNLOCK.  The first time
       
    92 ** that sqlite3PagerWrite() is called, the state transitions to
       
    93 ** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
       
    94 ** called on an outstanding page which means that the pager must
       
    95 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
       
    96 ** PAGER_RESERVED means that there is an open rollback journal.
       
    97 ** The transition to PAGER_EXCLUSIVE occurs before any changes
       
    98 ** are made to the database file, though writes to the rollback
       
    99 ** journal occurs with just PAGER_RESERVED.  After an sqlite3PagerRollback()
       
   100 ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
       
   101 ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
       
   102 */
       
   103 #define PAGER_UNLOCK      0
       
   104 #define PAGER_SHARED      1   /* same as SHARED_LOCK */
       
   105 #define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
       
   106 #define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
       
   107 #define PAGER_SYNCED      5
       
   108 
       
   109 /*
       
   110 ** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
       
   111 ** then failed attempts to get a reserved lock will invoke the busy callback.
       
   112 ** This is off by default.  To see why, consider the following scenario:
       
   113 ** 
       
   114 ** Suppose thread A already has a shared lock and wants a reserved lock.
       
   115 ** Thread B already has a reserved lock and wants an exclusive lock.  If
       
   116 ** both threads are using their busy callbacks, it might be a long time
       
   117 ** before one of the threads gives up and allows the other to proceed.
       
   118 ** But if the thread trying to get the reserved lock gives up quickly
       
   119 ** (if it never invokes its busy callback) then the contention will be
       
   120 ** resolved quickly.
       
   121 */
       
   122 #ifndef SQLITE_BUSY_RESERVED_LOCK
       
   123 # define SQLITE_BUSY_RESERVED_LOCK 0
       
   124 #endif
       
   125 
       
   126 /*
       
   127 ** This macro rounds values up so that if the value is an address it
       
   128 ** is guaranteed to be an address that is aligned to an 8-byte boundary.
       
   129 */
       
   130 #define FORCE_ALIGNMENT(X)   (((X)+7)&~7)
       
   131 
       
   132 /*
       
   133 ** A macro used for invoking the codec if there is one
       
   134 */
       
   135 #ifdef SQLITE_HAS_CODEC
       
   136 # define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
       
   137 # define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
       
   138 #else
       
   139 # define CODEC1(P,D,N,X) /* NO-OP */
       
   140 # define CODEC2(P,D,N,X) ((char*)D)
       
   141 #endif
       
   142 
       
   143 /*
       
   144 ** An open page cache is an instance of the following structure.
       
   145 **
       
   146 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, or
       
   147 ** SQLITE_FULL. Once one of the first two errors occurs, it persists
       
   148 ** and is returned as the result of every major pager API call.  The
       
   149 ** SQLITE_FULL return code is slightly different. It persists only until the
       
   150 ** next successful rollback is performed on the pager cache. Also,
       
   151 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
       
   152 ** APIs, they may still be used successfully.
       
   153 */
       
   154 struct Pager {
       
   155   sqlite3_vfs *pVfs;          /* OS functions to use for IO */
       
   156   u8 journalOpen;             /* True if journal file descriptors is valid */
       
   157   u8 journalStarted;          /* True if header of journal is synced */
       
   158   u8 useJournal;              /* Use a rollback journal on this file */
       
   159   u8 noReadlock;              /* Do not bother to obtain readlocks */
       
   160   u8 stmtOpen;                /* True if the statement subjournal is open */
       
   161   u8 stmtInUse;               /* True we are in a statement subtransaction */
       
   162   u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
       
   163   u8 noSync;                  /* Do not sync the journal if true */
       
   164   u8 fullSync;                /* Do extra syncs of the journal for robustness */
       
   165   u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
       
   166   u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
       
   167   u8 tempFile;                /* zFilename is a temporary file */
       
   168   u8 readOnly;                /* True for a read-only database */
       
   169   u8 needSync;                /* True if an fsync() is needed on the journal */
       
   170   u8 dirtyCache;              /* True if cached pages have changed */
       
   171   u8 alwaysRollback;          /* Disable DontRollback() for all pages */
       
   172   u8 memDb;                   /* True to inhibit all file I/O */
       
   173   u8 setMaster;               /* True if a m-j name has been written to jrnl */
       
   174   u8 doNotSync;               /* Boolean. While true, do not spill the cache */
       
   175   u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
       
   176   u8 journalMode;             /* On of the PAGER_JOURNALMODE_* values */
       
   177   u8 dbModified;              /* True if there are any changes to the Db */
       
   178   u8 changeCountDone;         /* Set after incrementing the change-counter */
       
   179   u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
       
   180   int errCode;                /* One of several kinds of errors */
       
   181   int dbSize;                 /* Number of pages in the file */
       
   182   int origDbSize;             /* dbSize before the current change */
       
   183   int stmtSize;               /* Size of database (in pages) at stmt_begin() */
       
   184   int nRec;                   /* Number of pages written to the journal */
       
   185   u32 cksumInit;              /* Quasi-random value added to every checksum */
       
   186   int stmtNRec;               /* Number of records in stmt subjournal */
       
   187   int nExtra;                 /* Add this many bytes to each in-memory page */
       
   188   int pageSize;               /* Number of bytes in a page */
       
   189   int nPage;                  /* Total number of in-memory pages */
       
   190   int mxPage;                 /* Maximum number of pages to hold in cache */
       
   191   Pgno mxPgno;                /* Maximum allowed size of the database */
       
   192   Bitvec *pInJournal;         /* One bit for each page in the database file */
       
   193   Bitvec *pInStmt;            /* One bit for each page in the database */
       
   194   Bitvec *pAlwaysRollback;    /* One bit for each page marked always-rollback */
       
   195   char *zFilename;            /* Name of the database file */
       
   196   char *zJournal;             /* Name of the journal file */
       
   197   char *zDirectory;           /* Directory hold database and journal files */
       
   198   sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
       
   199   sqlite3_file *stfd;         /* File descriptor for the statement subjournal*/
       
   200   BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
       
   201   i64 journalOff;             /* Current byte offset in the journal file */
       
   202   i64 journalHdr;             /* Byte offset to previous journal header */
       
   203   i64 stmtHdrOff;             /* First journal header written this statement */
       
   204   i64 stmtCksum;              /* cksumInit when statement was started */
       
   205   i64 stmtJSize;              /* Size of journal at stmt_begin() */
       
   206   u32 sectorSize;             /* Assumed sector size during rollback */
       
   207 #ifdef SQLITE_TEST
       
   208   int nHit, nMiss;            /* Cache hits and missing */
       
   209   int nRead, nWrite;          /* Database pages read/written */
       
   210 #endif
       
   211   void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
       
   212 #ifdef SQLITE_HAS_CODEC
       
   213   void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
       
   214   void *pCodecArg;            /* First argument to xCodec() */
       
   215 #endif
       
   216   char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
       
   217   char dbFileVers[16];        /* Changes whenever database file changes */
       
   218   i64 journalSizeLimit;       /* Size limit for persistent journal files */
       
   219   PCache *pPCache;            /* Pointer to page cache object */
       
   220 };
       
   221 
       
   222 /*
       
   223 ** The following global variables hold counters used for
       
   224 ** testing purposes only.  These variables do not exist in
       
   225 ** a non-testing build.  These variables are not thread-safe.
       
   226 */
       
   227 #ifdef SQLITE_TEST
       
   228 int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
       
   229 int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
       
   230 int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
       
   231 # define PAGER_INCR(v)  v++
       
   232 #else
       
   233 # define PAGER_INCR(v)
       
   234 #endif
       
   235 
       
   236 
       
   237 
       
   238 /*
       
   239 ** Journal files begin with the following magic string.  The data
       
   240 ** was obtained from /dev/random.  It is used only as a sanity check.
       
   241 **
       
   242 ** Since version 2.8.0, the journal format contains additional sanity
       
   243 ** checking information.  If the power fails while the journal is being
       
   244 ** written, semi-random garbage data might appear in the journal
       
   245 ** file after power is restored.  If an attempt is then made
       
   246 ** to roll the journal back, the database could be corrupted.  The additional
       
   247 ** sanity checking data is an attempt to discover the garbage in the
       
   248 ** journal and ignore it.
       
   249 **
       
   250 ** The sanity checking information for the new journal format consists
       
   251 ** of a 32-bit checksum on each page of data.  The checksum covers both
       
   252 ** the page number and the pPager->pageSize bytes of data for the page.
       
   253 ** This cksum is initialized to a 32-bit random value that appears in the
       
   254 ** journal file right after the header.  The random initializer is important,
       
   255 ** because garbage data that appears at the end of a journal is likely
       
   256 ** data that was once in other files that have now been deleted.  If the
       
   257 ** garbage data came from an obsolete journal file, the checksums might
       
   258 ** be correct.  But by initializing the checksum to random value which
       
   259 ** is different for every journal, we minimize that risk.
       
   260 */
       
   261 static const unsigned char aJournalMagic[] = {
       
   262   0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
       
   263 };
       
   264 
       
   /*
   ** JOURNAL_PG_SZ(pPager) is the number of bytes one page record occupies
   ** in the journal: pPager->pageSize bytes plus 8 bytes of per-record
   ** overhead.
   **
   ** The macro argument is parenthesized so that any pointer-valued
   ** expression (for example "&pager") expands correctly.
   */
   #define JOURNAL_PG_SZ(pPager)  (((pPager)->pageSize) + 8)

   /*
   ** The journal header size for this pager. In the future, this could be
   ** set to some value read from the disk controller. The important
   ** characteristic is that it is the same size as a disk sector.
   **
   ** The macro argument is parenthesized for the same reason as above.
   */
   #define JOURNAL_HDR_SZ(pPager) ((pPager)->sectorSize)
       
   277 
       
   278 /*
       
   279 ** The macro MEMDB is true if we are dealing with an in-memory database.
       
   280 ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
       
   281 ** the value of MEMDB will be a constant and the compiler will optimize
       
   282 ** out code that would never execute.
       
   283 */
       
   284 #ifdef SQLITE_OMIT_MEMORYDB
       
   285 # define MEMDB 0
       
   286 #else
       
   287 # define MEMDB pPager->memDb
       
   288 #endif
       
   289 
       
   290 /*
       
   291 ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
       
   292 ** reserved for working around a windows/posix incompatibility). It is
       
   293 ** used in the journal to signify that the remainder of the journal file 
       
   294 ** is devoted to storing a master journal name - there are no more pages to
       
   295 ** roll back. See comments for function writeMasterJournal() for details.
       
   296 */
       
   297 /* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
       
   298 #define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
       
   299 
       
   300 /*
       
   301 ** The maximum legal page number is (2^31 - 1).
       
   302 */
       
   303 #define PAGER_MAX_PGNO 2147483647
       
   304 
       
   305 /*
       
   306 ** Return true if page *pPg has already been written to the statement
       
   307 ** journal (or statement snapshot has been created, if *pPg is part
       
   308 ** of an in-memory database).
       
   309 */
       
   310 static int pageInStatement(PgHdr *pPg){
       
   311   Pager *pPager = pPg->pPager;
       
   312   if( MEMDB ){
       
   313     return pPg->apSave[1]!=0;
       
   314   }else{
       
   315     return sqlite3BitvecTest(pPager->pInStmt, pPg->pgno);
       
   316   }
       
   317 }
       
   318 
       
   319 /*
       
   320 ** Read a 32-bit integer from the given file descriptor.  Store the integer
       
   321 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
       
   322 ** error code is something goes wrong.
       
   323 **
       
   324 ** All values are stored on disk as big-endian.
       
   325 */
       
   326 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
       
   327   unsigned char ac[4];
       
   328   int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
       
   329   if( rc==SQLITE_OK ){
       
   330     *pRes = sqlite3Get4byte(ac);
       
   331   }
       
   332   return rc;
       
   333 }
       
   334 
       
   /*
   ** Write a 32-bit integer into a string buffer in big-endian byte order.
   **
   ** A is the destination buffer and B the value.  Both arguments are
   ** parenthesized so that the (u8*) cast applies to the whole of A even
   ** when A is an expression such as "zBuf+4".
   */
   #define put32bits(A,B)  sqlite3Put4byte((u8*)(A),(B))
       
   339 
       
   340 /*
       
   341 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
       
   342 ** on success or an error code is something goes wrong.
       
   343 */
       
   344 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
       
   345   char ac[4];
       
   346   put32bits(ac, val);
       
   347   return sqlite3OsWrite(fd, ac, 4, offset);
       
   348 }
       
   349 
       
   350 /*
       
   351 ** If file pFd is open, call sqlite3OsUnlock() on it.
       
   352 */
       
   353 static int osUnlock(sqlite3_file *pFd, int eLock){
       
   354   if( !pFd->pMethods ){
       
   355     return SQLITE_OK;
       
   356   }
       
   357   return sqlite3OsUnlock(pFd, eLock);
       
   358 }
       
   359 
       
   /*
   ** Decide whether the atomic-write optimization can be used with this
   ** pager.  It can be used when either the database file is not open
   ** (fd->pMethods is NULL), or both of the following hold:
   **
   **  (a) the value returned by OsDeviceCharacteristics() has
   **      SQLITE_IOCAP_ATOMIC set, or has the bit corresponding to the
   **      page size (pageSize>>8) set, and
   **  (b) the value returned by OsSectorSize() is less than or equal
   **      to the page size.
   **
   ** Returns 0 if the optimization cannot be used.  Otherwise returns the
   ** size of a journal file holding rollback data for exactly one page:
   ** one journal header plus one page record.
   */
   #ifdef SQLITE_ENABLE_ATOMIC_WRITE
   static int jrnlBufferSize(Pager *pPager){
     sqlite3_file *pFile = pPager->fd;
     int canAtomic = 1;        /* Stays true when the file is not open */

     /* The "szPage>>8" trick below relies on these bit assignments */
     assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
     assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

     if( pFile->pMethods ){
       int devChar = sqlite3OsDeviceCharacteristics(pFile);
       int szSector = sqlite3OsSectorSize(pFile);
       int szPage = pPager->pageSize;
       canAtomic = (devChar & (SQLITE_IOCAP_ATOMIC|(szPage>>8)))
                && szSector<=szPage;
     }
     if( !canAtomic ){
       return 0;
     }
     return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
   }
   #endif
       
   396 
       
   397 /*
       
   398 ** This function should be called when an error occurs within the pager
       
   399 ** code. The first argument is a pointer to the pager structure, the
       
   400 ** second the error-code about to be returned by a pager API function. 
       
   401 ** The value returned is a copy of the second argument to this function. 
       
   402 **
       
   403 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
       
   404 ** the error becomes persistent. Until the persisten error is cleared,
       
   405 ** subsequent API calls on this Pager will immediately return the same 
       
   406 ** error code.
       
   407 **
       
   408 ** A persistent error indicates that the contents of the pager-cache 
       
   409 ** cannot be trusted. This state can be cleared by completely discarding 
       
   410 ** the contents of the pager-cache. If a transaction was active when
       
   411 ** the persistent error occured, then the rollback journal may need
       
   412 ** to be replayed.
       
   413 */
       
   414 static void pager_unlock(Pager *pPager);
       
   415 static int pager_error(Pager *pPager, int rc){
       
   416   int rc2 = rc & 0xff;
       
   417   assert(
       
   418        pPager->errCode==SQLITE_FULL ||
       
   419        pPager->errCode==SQLITE_OK ||
       
   420        (pPager->errCode & 0xff)==SQLITE_IOERR
       
   421   );
       
   422   if(
       
   423     rc2==SQLITE_FULL ||
       
   424     rc2==SQLITE_IOERR ||
       
   425     rc2==SQLITE_CORRUPT
       
   426   ){
       
   427     pPager->errCode = rc;
       
   428     if( pPager->state==PAGER_UNLOCK 
       
   429      && sqlite3PcacheRefCount(pPager->pPCache)==0 
       
   430     ){
       
   431       /* If the pager is already unlocked, call pager_unlock() now to
       
   432       ** clear the error state and ensure that the pager-cache is 
       
   433       ** completely empty.
       
   434       */
       
   435       pager_unlock(pPager);
       
   436     }
       
   437   }
       
   438   return rc;
       
   439 }
       
   440 
       
   441 /*
       
   442 ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
       
   443 ** on the cache using a hash function.  This is used for testing
       
   444 ** and debugging only.
       
   445 */
       
   446 #ifdef SQLITE_CHECK_PAGES
       
   447 /*
       
   448 ** Return a 32-bit hash of the page data for pPage.
       
   449 */
       
   450 static u32 pager_datahash(int nByte, unsigned char *pData){
       
   451   u32 hash = 0;
       
   452   int i;
       
   453   for(i=0; i<nByte; i++){
       
   454     hash = (hash*1039) + pData[i];
       
   455   }
       
   456   return hash;
       
   457 }
       
   458 static u32 pager_pagehash(PgHdr *pPage){
       
   459   return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
       
   460 }
       
   461 static u32 pager_set_pagehash(PgHdr *pPage){
       
   462   pPage->pageHash = pager_pagehash(pPage);
       
   463 }
       
   464 
       
   465 /*
       
   466 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
       
   467 ** is defined, and NDEBUG is not defined, an assert() statement checks
       
   468 ** that the page is either dirty or still matches the calculated page-hash.
       
   469 */
       
   470 #define CHECK_PAGE(x) checkPage(x)
       
   471 static void checkPage(PgHdr *pPg){
       
   472   Pager *pPager = pPg->pPager;
       
   473   assert( !pPg->pageHash || pPager->errCode || MEMDB 
       
   474       || (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
       
   475 }
       
   476 
       
   477 #else
       
   478 #define pager_datahash(X,Y)  0
       
   479 #define pager_pagehash(X)  0
       
   480 #define CHECK_PAGE(x)
       
   481 #endif  /* SQLITE_CHECK_PAGES */
       
   482 
       
   483 /*
       
   484 ** When this is called the journal file for pager pPager must be open.
       
   485 ** The master journal file name is read from the end of the file and 
       
   486 ** written into memory supplied by the caller. 
       
   487 **
       
   488 ** zMaster must point to a buffer of at least nMaster bytes allocated by
       
   489 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
       
   490 ** enough space to write the master journal name). If the master journal
       
   491 ** name in the journal is longer than nMaster bytes (including a
       
   492 ** nul-terminator), then this is handled as if no master journal name
       
   493 ** were present in the journal.
       
   494 **
       
   495 ** If no master journal file name is present zMaster[0] is set to 0 and
       
   496 ** SQLITE_OK returned.
       
   497 */
       
   498 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
       
   499   int rc;
       
   500   u32 len;
       
   501   i64 szJ;
       
   502   u32 cksum;
       
   503   u32 u;                   /* Unsigned loop counter */
       
   504   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
   505 
       
   506   zMaster[0] = '\0';
       
   507 
       
   508   rc = sqlite3OsFileSize(pJrnl, &szJ);
       
   509   if( rc!=SQLITE_OK || szJ<16 ) return rc;
       
   510 
       
   511   rc = read32bits(pJrnl, szJ-16, &len);
       
   512   if( rc!=SQLITE_OK ) return rc;
       
   513 
       
   514   if( len>=nMaster ){
       
   515     return SQLITE_OK;
       
   516   }
       
   517 
       
   518   rc = read32bits(pJrnl, szJ-12, &cksum);
       
   519   if( rc!=SQLITE_OK ) return rc;
       
   520 
       
   521   rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
       
   522   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
       
   523 
       
   524   rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
       
   525   if( rc!=SQLITE_OK ){
       
   526     return rc;
       
   527   }
       
   528   zMaster[len] = '\0';
       
   529 
       
   530   /* See if the checksum matches the master journal name */
       
   531   for(u=0; u<len; u++){
       
   532     cksum -= zMaster[u];
       
   533    }
       
   534   if( cksum ){
       
   535     /* If the checksum doesn't add up, then one or more of the disk sectors
       
   536     ** containing the master journal filename is corrupted. This means
       
   537     ** definitely roll back, so just return SQLITE_OK and report a (nul)
       
   538     ** master-journal filename.
       
   539     */
       
   540     zMaster[0] = '\0';
       
   541   }
       
   542    
       
   543   return SQLITE_OK;
       
   544 }
       
   545 
       
   546 /*
       
   547 ** Seek the journal file descriptor to the next sector boundary where a
       
   548 ** journal header may be read or written. Pager.journalOff is updated with
       
   549 ** the new seek offset.
       
   550 **
       
   551 ** i.e for a sector size of 512:
       
   552 **
       
   553 ** Input Offset              Output Offset
       
   554 ** ---------------------------------------
       
   555 ** 0                         0
       
   556 ** 512                       512
       
   557 ** 100                       512
       
   558 ** 2000                      2048
       
   559 ** 
       
   560 */
       
   561 static i64 journalHdrOffset(Pager *pPager){
       
   562   i64 offset = 0;
       
   563   i64 c = pPager->journalOff;
       
   564   if( c ){
       
   565     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
       
   566   }
       
   567   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
       
   568   assert( offset>=c );
       
   569   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
       
   570   return offset;
       
   571 }
       
   572 static void seekJournalHdr(Pager *pPager){
       
   573   pPager->journalOff = journalHdrOffset(pPager);
       
   574 }
       
   575 
       
   576 /*
       
   577 ** Write zeros over the header of the journal file.  This has the
       
   578 ** effect of invalidating the journal file and committing the
       
   579 ** transaction.
       
   580 */
       
   581 static int zeroJournalHdr(Pager *pPager, int doTruncate){
       
   582   int rc = SQLITE_OK;
       
   583   static const char zeroHdr[28] = {0};
       
   584 
       
   585   if( pPager->journalOff ){
       
   586     i64 iLimit = pPager->journalSizeLimit;
       
   587 
       
   588     IOTRACE(("JZEROHDR %p\n", pPager))
       
   589     if( doTruncate || iLimit==0 ){
       
   590       rc = sqlite3OsTruncate(pPager->jfd, 0);
       
   591     }else{
       
   592       rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
       
   593     }
       
   594     if( rc==SQLITE_OK && !pPager->noSync ){
       
   595       rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->sync_flags);
       
   596     }
       
   597 
       
   598     /* At this point the transaction is committed but the write lock 
       
   599     ** is still held on the file. If there is a size limit configured for 
       
   600     ** the persistent journal and the journal file currently consumes more
       
   601     ** space than that limit allows for, truncate it now. There is no need
       
   602     ** to sync the file following this operation.
       
   603     */
       
   604     if( rc==SQLITE_OK && iLimit>0 ){
       
   605       i64 sz;
       
   606       rc = sqlite3OsFileSize(pPager->jfd, &sz);
       
   607       if( rc==SQLITE_OK && sz>iLimit ){
       
   608         rc = sqlite3OsTruncate(pPager->jfd, iLimit);
       
   609       }
       
   610     }
       
   611   }
       
   612   return rc;
       
   613 }
       
   614 
       
   615 /*
       
   616 ** The journal file must be open when this routine is called. A journal
       
   617 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
       
   618 ** current location.
       
   619 **
       
   620 ** The format for the journal header is as follows:
       
   621 ** - 8 bytes: Magic identifying journal format.
       
   622 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
       
   623 ** - 4 bytes: Random number used for page hash.
       
   624 ** - 4 bytes: Initial database page count.
       
   625 ** - 4 bytes: Sector size used by the process that wrote this journal.
       
   626 ** - 4 bytes: Database page size.
       
   627 ** 
       
   628 ** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
       
   629 */
       
   630 static int writeJournalHdr(Pager *pPager){
       
   631   int rc = SQLITE_OK;
       
   632   char *zHeader = pPager->pTmpSpace;
       
   633   int nHeader = pPager->pageSize;
       
   634   int nWrite;
       
   635 
       
   636   if( nHeader>JOURNAL_HDR_SZ(pPager) ){
       
   637     nHeader = JOURNAL_HDR_SZ(pPager);
       
   638   }
       
   639 
       
   640   if( pPager->stmtHdrOff==0 ){
       
   641     pPager->stmtHdrOff = pPager->journalOff;
       
   642   }
       
   643 
       
   644   seekJournalHdr(pPager);
       
   645   pPager->journalHdr = pPager->journalOff;
       
   646 
       
   647   memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
       
   648 
       
   649   /* 
       
   650   ** Write the nRec Field - the number of page records that follow this
       
   651   ** journal header. Normally, zero is written to this value at this time.
       
   652   ** After the records are added to the journal (and the journal synced, 
       
   653   ** if in full-sync mode), the zero is overwritten with the true number
       
   654   ** of records (see syncJournal()).
       
   655   **
       
   656   ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
       
   657   ** reading the journal this value tells SQLite to assume that the
       
   658   ** rest of the journal file contains valid page records. This assumption
       
   659   ** is dangerous, as if a failure occured whilst writing to the journal
       
   660   ** file it may contain some garbage data. There are two scenarios
       
   661   ** where this risk can be ignored:
       
   662   **
       
   663   **   * When the pager is in no-sync mode. Corruption can follow a
       
   664   **     power failure in this case anyway.
       
   665   **
       
   666   **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
       
   667   **     that garbage data is never appended to the journal file.
       
   668   */
       
   669   assert(pPager->fd->pMethods||pPager->noSync);
       
   670   if( (pPager->noSync) 
       
   671    || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
       
   672   ){
       
   673     put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
       
   674   }else{
       
   675     put32bits(&zHeader[sizeof(aJournalMagic)], 0);
       
   676   }
       
   677 
       
   678   /* The random check-hash initialiser */ 
       
   679   sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
       
   680   put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
       
   681   /* The initial database size */
       
   682   put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
       
   683   /* The assumed sector size for this process */
       
   684   put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
       
   685   if( pPager->journalHdr==0 ){
       
   686     /* The page size */
       
   687     put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
       
   688   }
       
   689 
       
   690   for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
       
   691     IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
       
   692     rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
       
   693     pPager->journalOff += nHeader;
       
   694   }
       
   695 
       
   696   return rc;
       
   697 }
       
   698 
       
   699 /*
       
   700 ** The journal file must be open when this is called. A journal header file
       
   701 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
       
   702 ** file. See comments above function writeJournalHdr() for a description of
       
   703 ** the journal header format.
       
   704 **
       
   705 ** If the header is read successfully, *nRec is set to the number of
       
   706 ** page records following this header and *dbSize is set to the size of the
       
   707 ** database before the transaction began, in pages. Also, pPager->cksumInit
       
   708 ** is set to the value read from the journal header. SQLITE_OK is returned
       
   709 ** in this case.
       
   710 **
       
   711 ** If the journal header file appears to be corrupted, SQLITE_DONE is
       
   712 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
       
   713 ** cannot be read from the journal file an error code is returned.
       
   714 */
       
   715 static int readJournalHdr(
       
   716   Pager *pPager, 
       
   717   i64 journalSize,
       
   718   u32 *pNRec, 
       
   719   u32 *pDbSize
       
   720 ){
       
   721   int rc;
       
   722   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
   723   i64 jrnlOff;
       
   724   int iPageSize;
       
   725 
       
   726   seekJournalHdr(pPager);
       
   727   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
       
   728     return SQLITE_DONE;
       
   729   }
       
   730   jrnlOff = pPager->journalOff;
       
   731 
       
   732   rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
       
   733   if( rc ) return rc;
       
   734   jrnlOff += sizeof(aMagic);
       
   735 
       
   736   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
       
   737     return SQLITE_DONE;
       
   738   }
       
   739 
       
   740   rc = read32bits(pPager->jfd, jrnlOff, pNRec);
       
   741   if( rc ) return rc;
       
   742 
       
   743   rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
       
   744   if( rc ) return rc;
       
   745 
       
   746   rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
       
   747   if( rc ) return rc;
       
   748 
       
   749   rc = read32bits(pPager->jfd, jrnlOff+16, (u32 *)&iPageSize);
       
   750   if( rc==SQLITE_OK 
       
   751    && iPageSize>=512 
       
   752    && iPageSize<=SQLITE_MAX_PAGE_SIZE 
       
   753    && ((iPageSize-1)&iPageSize)==0 
       
   754   ){
       
   755     u16 pagesize = iPageSize;
       
   756     rc = sqlite3PagerSetPagesize(pPager, &pagesize);
       
   757   }
       
   758   if( rc ) return rc;
       
   759 
       
   760   /* Update the assumed sector-size to match the value used by 
       
   761   ** the process that created this journal. If this journal was
       
   762   ** created by a process other than this one, then this routine
       
   763   ** is being called from within pager_playback(). The local value
       
   764   ** of Pager.sectorSize is restored at the end of that routine.
       
   765   */
       
   766   rc = read32bits(pPager->jfd, jrnlOff+12, &pPager->sectorSize);
       
   767   if( rc ) return rc;
       
   768   if( (pPager->sectorSize & (pPager->sectorSize-1))!=0
       
   769         || pPager->sectorSize>0x1000000 ){
       
   770     return SQLITE_DONE;
       
   771   }
       
   772 
       
   773   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
       
   774   return SQLITE_OK;
       
   775 }
       
   776 
       
   777 
       
   778 /*
       
   779 ** Write the supplied master journal name into the journal file for pager
       
   780 ** pPager at the current location. The master journal name must be the last
       
   781 ** thing written to a journal file. If the pager is in full-sync mode, the
       
   782 ** journal file descriptor is advanced to the next sector boundary before
       
   783 ** anything is written. The format is:
       
   784 **
       
   785 ** + 4 bytes: PAGER_MJ_PGNO.
       
   786 ** + N bytes: length of master journal name.
       
   787 ** + 4 bytes: N
       
   788 ** + 4 bytes: Master journal name checksum.
       
   789 ** + 8 bytes: aJournalMagic[].
       
   790 **
       
   791 ** The master journal page checksum is the sum of the bytes in the master
       
   792 ** journal name.
       
   793 **
       
   794 ** If zMaster is a NULL pointer (occurs for a single database transaction), 
       
   795 ** this call is a no-op.
       
   796 */
       
   797 static int writeMasterJournal(Pager *pPager, const char *zMaster){
       
   798   int rc;
       
   799   int len; 
       
   800   int i; 
       
   801   i64 jrnlOff;
       
   802   i64 jrnlSize;
       
   803   u32 cksum = 0;
       
   804   char zBuf[sizeof(aJournalMagic)+2*4];
       
   805 
       
   806   if( !zMaster || pPager->setMaster) return SQLITE_OK;
       
   807   pPager->setMaster = 1;
       
   808 
       
   809   len = strlen(zMaster);
       
   810   for(i=0; i<len; i++){
       
   811     cksum += zMaster[i];
       
   812   }
       
   813 
       
   814   /* If in full-sync mode, advance to the next disk sector before writing
       
   815   ** the master journal name. This is in case the previous page written to
       
   816   ** the journal has already been synced.
       
   817   */
       
   818   if( pPager->fullSync ){
       
   819     seekJournalHdr(pPager);
       
   820   }
       
   821   jrnlOff = pPager->journalOff;
       
   822   pPager->journalOff += (len+20);
       
   823 
       
   824   rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
       
   825   if( rc!=SQLITE_OK ) return rc;
       
   826   jrnlOff += 4;
       
   827 
       
   828   rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
       
   829   if( rc!=SQLITE_OK ) return rc;
       
   830   jrnlOff += len;
       
   831 
       
   832   put32bits(zBuf, len);
       
   833   put32bits(&zBuf[4], cksum);
       
   834   memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
       
   835   rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
       
   836   jrnlOff += 8+sizeof(aJournalMagic);
       
   837   pPager->needSync = !pPager->noSync;
       
   838 
       
   839   /* If the pager is in peristent-journal mode, then the physical 
       
   840   ** journal-file may extend past the end of the master-journal name
       
   841   ** and 8 bytes of magic data just written to the file. This is 
       
   842   ** dangerous because the code to rollback a hot-journal file
       
   843   ** will not be able to find the master-journal name to determine 
       
   844   ** whether or not the journal is hot. 
       
   845   **
       
   846   ** Easiest thing to do in this scenario is to truncate the journal 
       
   847   ** file to the required size.
       
   848   */ 
       
   849   if( (rc==SQLITE_OK)
       
   850    && (rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))==SQLITE_OK
       
   851    && jrnlSize>jrnlOff
       
   852   ){
       
   853     rc = sqlite3OsTruncate(pPager->jfd, jrnlOff);
       
   854   }
       
   855   return rc;
       
   856 }
       
   857 
       
   858 /*
       
   859 ** Find a page in the hash table given its page number.  Return
       
   860 ** a pointer to the page or NULL if not found.
       
   861 */
       
   862 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
       
   863   PgHdr *p;
       
   864   sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
       
   865   return p;
       
   866 }
       
   867 
       
   868 /*
       
   869 ** Clear the in-memory cache.  This routine
       
   870 ** sets the state of the pager back to what it was when it was first
       
   871 ** opened.  Any outstanding pages are invalidated and subsequent attempts
       
   872 ** to access those pages will likely result in a coredump.
       
   873 */
       
   874 static void pager_reset(Pager *pPager){
       
   875   if( pPager->errCode ) return;
       
   876   sqlite3PcacheClear(pPager->pPCache);
       
   877 }
       
   878 
       
   879 /*
       
   880 ** Unlock the database file. 
       
   881 **
       
   882 ** If the pager is currently in error state, discard the contents of 
       
   883 ** the cache and reset the Pager structure internal state. If there is
       
   884 ** an open journal-file, then the next time a shared-lock is obtained
       
   885 ** on the pager file (by this or any other process), it will be
       
   886 ** treated as a hot-journal and rolled back.
       
   887 */
       
   888 static void pager_unlock(Pager *pPager){
       
   889   if( !pPager->exclusiveMode ){
       
   890     if( !MEMDB ){
       
   891       int rc = osUnlock(pPager->fd, NO_LOCK);
       
   892       if( rc ) pPager->errCode = rc;
       
   893       pPager->dbSize = -1;
       
   894       IOTRACE(("UNLOCK %p\n", pPager))
       
   895 
       
   896       /* Always close the journal file when dropping the database lock.
       
   897       ** Otherwise, another connection with journal_mode=delete might
       
   898       ** delete the file out from under us.
       
   899       */
       
   900       if( pPager->journalOpen ){
       
   901         sqlite3OsClose(pPager->jfd);
       
   902         pPager->journalOpen = 0;
       
   903         sqlite3BitvecDestroy(pPager->pInJournal);
       
   904         pPager->pInJournal = 0;
       
   905         sqlite3BitvecDestroy(pPager->pAlwaysRollback);
       
   906         pPager->pAlwaysRollback = 0;
       
   907       }
       
   908 
       
   909       /* If Pager.errCode is set, the contents of the pager cache cannot be
       
   910       ** trusted. Now that the pager file is unlocked, the contents of the
       
   911       ** cache can be discarded and the error code safely cleared.
       
   912       */
       
   913       if( pPager->errCode ){
       
   914         if( rc==SQLITE_OK ) pPager->errCode = SQLITE_OK;
       
   915         pager_reset(pPager);
       
   916         if( pPager->stmtOpen ){
       
   917           sqlite3OsClose(pPager->stfd);
       
   918           sqlite3BitvecDestroy(pPager->pInStmt);
       
   919           pPager->pInStmt = 0;
       
   920         }
       
   921         pPager->stmtOpen = 0;
       
   922         pPager->stmtInUse = 0;
       
   923         pPager->journalOff = 0;
       
   924         pPager->journalStarted = 0;
       
   925         pPager->stmtAutoopen = 0;
       
   926         pPager->origDbSize = 0;
       
   927       }
       
   928     }
       
   929 
       
   930     if( !MEMDB || pPager->errCode==SQLITE_OK ){
       
   931       pPager->state = PAGER_UNLOCK;
       
   932       pPager->changeCountDone = 0;
       
   933     }
       
   934   }
       
   935 }
       
   936 
       
   937 /*
       
   938 ** Execute a rollback if a transaction is active and unlock the 
       
   939 ** database file. If the pager has already entered the error state, 
       
   940 ** do not attempt the rollback.
       
   941 */
       
   942 static void pagerUnlockAndRollback(Pager *p){
       
   943   if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
       
   944     sqlite3BeginBenignMalloc();
       
   945     sqlite3PagerRollback(p);
       
   946     sqlite3EndBenignMalloc();
       
   947   }
       
   948   pager_unlock(p);
       
   949 }
       
   950 
       
   951 /*
       
   952 ** This routine ends a transaction.  A transaction is ended by either
       
   953 ** a COMMIT or a ROLLBACK.
       
   954 **
       
   955 ** When this routine is called, the pager has the journal file open and
       
   956 ** a RESERVED or EXCLUSIVE lock on the database.  This routine will release
       
   957 ** the database lock and acquires a SHARED lock in its place if that is
       
   958 ** the appropriate thing to do.  Release locks usually is appropriate,
       
   959 ** unless we are in exclusive access mode or unless this is a 
       
   960 ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
       
   961 **
       
   962 ** The journal file is either deleted or truncated.
       
   963 **
       
   964 ** TODO: Consider keeping the journal file open for temporary databases.
       
   965 ** This might give a performance improvement on windows where opening
       
   966 ** a file is an expensive operation.
       
   967 */
       
   968 static int pager_end_transaction(Pager *pPager, int hasMaster){
       
   969   int rc = SQLITE_OK;
       
   970   int rc2 = SQLITE_OK;
       
   971   assert( !MEMDB );
       
   972   if( pPager->state<PAGER_RESERVED ){
       
   973     return SQLITE_OK;
       
   974   }
       
   975   sqlite3PagerStmtCommit(pPager);
       
   976   if( pPager->stmtOpen && !pPager->exclusiveMode ){
       
   977     sqlite3OsClose(pPager->stfd);
       
   978     pPager->stmtOpen = 0;
       
   979   }
       
   980   if( pPager->journalOpen ){
       
   981     if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE
       
   982          && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){
       
   983       pPager->journalOff = 0;
       
   984       pPager->journalStarted = 0;
       
   985     }else if( pPager->exclusiveMode 
       
   986      || pPager->journalMode==PAGER_JOURNALMODE_PERSIST
       
   987     ){
       
   988       rc = zeroJournalHdr(pPager, hasMaster);
       
   989       pager_error(pPager, rc);
       
   990       pPager->journalOff = 0;
       
   991       pPager->journalStarted = 0;
       
   992     }else{
       
   993       assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE || rc );
       
   994       sqlite3OsClose(pPager->jfd);
       
   995       pPager->journalOpen = 0;
       
   996       if( rc==SQLITE_OK && !pPager->tempFile ){
       
   997         rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
       
   998       }
       
   999     }
       
  1000     sqlite3BitvecDestroy(pPager->pInJournal);
       
  1001     pPager->pInJournal = 0;
       
  1002     sqlite3BitvecDestroy(pPager->pAlwaysRollback);
       
  1003     pPager->pAlwaysRollback = 0;
       
  1004     sqlite3PcacheCleanAll(pPager->pPCache);
       
  1005 #ifdef SQLITE_CHECK_PAGES
       
  1006     sqlite3PcacheIterate(pPager->pPCache, pager_set_pagehash);
       
  1007 #endif
       
  1008     sqlite3PcacheClearFlags(pPager->pPCache,
       
  1009        PGHDR_IN_JOURNAL | PGHDR_NEED_SYNC
       
  1010     );
       
  1011     pPager->dirtyCache = 0;
       
  1012     pPager->nRec = 0;
       
  1013   }else{
       
  1014     assert( pPager->pInJournal==0 );
       
  1015   }
       
  1016 
       
  1017   if( !pPager->exclusiveMode ){
       
  1018     rc2 = osUnlock(pPager->fd, SHARED_LOCK);
       
  1019     pPager->state = PAGER_SHARED;
       
  1020   }else if( pPager->state==PAGER_SYNCED ){
       
  1021     pPager->state = PAGER_EXCLUSIVE;
       
  1022   }
       
  1023   pPager->origDbSize = 0;
       
  1024   pPager->setMaster = 0;
       
  1025   pPager->needSync = 0;
       
  1026   /* lruListSetFirstSynced(pPager); */
       
  1027   pPager->dbSize = -1;
       
  1028   pPager->dbModified = 0;
       
  1029 
       
  1030   return (rc==SQLITE_OK?rc2:rc);
       
  1031 }
       
  1032 
       
  1033 /*
       
  1034 ** Compute and return a checksum for the page of data.
       
  1035 **
       
  1036 ** This is not a real checksum.  It is really just the sum of the 
       
  1037 ** random initial value and the page number.  We experimented with
       
  1038 ** a checksum of the entire data, but that was found to be too slow.
       
  1039 **
       
  1040 ** Note that the page number is stored at the beginning of data and
       
  1041 ** the checksum is stored at the end.  This is important.  If journal
       
  1042 ** corruption occurs due to a power failure, the most likely scenario
       
  1043 ** is that one end or the other of the record will be changed.  It is
       
  1044 ** much less likely that the two ends of the journal record will be
       
  1045 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
       
  1046 ** though fast and simple, catches the mostly likely kind of corruption.
       
  1047 **
       
  1048 ** FIX ME:  Consider adding every 200th (or so) byte of the data to the
       
  1049 ** checksum.  That way if a single page spans 3 or more disk sectors and
       
  1050 ** only the middle sector is corrupt, we will still have a reasonable
       
  1051 ** chance of failing the checksum and thus detecting the problem.
       
  1052 */
       
  1053 static u32 pager_cksum(Pager *pPager, const u8 *aData){
       
  1054   u32 cksum = pPager->cksumInit;
       
  1055   int i = pPager->pageSize-200;
       
  1056   while( i>0 ){
       
  1057     cksum += aData[i];
       
  1058     i -= 200;
       
  1059   }
       
  1060   return cksum;
       
  1061 }
       
  1062 
       
  1063 /*
       
  1064 ** Read a single page from the journal file opened on file descriptor
       
  1065 ** jfd.  Playback this one page.
       
  1066 **
       
  1067 ** The isMainJrnl flag is true if this is the main rollback journal and
       
  1068 ** false for the statement journal.  The main rollback journal uses
       
  1069 ** checksums - the statement journal does not.
       
  1070 */
       
  1071 static int pager_playback_one_page(
       
  1072   Pager *pPager,       /* The pager being played back */
       
  1073   sqlite3_file *jfd,   /* The file that is the journal being rolled back */
       
  1074   i64 offset,          /* Offset of the page within the journal */
       
  1075   int isMainJrnl,      /* True for main rollback journal. False for Stmt jrnl */
       
  1076   int isUnsync         /* True if reading from usynced main journal */
       
  1077 ){
       
  1078   int rc;
       
  1079   PgHdr *pPg;                   /* An existing page in the cache */
       
  1080   Pgno pgno;                    /* The page number of a page in journal */
       
  1081   u32 cksum;                    /* Checksum used for sanity checking */
       
  1082   u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
       
  1083 
       
  1084   /* isMainJrnl should be true for the main journal and false for
       
  1085   ** statement journals.  Verify that this is always the case
       
  1086   */
       
  1087   assert( jfd == (isMainJrnl ? pPager->jfd : pPager->stfd) );
       
  1088   assert( aData );
       
  1089 
       
  1090   rc = read32bits(jfd, offset, &pgno);
       
  1091   if( rc!=SQLITE_OK ) return rc;
       
  1092   rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
       
  1093   if( rc!=SQLITE_OK ) return rc;
       
  1094   pPager->journalOff += pPager->pageSize + 4;
       
  1095 
       
  1096   /* Sanity checking on the page.  This is more important that I originally
       
  1097   ** thought.  If a power failure occurs while the journal is being written,
       
  1098   ** it could cause invalid data to be written into the journal.  We need to
       
  1099   ** detect this invalid data (with high probability) and ignore it.
       
  1100   */
       
  1101   if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
       
  1102     return SQLITE_DONE;
       
  1103   }
       
  1104   if( pgno>(unsigned)pPager->dbSize ){
       
  1105     return SQLITE_OK;
       
  1106   }
       
  1107   if( isMainJrnl ){
       
  1108     rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
       
  1109     if( rc ) return rc;
       
  1110     pPager->journalOff += 4;
       
  1111     if( pager_cksum(pPager, aData)!=cksum ){
       
  1112       return SQLITE_DONE;
       
  1113     }
       
  1114   }
       
  1115 
       
  1116   assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
       
  1117 
       
  1118   /* If the pager is in RESERVED state, then there must be a copy of this
       
  1119   ** page in the pager cache. In this case just update the pager cache,
       
  1120   ** not the database file. The page is left marked dirty in this case.
       
  1121   **
       
  1122   ** An exception to the above rule: If the database is in no-sync mode
       
  1123   ** and a page is moved during an incremental vacuum then the page may
       
  1124   ** not be in the pager cache. Later: if a malloc() or IO error occurs
       
  1125   ** during a Movepage() call, then the page may not be in the cache
       
  1126   ** either. So the condition described in the above paragraph is not
       
  1127   ** assert()able.
       
  1128   **
       
  1129   ** If in EXCLUSIVE state, then we update the pager cache if it exists
       
  1130   ** and the main file. The page is then marked not dirty.
       
  1131   **
       
  1132   ** Ticket #1171:  The statement journal might contain page content that is
       
  1133   ** different from the page content at the start of the transaction.
       
  1134   ** This occurs when a page is changed prior to the start of a statement
       
  1135   ** then changed again within the statement.  When rolling back such a
       
  1136   ** statement we must not write to the original database unless we know
       
  1137   ** for certain that original page contents are synced into the main rollback
       
  1138   ** journal.  Otherwise, a power loss might leave modified data in the
       
  1139   ** database file without an entry in the rollback journal that can
       
  1140   ** restore the database to its original form.  Two conditions must be
       
  1141   ** met before writing to the database files. (1) the database must be
       
  1142   ** locked.  (2) we know that the original page content is fully synced
       
  1143   ** in the main journal either because the page is not in cache or else
       
  1144   ** the page is marked as needSync==0.
       
  1145   **
       
  1146   ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
       
  1147   ** is possible to fail a statement on a database that does not yet exist.
       
  1148   ** Do not attempt to write if database file has never been opened.
       
  1149   */
       
  1150   pPg = pager_lookup(pPager, pgno);
       
  1151   PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
       
  1152                PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
       
  1153   if( (pPager->state>=PAGER_EXCLUSIVE)
       
  1154    && (pPg==0 || 0==(pPg->flags&PGHDR_NEED_SYNC))
       
  1155    && (pPager->fd->pMethods)
       
  1156    && !isUnsync
       
  1157   ){
       
  1158     i64 ofst = (pgno-1)*(i64)pPager->pageSize;
       
  1159     rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, ofst);
       
  1160   }
       
  1161   if( pPg ){
       
  1162     /* No page should ever be explicitly rolled back that is in use, except
       
  1163     ** for page 1 which is held in use in order to keep the lock on the
       
  1164     ** database active. However such a page may be rolled back as a result
       
  1165     ** of an internal error resulting in an automatic call to
       
  1166     ** sqlite3PagerRollback().
       
  1167     */
       
  1168     void *pData;
       
  1169     pData = pPg->pData;
       
  1170     memcpy(pData, aData, pPager->pageSize);
       
  1171     if( pPager->xReiniter ){
       
  1172       pPager->xReiniter(pPg);
       
  1173     }
       
  1174     if( isMainJrnl ) sqlite3PcacheMakeClean(pPg);
       
  1175 #ifdef SQLITE_CHECK_PAGES
       
  1176     pPg->pageHash = pager_pagehash(pPg);
       
  1177 #endif
       
  1178     /* If this was page 1, then restore the value of Pager.dbFileVers.
       
  1179     ** Do this before any decoding. */
       
  1180     if( pgno==1 ){
       
  1181       memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
       
  1182     }
       
  1183 
       
  1184     /* Decode the page just read from disk */
       
  1185     CODEC1(pPager, pData, pPg->pgno, 3);
       
  1186     sqlite3PcacheRelease(pPg);
       
  1187   }
       
  1188   return rc;
       
  1189 }
       
  1190 
       
  1191 /*
       
  1192 ** Parameter zMaster is the name of a master journal file. A single journal
       
  1193 ** file that referred to the master journal file has just been rolled back.
       
  1194 ** This routine checks if it is possible to delete the master journal file,
       
  1195 ** and does so if it is.
       
  1196 **
       
  1197 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
       
  1198 ** available for use within this function.
       
  1199 **
       
  1200 **
       
  1201 ** The master journal file contains the names of all child journals.
       
  1202 ** To tell if a master journal can be deleted, check each of the
       
  1203 ** children.  If all children are either missing or do not refer to
       
  1204 ** this master journal, then this master journal can be deleted.
       
  1205 */
       
  1206 static int pager_delmaster(Pager *pPager, const char *zMaster){
       
  1207   sqlite3_vfs *pVfs = pPager->pVfs;
       
  1208   int rc;
       
  1209   int master_open = 0;
       
  1210   sqlite3_file *pMaster;
       
  1211   sqlite3_file *pJournal;
       
  1212   char *zMasterJournal = 0; /* Contents of master journal file */
       
  1213   i64 nMasterJournal;       /* Size of master journal file */
       
  1214 
       
  1215   /* Open the master journal file exclusively in case some other process
       
  1216   ** is running this routine also. Not that it makes too much difference.
       
  1217   */
       
  1218   pMaster = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile * 2);
       
  1219   pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
       
  1220   if( !pMaster ){
       
  1221     rc = SQLITE_NOMEM;
       
  1222   }else{
       
  1223     int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
       
  1224     rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
       
  1225   }
       
  1226   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1227   master_open = 1;
       
  1228 
       
  1229   rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
       
  1230   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1231 
       
  1232   if( nMasterJournal>0 ){
       
  1233     char *zJournal;
       
  1234     char *zMasterPtr = 0;
       
  1235     int nMasterPtr = pPager->pVfs->mxPathname+1;
       
  1236 
       
  1237     /* Load the entire master journal file into space obtained from
       
  1238     ** sqlite3_malloc() and pointed to by zMasterJournal. 
       
  1239     */
       
  1240     zMasterJournal = (char *)sqlite3Malloc(nMasterJournal + nMasterPtr);
       
  1241     if( !zMasterJournal ){
       
  1242       rc = SQLITE_NOMEM;
       
  1243       goto delmaster_out;
       
  1244     }
       
  1245     zMasterPtr = &zMasterJournal[nMasterJournal];
       
  1246     rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
       
  1247     if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1248 
       
  1249     zJournal = zMasterJournal;
       
  1250     while( (zJournal-zMasterJournal)<nMasterJournal ){
       
  1251       int exists;
       
  1252       rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
       
  1253       if( rc!=SQLITE_OK ){
       
  1254         goto delmaster_out;
       
  1255       }
       
  1256       if( exists ){
       
  1257         /* One of the journals pointed to by the master journal exists.
       
  1258         ** Open it and check if it points at the master journal. If
       
  1259         ** so, return without deleting the master journal file.
       
  1260         */
       
  1261         int c;
       
  1262         int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
       
  1263         rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
       
  1264         if( rc!=SQLITE_OK ){
       
  1265           goto delmaster_out;
       
  1266         }
       
  1267 
       
  1268         rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
       
  1269         sqlite3OsClose(pJournal);
       
  1270         if( rc!=SQLITE_OK ){
       
  1271           goto delmaster_out;
       
  1272         }
       
  1273 
       
  1274         c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
       
  1275         if( c ){
       
  1276           /* We have a match. Do not delete the master journal file. */
       
  1277           goto delmaster_out;
       
  1278         }
       
  1279       }
       
  1280       zJournal += (strlen(zJournal)+1);
       
  1281     }
       
  1282   }
       
  1283   
       
  1284   rc = sqlite3OsDelete(pVfs, zMaster, 0);
       
  1285 
       
  1286 delmaster_out:
       
  1287   if( zMasterJournal ){
       
  1288     sqlite3_free(zMasterJournal);
       
  1289   }  
       
  1290   if( master_open ){
       
  1291     sqlite3OsClose(pMaster);
       
  1292   }
       
  1293   sqlite3_free(pMaster);
       
  1294   return rc;
       
  1295 }
       
  1296 
       
  1297 
       
  1298 static void pager_truncate_cache(Pager *pPager);
       
  1299 
       
  1300 /*
       
  1301 ** Truncate the main file of the given pager to the number of pages
       
  1302 ** indicated. Also truncate the cached representation of the file.
       
  1303 **
       
  1304 ** It might be the case that the file on disk is smaller than nPage.
       
  1305 ** This can happen, for example, if we are in the middle of a transaction
       
  1306 ** which has extended the file size and the new pages are still all held
       
  1307 ** in cache, then an INSERT or UPDATE does a statement rollback.  Some
       
  1308 ** operating system implementations can get confused if you try to
       
  1309 ** truncate a file to some size that is larger than it currently is,
       
  1310 ** so detect this case and write a single zero byte to the end of the new
       
  1311 ** file instead.
       
  1312 */
       
  1313 static int pager_truncate(Pager *pPager, int nPage){
       
  1314   int rc = SQLITE_OK;
       
  1315   if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->pMethods ){
       
  1316     i64 currentSize, newSize;
       
  1317     rc = sqlite3OsFileSize(pPager->fd, &currentSize);
       
  1318     newSize = pPager->pageSize*(i64)nPage;
       
  1319     if( rc==SQLITE_OK && currentSize!=newSize ){
       
  1320       if( currentSize>newSize ){
       
  1321         rc = sqlite3OsTruncate(pPager->fd, newSize);
       
  1322       }else{
       
  1323         rc = sqlite3OsWrite(pPager->fd, "", 1, newSize-1);
       
  1324       }
       
  1325     }
       
  1326   }
       
  1327   if( rc==SQLITE_OK ){
       
  1328     pPager->dbSize = nPage;
       
  1329     pager_truncate_cache(pPager);
       
  1330   }
       
  1331   return rc;
       
  1332 }
       
  1333 
       
  1334 /*
       
  1335 ** Set the sectorSize for the given pager.
       
  1336 **
       
  1337 ** The sector size is at least as big as the sector size reported
       
  1338 ** by sqlite3OsSectorSize().  The minimum sector size is 512.
       
  1339 */
       
  1340 static void setSectorSize(Pager *pPager){
       
  1341   assert(pPager->fd->pMethods||pPager->tempFile);
       
  1342   if( !pPager->tempFile ){
       
  1343     /* Sector size doesn't matter for temporary files. Also, the file
       
  1344     ** may not have been opened yet, in whcih case the OsSectorSize()
       
  1345     ** call will segfault.
       
  1346     */
       
  1347     pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
       
  1348   }
       
  1349   if( pPager->sectorSize<512 ){
       
  1350     pPager->sectorSize = 512;
       
  1351   }
       
  1352 }
       
  1353 
       
  1354 /*
       
  1355 ** Playback the journal and thus restore the database file to
       
  1356 ** the state it was in before we started making changes.  
       
  1357 **
       
  1358 ** The journal file format is as follows: 
       
  1359 **
       
  1360 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
       
  1361 **  (2)  4 byte big-endian integer which is the number of valid page records
       
  1362 **       in the journal.  If this value is 0xffffffff, then compute the
       
  1363 **       number of page records from the journal size.
       
  1364 **  (3)  4 byte big-endian integer which is the initial value for the 
       
  1365 **       sanity checksum.
       
  1366 **  (4)  4 byte integer which is the number of pages to truncate the
       
  1367 **       database to during a rollback.
       
  1368 **  (5)  4 byte big-endian integer which is the sector size.  The header
       
  1369 **       is this many bytes in size.
       
  1370 **  (6)  4 byte big-endian integer which is the page size.
       
  1371 **  (7)  4 byte integer which is the number of bytes in the master journal
       
  1372 **       name.  The value may be zero (indicate that there is no master
       
  1373 **       journal.)
       
  1374 **  (8)  N bytes of the master journal name.  The name will be nul-terminated
       
  1375 **       and might be shorter than the value read from (7).  If the first byte
       
  1376 **       of the name is \000 then there is no master journal.  The master
       
  1377 **       journal name is stored in UTF-8.
       
  1378 **  (9)  Zero or more pages instances, each as follows:
       
  1379 **        +  4 byte page number.
       
  1380 **        +  pPager->pageSize bytes of data.
       
  1381 **        +  4 byte checksum
       
  1382 **
       
  1383 ** When we speak of the journal header, we mean the first 8 items above.
       
  1384 ** Each entry in the journal is an instance of the 9th item.
       
  1385 **
       
  1386 ** Call the value from the second bullet "nRec".  nRec is the number of
       
  1387 ** valid page entries in the journal.  In most cases, you can compute the
       
  1388 ** value of nRec from the size of the journal file.  But if a power
       
  1389 ** failure occurred while the journal was being written, it could be the
       
  1390 ** case that the size of the journal file had already been increased but
       
  1391 ** the extra entries had not yet made it safely to disk.  In such a case,
       
  1392 ** the value of nRec computed from the file size would be too large.  For
       
  1393 ** that reason, we always use the nRec value in the header.
       
  1394 **
       
  1395 ** If the nRec value is 0xffffffff it means that nRec should be computed
       
  1396 ** from the file size.  This value is used when the user selects the
       
  1397 ** no-sync option for the journal.  A power failure could lead to corruption
       
  1398 ** in this case.  But for things like temporary table (which will be
       
  1399 ** deleted when the power is restored) we don't care.  
       
  1400 **
       
  1401 ** If the file opened as the journal file is not a well-formed
       
  1402 ** journal file then all pages up to the first corrupted page are rolled
       
  1403 ** back (or no pages if the journal header is corrupted). The journal file
       
  1404 ** is then deleted and SQLITE_OK returned, just as if no corruption had
       
  1405 ** been encountered.
       
  1406 **
       
  1407 ** If an I/O or malloc() error occurs, the journal-file is not deleted
       
  1408 ** and an error code is returned.
       
  1409 */
       
  1410 static int pager_playback(Pager *pPager, int isHot){
       
  1411   sqlite3_vfs *pVfs = pPager->pVfs;
       
  1412   i64 szJ;                 /* Size of the journal file in bytes */
       
  1413   u32 nRec;                /* Number of Records in the journal */
       
  1414   u32 u;                   /* Unsigned loop counter */
       
  1415   Pgno mxPg = 0;           /* Size of the original file in pages */
       
  1416   int rc;                  /* Result code of a subroutine */
       
  1417   int res = 1;             /* Value returned by sqlite3OsAccess() */
       
  1418   char *zMaster = 0;       /* Name of master journal file if any */
       
  1419 
       
  1420   /* Figure out how many records are in the journal.  Abort early if
       
  1421   ** the journal is empty.
       
  1422   */
       
  1423   assert( pPager->journalOpen );
       
  1424   rc = sqlite3OsFileSize(pPager->jfd, &szJ);
       
  1425   if( rc!=SQLITE_OK || szJ==0 ){
       
  1426     goto end_playback;
       
  1427   }
       
  1428 
       
  1429   /* Read the master journal name from the journal, if it is present.
       
  1430   ** If a master journal file name is specified, but the file is not
       
  1431   ** present on disk, then the journal is not hot and does not need to be
       
  1432   ** played back.
       
  1433   */
       
  1434   zMaster = pPager->pTmpSpace;
       
  1435   rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
       
  1436   if( rc==SQLITE_OK && zMaster[0] ){
       
  1437     rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
       
  1438   }
       
  1439   zMaster = 0;
       
  1440   if( rc!=SQLITE_OK || !res ){
       
  1441     goto end_playback;
       
  1442   }
       
  1443   pPager->journalOff = 0;
       
  1444 
       
  1445   /* This loop terminates either when the readJournalHdr() call returns
       
  1446   ** SQLITE_DONE or an IO error occurs. */
       
  1447   while( 1 ){
       
  1448     int isUnsync = 0;
       
  1449 
       
  1450     /* Read the next journal header from the journal file.  If there are
       
  1451     ** not enough bytes left in the journal file for a complete header, or
       
  1452     ** it is corrupted, then a process must of failed while writing it.
       
  1453     ** This indicates nothing more needs to be rolled back.
       
  1454     */
       
  1455     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
       
  1456     if( rc!=SQLITE_OK ){ 
       
  1457       if( rc==SQLITE_DONE ){
       
  1458         rc = SQLITE_OK;
       
  1459       }
       
  1460       goto end_playback;
       
  1461     }
       
  1462 
       
  1463     /* If nRec is 0xffffffff, then this journal was created by a process
       
  1464     ** working in no-sync mode. This means that the rest of the journal
       
  1465     ** file consists of pages, there are no more journal headers. Compute
       
  1466     ** the value of nRec based on this assumption.
       
  1467     */
       
  1468     if( nRec==0xffffffff ){
       
  1469       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
       
  1470       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
       
  1471     }
       
  1472 
       
  1473     /* If nRec is 0 and this rollback is of a transaction created by this
       
  1474     ** process and if this is the final header in the journal, then it means
       
  1475     ** that this part of the journal was being filled but has not yet been
       
  1476     ** synced to disk.  Compute the number of pages based on the remaining
       
  1477     ** size of the file.
       
  1478     **
       
  1479     ** The third term of the test was added to fix ticket #2565.
       
  1480     */
       
  1481     if( nRec==0 && !isHot &&
       
  1482         pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
       
  1483       nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
       
  1484       isUnsync = 1;
       
  1485     }
       
  1486 
       
  1487     /* If this is the first header read from the journal, truncate the
       
  1488     ** database file back to its original size.
       
  1489     */
       
  1490     if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
       
  1491       rc = pager_truncate(pPager, mxPg);
       
  1492       if( rc!=SQLITE_OK ){
       
  1493         goto end_playback;
       
  1494       }
       
  1495     }
       
  1496 
       
  1497     /* Copy original pages out of the journal and back into the database file.
       
  1498     */
       
  1499     for(u=0; u<nRec; u++){
       
  1500       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1,
       
  1501                                    isUnsync);
       
  1502       if( rc!=SQLITE_OK ){
       
  1503         if( rc==SQLITE_DONE ){
       
  1504           rc = SQLITE_OK;
       
  1505           pPager->journalOff = szJ;
       
  1506           break;
       
  1507         }else{
       
  1508           /* If we are unable to rollback, then the database is probably
       
  1509           ** going to end up being corrupt.  It is corrupt to us, anyhow.
       
  1510           ** Perhaps the next process to come along can fix it....
       
  1511           */
       
  1512           rc = SQLITE_CORRUPT_BKPT;
       
  1513           goto end_playback;
       
  1514         }
       
  1515       }
       
  1516     }
       
  1517   }
       
  1518   /*NOTREACHED*/
       
  1519   assert( 0 );
       
  1520 
       
  1521 end_playback:
       
  1522   /* If this playback is happening automatically as a result of an IO or 
       
  1523   ** malloc error that occured after the change-counter was updated but 
       
  1524   ** before the transaction was committed, then the change-counter 
       
  1525   ** modification may just have been reverted. If this happens in exclusive 
       
  1526   ** mode, then subsequent transactions performed by the connection will not
       
  1527   ** update the change-counter at all. This may lead to cache inconsistency
       
  1528   ** problems for other processes at some point in the future. So, just
       
  1529   ** in case this has happened, clear the changeCountDone flag now.
       
  1530   */
       
  1531   pPager->changeCountDone = 0;
       
  1532 
       
  1533   if( rc==SQLITE_OK ){
       
  1534     zMaster = pPager->pTmpSpace;
       
  1535     rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
       
  1536   }
       
  1537   if( rc==SQLITE_OK ){
       
  1538     rc = pager_end_transaction(pPager, zMaster[0]!='\0');
       
  1539   }
       
  1540   if( rc==SQLITE_OK && zMaster[0] && res ){
       
  1541     /* If there was a master journal and this routine will return success,
       
  1542     ** see if it is possible to delete the master journal.
       
  1543     */
       
  1544     rc = pager_delmaster(pPager, zMaster);
       
  1545   }
       
  1546 
       
  1547   /* The Pager.sectorSize variable may have been updated while rolling
       
  1548   ** back a journal created by a process with a different sector size
       
  1549   ** value. Reset it to the correct value for this process.
       
  1550   */
       
  1551   setSectorSize(pPager);
       
  1552   return rc;
       
  1553 }
       
  1554 
       
  1555 /*
       
  1556 ** Playback the statement journal.
       
  1557 **
       
  1558 ** This is similar to playing back the transaction journal but with
       
  1559 ** a few extra twists.
       
  1560 **
       
  1561 **    (1)  The number of pages in the database file at the start of
       
  1562 **         the statement is stored in pPager->stmtSize, not in the
       
  1563 **         journal file itself.
       
  1564 **
       
  1565 **    (2)  In addition to playing back the statement journal, also
       
  1566 **         playback all pages of the transaction journal beginning
       
  1567 **         at offset pPager->stmtJSize.
       
  1568 */
       
  1569 static int pager_stmt_playback(Pager *pPager){
       
  1570   i64 szJ;                 /* Size of the full journal */
       
  1571   i64 hdrOff;
       
  1572   int nRec;                /* Number of Records */
       
  1573   int i;                   /* Loop counter */
       
  1574   int rc;
       
  1575 
       
  1576   szJ = pPager->journalOff;
       
  1577 
       
  1578   /* Set hdrOff to be the offset just after the end of the last journal
       
  1579   ** page written before the first journal-header for this statement
       
  1580   ** transaction was written, or the end of the file if no journal
       
  1581   ** header was written.
       
  1582   */
       
  1583   hdrOff = pPager->stmtHdrOff;
       
  1584   assert( pPager->fullSync || !hdrOff );
       
  1585   if( !hdrOff ){
       
  1586     hdrOff = szJ;
       
  1587   }
       
  1588   
       
  1589   /* Truncate the database back to its original size.
       
  1590   */
       
  1591   rc = pager_truncate(pPager, pPager->stmtSize);
       
  1592   assert( pPager->state>=PAGER_SHARED );
       
  1593 
       
  1594   /* Figure out how many records are in the statement journal.
       
  1595   */
       
  1596   assert( pPager->stmtInUse && pPager->journalOpen );
       
  1597   nRec = pPager->stmtNRec;
       
  1598   
       
  1599   /* Copy original pages out of the statement journal and back into the
       
  1600   ** database file.  Note that the statement journal omits checksums from
       
  1601   ** each record since power-failure recovery is not important to statement
       
  1602   ** journals.
       
  1603   */
       
  1604   for(i=0; i<nRec; i++){
       
  1605     i64 offset = i*(4+pPager->pageSize);
       
  1606     rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0, 0);
       
  1607     assert( rc!=SQLITE_DONE );
       
  1608     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1609   }
       
  1610 
       
  1611   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
       
  1612   ** was the size of the journal file when this statement was started, so
       
  1613   ** everything after that needs to be rolled back, either into the
       
  1614   ** database, the memory cache, or both.
       
  1615   **
       
  1616   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
       
  1617   ** of the first journal header written during this statement transaction.
       
  1618   */
       
  1619   pPager->journalOff = pPager->stmtJSize;
       
  1620   pPager->cksumInit = pPager->stmtCksum;
       
  1621   while( pPager->journalOff < hdrOff ){
       
  1622     rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1, 0);
       
  1623     assert( rc!=SQLITE_DONE );
       
  1624     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1625   }
       
  1626 
       
  1627   while( pPager->journalOff < szJ ){
       
  1628     u32 nJRec;         /* Number of Journal Records */
       
  1629     u32 dummy;
       
  1630     rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
       
  1631     if( rc!=SQLITE_OK ){
       
  1632       assert( rc!=SQLITE_DONE );
       
  1633       goto end_stmt_playback;
       
  1634     }
       
  1635     if( nJRec==0 ){
       
  1636       nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
       
  1637     }
       
  1638     for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
       
  1639       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff,1,0);
       
  1640       assert( rc!=SQLITE_DONE );
       
  1641       if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1642     }
       
  1643   }
       
  1644 
       
  1645   pPager->journalOff = szJ;
       
  1646   
       
  1647 end_stmt_playback:
       
  1648   if( rc==SQLITE_OK) {
       
  1649     pPager->journalOff = szJ;
       
  1650     /* pager_reload_cache(pPager); */
       
  1651   }
       
  1652   return rc;
       
  1653 }
       
  1654 
       
  1655 /*
       
  1656 ** Change the maximum number of in-memory pages that are allowed.
       
  1657 */
       
  1658 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
       
  1659   sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
       
  1660 }
       
  1661 
       
  1662 /*
       
  1663 ** Adjust the robustness of the database to damage due to OS crashes
       
  1664 ** or power failures by changing the number of syncs()s when writing
       
  1665 ** the rollback journal.  There are three levels:
       
  1666 **
       
  1667 **    OFF       sqlite3OsSync() is never called.  This is the default
       
  1668 **              for temporary and transient files.
       
  1669 **
       
  1670 **    NORMAL    The journal is synced once before writes begin on the
       
  1671 **              database.  This is normally adequate protection, but
       
  1672 **              it is theoretically possible, though very unlikely,
       
  1673 **              that an inopportune power failure could leave the journal
       
  1674 **              in a state which would cause damage to the database
       
  1675 **              when it is rolled back.
       
  1676 **
       
  1677 **    FULL      The journal is synced twice before writes begin on the
       
  1678 **              database (with some additional information - the nRec field
       
  1679 **              of the journal header - being written in between the two
       
  1680 **              syncs).  If we assume that writing a
       
  1681 **              single disk sector is atomic, then this mode provides
       
  1682 **              assurance that the journal will not be corrupted to the
       
  1683 **              point of causing damage to the database during rollback.
       
  1684 **
       
  1685 ** Numeric values associated with these states are OFF==1, NORMAL=2,
       
  1686 ** and FULL=3.
       
  1687 */
       
  1688 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
       
  1689 void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int bFullFsync){
       
  1690   pPager->noSync =  level==1 || pPager->tempFile || MEMDB;
       
  1691   pPager->fullSync = level==3 && !pPager->tempFile;
       
  1692   pPager->sync_flags = (bFullFsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
       
  1693   if( pPager->noSync ) pPager->needSync = 0;
       
  1694 }
       
  1695 #endif
       
  1696 
       
  1697 /*
       
  1698 ** The following global variable is incremented whenever the library
       
  1699 ** attempts to open a temporary file.  This information is used for
       
  1700 ** testing and analysis only.  
       
  1701 */
       
#ifdef SQLITE_TEST
/* Only compiled in test builds; incremented by sqlite3PagerOpentemp(). */
int sqlite3_opentemp_count = 0;
#endif
       
  1705 
       
  1706 /*
       
  1707 ** Open a temporary file. 
       
  1708 **
       
  1709 ** Write the file descriptor into *fd.  Return SQLITE_OK on success or some
       
  1710 ** other error code if we fail. The OS will automatically delete the temporary
       
  1711 ** file when it is closed.
       
  1712 */
       
  1713 static int sqlite3PagerOpentemp(
       
  1714   Pager *pPager,        /* The pager object */
       
  1715   sqlite3_file *pFile,  /* Write the file descriptor here */
       
  1716   int vfsFlags          /* Flags passed through to the VFS */
       
  1717 ){
       
  1718   int rc;
       
  1719 
       
  1720 #ifdef SQLITE_TEST
       
  1721   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
       
  1722 #endif
       
  1723 
       
  1724   vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
       
  1725             SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
       
  1726   rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
       
  1727   assert( rc!=SQLITE_OK || pFile->pMethods );
       
  1728   return rc;
       
  1729 }
       
  1730 
       
  1731 static int pagerStress(void *,PgHdr *);
       
  1732 
       
  1733 /*
       
  1734 ** Create a new page cache and put a pointer to the page cache in *ppPager.
       
  1735 ** The file to be cached need not exist.  The file is not locked until
       
  1736 ** the first call to sqlite3PagerGet() and is only held open until the
       
  1737 ** last page is released using sqlite3PagerUnref().
       
  1738 **
       
  1739 ** If zFilename is NULL then a randomly-named temporary file is created
       
  1740 ** and used as the file to be cached.  The file will be deleted
       
  1741 ** automatically when it is closed.
       
  1742 **
       
  1743 ** If zFilename is ":memory:" then all information is held in cache.
       
  1744 ** It is never written to disk.  This can be used to implement an
       
  1745 ** in-memory database.
       
  1746 */
       
  1747 int sqlite3PagerOpen(
       
  1748   sqlite3_vfs *pVfs,       /* The virtual file system to use */
       
  1749   Pager **ppPager,         /* Return the Pager structure here */
       
  1750   const char *zFilename,   /* Name of the database file to open */
       
  1751   int nExtra,              /* Extra bytes append to each in-memory page */
       
  1752   int flags,               /* flags controlling this file */
       
  1753   int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
       
  1754 ){
       
  1755   u8 *pPtr;
       
  1756   Pager *pPager = 0;
       
  1757   int rc = SQLITE_OK;
       
  1758   int i;
       
  1759   int tempFile = 0;
       
  1760   int memDb = 0;
       
  1761   int readOnly = 0;
       
  1762   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
       
  1763   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
       
  1764   int journalFileSize = sqlite3JournalSize(pVfs);
       
  1765   int pcacheSize = sqlite3PcacheSize();
       
  1766   int szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;
       
  1767   char *zPathname = 0;
       
  1768   int nPathname = 0;
       
  1769 
       
  1770   /* The default return is a NULL pointer */
       
  1771   *ppPager = 0;
       
  1772 
       
  1773   /* Compute and store the full pathname in an allocated buffer pointed
       
  1774   ** to by zPathname, length nPathname. Or, if this is a temporary file,
       
  1775   ** leave both nPathname and zPathname set to 0.
       
  1776   */
       
  1777   if( zFilename && zFilename[0] ){
       
  1778     nPathname = pVfs->mxPathname+1;
       
  1779     zPathname = sqlite3Malloc(nPathname*2);
       
  1780     if( zPathname==0 ){
       
  1781       return SQLITE_NOMEM;
       
  1782     }
       
  1783 #ifndef SQLITE_OMIT_MEMORYDB
       
  1784     if( strcmp(zFilename,":memory:")==0 ){
       
  1785       memDb = 1;
       
  1786       zPathname[0] = 0;
       
  1787       useJournal = 0;
       
  1788     }else
       
  1789 #endif
       
  1790     {
       
  1791       rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
       
  1792     }
       
  1793     if( rc!=SQLITE_OK ){
       
  1794       sqlite3_free(zPathname);
       
  1795       return rc;
       
  1796     }
       
  1797     nPathname = strlen(zPathname);
       
  1798   }
       
  1799 
       
  1800   /* Allocate memory for the pager structure */
       
  1801   pPager = sqlite3MallocZero(
       
  1802     sizeof(*pPager) +           /* Pager structure */
       
  1803     pcacheSize      +           /* PCache object */
       
  1804     journalFileSize +           /* The journal file structure */ 
       
  1805     pVfs->szOsFile * 3 +        /* The main db and two journal files */ 
       
  1806     3*nPathname + 40            /* zFilename, zDirectory, zJournal */
       
  1807   );
       
  1808   if( !pPager ){
       
  1809     sqlite3_free(zPathname);
       
  1810     return SQLITE_NOMEM;
       
  1811   }
       
  1812   pPager->pPCache = (PCache *)&pPager[1];
       
  1813   pPtr = ((u8 *)&pPager[1]) + pcacheSize;
       
  1814   pPager->vfsFlags = vfsFlags;
       
  1815   pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
       
  1816   pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
       
  1817   pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
       
  1818   pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
       
  1819   pPager->zDirectory = &pPager->zFilename[nPathname+1];
       
  1820   pPager->zJournal = &pPager->zDirectory[nPathname+1];
       
  1821   pPager->pVfs = pVfs;
       
  1822   if( zPathname ){
       
  1823     memcpy(pPager->zFilename, zPathname, nPathname+1);
       
  1824     sqlite3_free(zPathname);
       
  1825   }
       
  1826 
       
  1827   /* Open the pager file.
       
  1828   */
       
  1829   if( zFilename && zFilename[0] && !memDb ){
       
  1830     if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
       
  1831       rc = SQLITE_CANTOPEN;
       
  1832     }else{
       
  1833       int fout = 0;
       
  1834       rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd,
       
  1835                          pPager->vfsFlags, &fout);
       
  1836       readOnly = (fout&SQLITE_OPEN_READONLY);
       
  1837 
       
  1838       /* If the file was successfully opened for read/write access,
       
  1839       ** choose a default page size in case we have to create the
       
  1840       ** database file. The default page size is the maximum of:
       
  1841       **
       
  1842       **    + SQLITE_DEFAULT_PAGE_SIZE,
       
  1843       **    + The value returned by sqlite3OsSectorSize()
       
  1844       **    + The largest page size that can be written atomically.
       
  1845       */
       
  1846       if( rc==SQLITE_OK && !readOnly ){
       
  1847         int iSectorSize = sqlite3OsSectorSize(pPager->fd);
       
  1848         if( szPageDflt<iSectorSize ){
       
  1849           szPageDflt = iSectorSize;
       
  1850         }
       
  1851 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  1852         {
       
  1853           int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
       
  1854           int ii;
       
  1855           assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
       
  1856           assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
       
  1857           assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
       
  1858           for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
       
  1859             if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) szPageDflt = ii;
       
  1860           }
       
  1861         }
       
  1862 #endif
       
  1863         if( szPageDflt>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
       
  1864           szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
       
  1865         }
       
  1866       }
       
  1867     }
       
  1868   }else if( !memDb ){
       
  1869     /* If a temporary file is requested, it is not opened immediately.
       
  1870     ** In this case we accept the default page size and delay actually
       
  1871     ** opening the file until the first call to OsWrite().
       
  1872     */ 
       
  1873     tempFile = 1;
       
  1874     pPager->state = PAGER_EXCLUSIVE;
       
  1875   }
       
  1876 
       
  1877   if( pPager && rc==SQLITE_OK ){
       
  1878     pPager->pTmpSpace = sqlite3PageMalloc(szPageDflt);
       
  1879   }
       
  1880 
       
  1881   /* If an error occured in either of the blocks above.
       
  1882   ** Free the Pager structure and close the file.
       
  1883   ** Since the pager is not allocated there is no need to set 
       
  1884   ** any Pager.errMask variables.
       
  1885   */
       
  1886   if( !pPager || !pPager->pTmpSpace ){
       
  1887     sqlite3OsClose(pPager->fd);
       
  1888     sqlite3_free(pPager);
       
  1889     return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
       
  1890   }
       
  1891   nExtra = FORCE_ALIGNMENT(nExtra);
       
  1892   sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
       
  1893                     !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
       
  1894 
       
  1895   PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
       
  1896   IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
       
  1897 
       
  1898   /* Fill in Pager.zDirectory[] */
       
  1899   memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
       
  1900   for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
       
  1901   if( i>0 ) pPager->zDirectory[i-1] = 0;
       
  1902 
       
  1903   /* Fill in Pager.zJournal[] */
       
  1904   if( zPathname ){
       
  1905     memcpy(pPager->zJournal, pPager->zFilename, nPathname);
       
  1906     memcpy(&pPager->zJournal[nPathname], "-journal", 9);
       
  1907   }else{
       
  1908     pPager->zJournal = 0;
       
  1909   }
       
  1910 
       
  1911   /* pPager->journalOpen = 0; */
       
  1912   pPager->useJournal = useJournal;
       
  1913   pPager->noReadlock = noReadlock && readOnly;
       
  1914   /* pPager->stmtOpen = 0; */
       
  1915   /* pPager->stmtInUse = 0; */
       
  1916   /* pPager->nRef = 0; */
       
  1917   pPager->dbSize = memDb-1;
       
  1918   pPager->pageSize = szPageDflt;
       
  1919   /* pPager->stmtSize = 0; */
       
  1920   /* pPager->stmtJSize = 0; */
       
  1921   /* pPager->nPage = 0; */
       
  1922   pPager->mxPage = 100;
       
  1923   pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
       
  1924   /* pPager->state = PAGER_UNLOCK; */
       
  1925   assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
       
  1926   /* pPager->errMask = 0; */
       
  1927   pPager->tempFile = tempFile;
       
  1928   assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
       
  1929           || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
       
  1930   assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
       
  1931   pPager->exclusiveMode = tempFile; 
       
  1932   pPager->memDb = memDb;
       
  1933   pPager->readOnly = readOnly;
       
  1934   /* pPager->needSync = 0; */
       
  1935   pPager->noSync = pPager->tempFile || !useJournal;
       
  1936   pPager->fullSync = (pPager->noSync?0:1);
       
  1937   pPager->sync_flags = SQLITE_SYNC_NORMAL;
       
  1938   /* pPager->pFirst = 0; */
       
  1939   /* pPager->pFirstSynced = 0; */
       
  1940   /* pPager->pLast = 0; */
       
  1941   pPager->nExtra = nExtra;
       
  1942   pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
       
  1943   assert(pPager->fd->pMethods||memDb||tempFile);
       
  1944   if( !memDb ){
       
  1945     setSectorSize(pPager);
       
  1946   }
       
  1947   /* pPager->pBusyHandler = 0; */
       
  1948   /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
       
  1949   *ppPager = pPager;
       
  1950   return SQLITE_OK;
       
  1951 }
       
  1952 
       
  1953 /*
       
  1954 ** Set the busy handler function.
       
  1955 */
       
  1956 void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
       
  1957   pPager->pBusyHandler = pBusyHandler;
       
  1958 }
       
  1959 
       
  1960 /*
       
  1961 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
       
  1962 ** is called when the content of a page in cache is restored to its original
       
  1963 ** value as a result of a rollback.  The callback gives higher-level code
       
  1964 ** an opportunity to restore the EXTRA section to agree with the restored
       
  1965 ** page data.
       
  1966 */
       
  1967 void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*)){
       
  1968   pPager->xReiniter = xReinit;
       
  1969 }
       
  1970 
       
  1971 /*
       
  1972 ** Set the page size to *pPageSize. If the suggest new page size is
       
  1973 ** inappropriate, then an alternative page size is set to that
       
  1974 ** value before returning.
       
  1975 */
       
  1976 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
       
  1977   int rc = pPager->errCode;
       
  1978   if( rc==SQLITE_OK ){
       
  1979     u16 pageSize = *pPageSize;
       
  1980     assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
       
  1981     if( pageSize && pageSize!=pPager->pageSize 
       
  1982      && (pPager->memDb==0 || pPager->dbSize==0)
       
  1983      && sqlite3PcacheRefCount(pPager->pPCache)==0 
       
  1984     ){
       
  1985       char *pNew = (char *)sqlite3PageMalloc(pageSize);
       
  1986       if( !pNew ){
       
  1987         rc = SQLITE_NOMEM;
       
  1988       }else{
       
  1989         pager_reset(pPager);
       
  1990         pPager->pageSize = pageSize;
       
  1991         if( !pPager->memDb ) setSectorSize(pPager);
       
  1992         sqlite3PageFree(pPager->pTmpSpace);
       
  1993         pPager->pTmpSpace = pNew;
       
  1994         sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
       
  1995       }
       
  1996     }
       
  1997     *pPageSize = pPager->pageSize;
       
  1998   }
       
  1999   return rc;
       
  2000 }
       
  2001 
       
  2002 /*
       
  2003 ** Return a pointer to the "temporary page" buffer held internally
       
  2004 ** by the pager.  This is a buffer that is big enough to hold the
       
  2005 ** entire content of a database page.  This buffer is used internally
       
  2006 ** during rollback and will be overwritten whenever a rollback
       
  2007 ** occurs.  But other modules are free to use it too, as long as
       
  2008 ** no rollbacks are happening.
       
  2009 */
       
  2010 void *sqlite3PagerTempSpace(Pager *pPager){
       
  2011   return pPager->pTmpSpace;
       
  2012 }
       
  2013 
       
  2014 /*
       
  2015 ** Attempt to set the maximum database page count if mxPage is positive. 
       
  2016 ** Make no changes if mxPage is zero or negative.  And never reduce the
       
  2017 ** maximum page count below the current size of the database.
       
  2018 **
       
  2019 ** Regardless of mxPage, return the current maximum page count.
       
  2020 */
       
  2021 int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
       
  2022   if( mxPage>0 ){
       
  2023     pPager->mxPgno = mxPage;
       
  2024   }
       
  2025   sqlite3PagerPagecount(pPager, 0);
       
  2026   return pPager->mxPgno;
       
  2027 }
       
  2028 
       
  /*
  ** Switches for the simulated I/O error mechanism used by the test
  ** harness.  They bracket code in which simulated errors are unwanted
  ** because the outcome of the I/O does not matter there.
  **
  ** Without -DSQLITE_TEST=1 both names are empty macros and generate no
  ** code.  With it, disable_simulated_io_errors() remembers the current
  ** value of sqlite3_io_error_pending and sets the counter to -1, and
  ** enable_simulated_io_errors() puts the remembered value back.
  */
  #ifndef SQLITE_TEST
  # define disable_simulated_io_errors()
  # define enable_simulated_io_errors()
  #else
  extern int sqlite3_io_error_pending;
  extern int sqlite3_io_error_hit;
  static int saved_cnt;      /* Counter value saved by the last disable call */
  void disable_simulated_io_errors(void){
    saved_cnt = sqlite3_io_error_pending;
    sqlite3_io_error_pending = -1;
  }
  void enable_simulated_io_errors(void){
    sqlite3_io_error_pending = saved_cnt;
  }
  #endif
       
  2052 
       
  2053 /*
       
  2054 ** Read the first N bytes from the beginning of the file into memory
       
  2055 ** that pDest points to. 
       
  2056 **
       
  2057 ** No error checking is done. The rational for this is that this function 
       
  2058 ** may be called even if the file does not exist or contain a header. In 
       
  2059 ** these cases sqlite3OsRead() will return an error, to which the correct 
       
  2060 ** response is to zero the memory at pDest and continue.  A real IO error 
       
  2061 ** will presumably recur and be picked up later (Todo: Think about this).
       
  2062 */
       
  2063 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
       
  2064   int rc = SQLITE_OK;
       
  2065   memset(pDest, 0, N);
       
  2066   assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
       
  2067   if( pPager->fd->pMethods ){
       
  2068     IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
       
  2069     rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
       
  2070     if( rc==SQLITE_IOERR_SHORT_READ ){
       
  2071       rc = SQLITE_OK;
       
  2072     }
       
  2073   }
       
  2074   return rc;
       
  2075 }
       
  2076 
       
  2077 /*
       
  2078 ** Return the total number of pages in the disk file associated with
       
  2079 ** pPager. 
       
  2080 **
       
  2081 ** If the PENDING_BYTE lies on the page directly after the end of the
       
  2082 ** file, then consider this page part of the file too. For example, if
       
  2083 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
       
  2084 ** file is 4096 bytes, 5 is returned instead of 4.
       
  2085 */
       
  2086 int sqlite3PagerPagecount(Pager *pPager, int *pnPage){
       
  2087   i64 n = 0;
       
  2088   int rc;
       
  2089   assert( pPager!=0 );
       
  2090   if( pPager->errCode ){
       
  2091     rc = pPager->errCode;
       
  2092     return rc;
       
  2093   }
       
  2094   if( pPager->dbSize>=0 ){
       
  2095     n = pPager->dbSize;
       
  2096   } else {
       
  2097     assert(pPager->fd->pMethods||pPager->tempFile);
       
  2098     if( (pPager->fd->pMethods)
       
  2099      && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
       
  2100       pager_error(pPager, rc);
       
  2101       return rc;
       
  2102     }
       
  2103     if( n>0 && n<pPager->pageSize ){
       
  2104       n = 1;
       
  2105     }else{
       
  2106       n /= pPager->pageSize;
       
  2107     }
       
  2108     if( pPager->state!=PAGER_UNLOCK ){
       
  2109       pPager->dbSize = n;
       
  2110     }
       
  2111   }
       
  2112   if( n==(PENDING_BYTE/pPager->pageSize) ){
       
  2113     n++;
       
  2114   }
       
  2115   if( n>pPager->mxPgno ){
       
  2116     pPager->mxPgno = n;
       
  2117   }
       
  2118   if( pnPage ){
       
  2119     *pnPage = n;
       
  2120   }
       
  2121   return SQLITE_OK;
       
  2122 }
       
  2123 
       
  2124 /*
       
  2125 ** Forward declaration
       
  2126 */
       
  2127 static int syncJournal(Pager*);
       
  2128 
       
  2129 /*
       
  2130 ** This routine is used to truncate the cache when a database
       
  2131 ** is truncated.  Drop from the cache all pages whose pgno is
       
  2132 ** larger than pPager->dbSize and is unreferenced.
       
  2133 **
       
  2134 ** Referenced pages larger than pPager->dbSize are zeroed.
       
  2135 **
       
  2136 ** Actually, at the point this routine is called, it would be
       
  2137 ** an error to have a referenced page.  But rather than delete
       
  2138 ** that page and guarantee a subsequent segfault, it seems better
       
  2139 ** to zero it and hope that we error out sanely.
       
  2140 */
       
  2141 static void pager_truncate_cache(Pager *pPager){
       
  2142   sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
       
  2143 }
       
  2144 
       
  2145 /*
       
  2146 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
       
  2147 ** is currently not available.  Repeat until the busy callback returns
       
  2148 ** false or until the lock succeeds.
       
  2149 **
       
  2150 ** Return SQLITE_OK on success and an error code if we cannot obtain
       
  2151 ** the lock.
       
  2152 */
       
  2153 static int pager_wait_on_lock(Pager *pPager, int locktype){
       
  2154   int rc;
       
  2155 
       
  2156   /* The OS lock values must be the same as the Pager lock values */
       
  2157   assert( PAGER_SHARED==SHARED_LOCK );
       
  2158   assert( PAGER_RESERVED==RESERVED_LOCK );
       
  2159   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
       
  2160 
       
  2161   /* If the file is currently unlocked then the size must be unknown */
       
  2162   assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
       
  2163 
       
  2164   if( pPager->state>=locktype ){
       
  2165     rc = SQLITE_OK;
       
  2166   }else{
       
  2167     if( pPager->pBusyHandler ) pPager->pBusyHandler->nBusy = 0;
       
  2168     do {
       
  2169       rc = sqlite3OsLock(pPager->fd, locktype);
       
  2170     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
       
  2171     if( rc==SQLITE_OK ){
       
  2172       pPager->state = locktype;
       
  2173       IOTRACE(("LOCK %p %d\n", pPager, locktype))
       
  2174     }
       
  2175   }
       
  2176   return rc;
       
  2177 }
       
  2178 
       
  2179 /*
       
  2180 ** Truncate the file to the number of pages specified.
       
  2181 */
       
  2182 int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
       
  2183   int rc = SQLITE_OK;
       
  2184   assert( pPager->state>=PAGER_SHARED || MEMDB );
       
  2185 
       
  2186 
       
  2187   sqlite3PagerPagecount(pPager, 0);
       
  2188   if( pPager->errCode ){
       
  2189     rc = pPager->errCode;
       
  2190   }else if( nPage<(unsigned)pPager->dbSize ){
       
  2191     if( MEMDB ){
       
  2192       pPager->dbSize = nPage;
       
  2193       pager_truncate_cache(pPager);
       
  2194     }else{
       
  2195       rc = syncJournal(pPager);
       
  2196       if( rc==SQLITE_OK ){
       
  2197         /* Get an exclusive lock on the database before truncating. */
       
  2198         rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2199       }
       
  2200       if( rc==SQLITE_OK ){
       
  2201         rc = pager_truncate(pPager, nPage);
       
  2202       }
       
  2203     }
       
  2204   }
       
  2205 
       
  2206   return rc;
       
  2207 }
       
  2208 
       
  2209 /*
       
  2210 ** Shutdown the page cache.  Free all memory and close all files.
       
  2211 **
       
  2212 ** If a transaction was in progress when this routine is called, that
       
  2213 ** transaction is rolled back.  All outstanding pages are invalidated
       
  2214 ** and their memory is freed.  Any attempt to use a page associated
       
  2215 ** with this page cache after this function returns will likely
       
  2216 ** result in a coredump.
       
  2217 **
       
  2218 ** This function always succeeds. If a transaction is active an attempt
       
  2219 ** is made to roll it back. If an error occurs during the rollback 
       
  2220 ** a hot journal may be left in the filesystem but no error is returned
       
  2221 ** to the caller.
       
  2222 */
       
  2223 int sqlite3PagerClose(Pager *pPager){
       
  2224 
       
  2225   disable_simulated_io_errors();
       
  2226   sqlite3BeginBenignMalloc();
       
  2227   pPager->errCode = 0;
       
  2228   pPager->exclusiveMode = 0;
       
  2229   pager_reset(pPager);
       
  2230   pagerUnlockAndRollback(pPager);
       
  2231   enable_simulated_io_errors();
       
  2232   sqlite3EndBenignMalloc();
       
  2233   PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
       
  2234   IOTRACE(("CLOSE %p\n", pPager))
       
  2235   if( pPager->journalOpen ){
       
  2236     sqlite3OsClose(pPager->jfd);
       
  2237   }
       
  2238   sqlite3BitvecDestroy(pPager->pInJournal);
       
  2239   sqlite3BitvecDestroy(pPager->pAlwaysRollback);
       
  2240   if( pPager->stmtOpen ){
       
  2241     sqlite3OsClose(pPager->stfd);
       
  2242   }
       
  2243   sqlite3OsClose(pPager->fd);
       
  2244   /* Temp files are automatically deleted by the OS
       
  2245   ** if( pPager->tempFile ){
       
  2246   **   sqlite3OsDelete(pPager->zFilename);
       
  2247   ** }
       
  2248   */
       
  2249 
       
  2250   sqlite3PageFree(pPager->pTmpSpace);
       
  2251   sqlite3PcacheClose(pPager->pPCache);
       
  2252   sqlite3_free(pPager);
       
  2253   return SQLITE_OK;
       
  2254 }
       
  2255 
       
  2256 #if !defined(NDEBUG) || defined(SQLITE_TEST)
       
  2257 /*
       
  2258 ** Return the page number for the given page data.
       
  2259 */
       
  2260 Pgno sqlite3PagerPagenumber(DbPage *p){
       
  2261   return p->pgno;
       
  2262 }
       
  2263 #endif
       
  2264 
       
  2265 /*
       
  2266 ** Increment the reference count for a page.  The input pointer is
       
  2267 ** a reference to the page data.
       
  2268 */
       
  2269 int sqlite3PagerRef(DbPage *pPg){
       
  2270   sqlite3PcacheRef(pPg);
       
  2271   return SQLITE_OK;
       
  2272 }
       
  2273 
       
  2274 /*
       
  2275 ** Sync the journal.  In other words, make sure all the pages that have
       
  2276 ** been written to the journal have actually reached the surface of the
       
  2277 ** disk.  It is not safe to modify the original database file until after
       
  2278 ** the journal has been synced.  If the original database is modified before
       
  2279 ** the journal is synced and a power failure occurs, the unsynced journal
       
  2280 ** data would be lost and we would be unable to completely rollback the
       
  2281 ** database changes.  Database corruption would occur.
       
  2282 ** 
       
  2283 ** This routine also updates the nRec field in the header of the journal.
       
  2284 ** (See comments on the pager_playback() routine for additional information.)
       
  2285 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
       
  2286 ** is synced, then the nRec field is updated, then a second sync occurs.
       
  2287 **
       
  2288 ** For temporary databases, we do not care if we are able to rollback
       
  2289 ** after a power failure, so no sync occurs.
       
  2290 **
       
  2291 ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
       
  2292 ** the database is stored, then OsSync() is never called on the journal
       
  2293 ** file. In this case all that is required is to update the nRec field in
       
  2294 ** the journal header.
       
  2295 **
       
  2296 ** This routine clears the needSync field of every page current held in
       
  2297 ** memory.
       
  2298 */
       
  2299 static int syncJournal(Pager *pPager){
       
  2300   int rc = SQLITE_OK;
       
  2301 
       
  2302   /* Sync the journal before modifying the main database
       
  2303   ** (assuming there is a journal and it needs to be synced.)
       
  2304   */
       
  2305   if( pPager->needSync ){
       
  2306     if( !pPager->tempFile ){
       
  2307       int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
       
  2308       assert( pPager->journalOpen );
       
  2309 
       
  2310       if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
       
  2311         i64 jrnlOff = journalHdrOffset(pPager);
       
  2312         u8 zMagic[8];
       
  2313 
       
  2314         /* This block deals with an obscure problem. If the last connection
       
  2315         ** that wrote to this database was operating in persistent-journal
       
  2316         ** mode, then the journal file may at this point actually be larger
       
  2317         ** than Pager.journalOff bytes. If the next thing in the journal
       
  2318         ** file happens to be a journal-header (written as part of the
       
  2319         ** previous connections transaction), and a crash or power-failure 
       
  2320         ** occurs after nRec is updated but before this connection writes 
       
  2321         ** anything else to the journal file (or commits/rolls back its 
       
  2322         ** transaction), then SQLite may become confused when doing the 
       
  2323         ** hot-journal rollback following recovery. It may roll back all
       
  2324         ** of this connections data, then proceed to rolling back the old,
       
  2325         ** out-of-date data that follows it. Database corruption.
       
  2326         **
       
  2327         ** To work around this, if the journal file does appear to contain
       
  2328         ** a valid header following Pager.journalOff, then write a 0x00
       
  2329         ** byte to the start of it to prevent it from being recognized.
       
  2330         */
       
  2331         rc = sqlite3OsRead(pPager->jfd, zMagic, 8, jrnlOff);
       
  2332         if( rc==SQLITE_OK && 0==memcmp(zMagic, aJournalMagic, 8) ){
       
  2333           static const u8 zerobyte = 0;
       
  2334           rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, jrnlOff);
       
  2335         }
       
  2336         if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
       
  2337           return rc;
       
  2338         }
       
  2339 
       
  2340         /* Write the nRec value into the journal file header. If in
       
  2341         ** full-synchronous mode, sync the journal first. This ensures that
       
  2342         ** all data has really hit the disk before nRec is updated to mark
       
  2343         ** it as a candidate for rollback.
       
  2344         **
       
  2345         ** This is not required if the persistent media supports the
       
  2346         ** SAFE_APPEND property. Because in this case it is not possible 
       
  2347         ** for garbage data to be appended to the file, the nRec field
       
  2348         ** is populated with 0xFFFFFFFF when the journal header is written
       
  2349         ** and never needs to be updated.
       
  2350         */
       
  2351         if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
       
  2352           PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2353           IOTRACE(("JSYNC %p\n", pPager))
       
  2354           rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
       
  2355           if( rc!=0 ) return rc;
       
  2356         }
       
  2357 
       
  2358         jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
       
  2359         IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
       
  2360         rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
       
  2361         if( rc ) return rc;
       
  2362       }
       
  2363       if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
       
  2364         PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2365         IOTRACE(("JSYNC %p\n", pPager))
       
  2366         rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
       
  2367           (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
       
  2368         );
       
  2369         if( rc!=0 ) return rc;
       
  2370       }
       
  2371       pPager->journalStarted = 1;
       
  2372     }
       
  2373     pPager->needSync = 0;
       
  2374 
       
  2375     /* Erase the needSync flag from every page.
       
  2376     */
       
  2377     sqlite3PcacheClearFlags(pPager->pPCache, PGHDR_NEED_SYNC);
       
  2378   }
       
  2379 
       
  2380 #ifndef NDEBUG
       
  2381   /* If the Pager.needSync flag is clear then the PgHdr.needSync
       
  2382   ** flag must also be clear for all pages.  Verify that this
       
  2383   ** invariant is true.
       
  2384   */
       
  2385   else{
       
  2386     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_NEED_SYNC);
       
  2387   }
       
  2388 #endif
       
  2389 
       
  2390   return rc;
       
  2391 }
       
  2392 
       
  2393 /*
       
  2394 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
       
  2395 ** every one of those pages out to the database file. No calls are made
       
  2396 ** to the page-cache to mark the pages as clean. It is the responsibility
       
  2397 ** of the caller to use PcacheCleanAll() or PcacheMakeClean() to mark
       
  2398 ** the pages as clean.
       
  2399 */
       
  2400 static int pager_write_pagelist(PgHdr *pList){
       
  2401   Pager *pPager;
       
  2402   int rc;
       
  2403 
       
  2404   if( pList==0 ) return SQLITE_OK;
       
  2405   pPager = pList->pPager;
       
  2406 
       
  2407   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
       
  2408   ** database file. If there is already an EXCLUSIVE lock, the following
       
  2409   ** calls to sqlite3OsLock() are no-ops.
       
  2410   **
       
  2411   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
       
  2412   ** through an intermediate state PENDING.   A PENDING lock prevents new
       
  2413   ** readers from attaching to the database but is unsufficient for us to
       
  2414   ** write.  The idea of a PENDING lock is to prevent new readers from
       
  2415   ** coming in while we wait for existing readers to clear.
       
  2416   **
       
  2417   ** While the pager is in the RESERVED state, the original database file
       
  2418   ** is unchanged and we can rollback without having to playback the
       
  2419   ** journal into the original database file.  Once we transition to
       
  2420   ** EXCLUSIVE, it means the database file has been changed and any rollback
       
  2421   ** will require a journal playback.
       
  2422   */
       
  2423   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2424   if( rc!=SQLITE_OK ){
       
  2425     return rc;
       
  2426   }
       
  2427 
       
  2428   while( pList ){
       
  2429 
       
  2430     /* If the file has not yet been opened, open it now. */
       
  2431     if( !pPager->fd->pMethods ){
       
  2432       assert(pPager->tempFile);
       
  2433       rc = sqlite3PagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
       
  2434       if( rc ) return rc;
       
  2435     }
       
  2436 
       
  2437     /* If there are dirty pages in the page cache with page numbers greater
       
  2438     ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
       
  2439     ** make the file smaller (presumably by auto-vacuum code). Do not write
       
  2440     ** any such pages to the file.
       
  2441     */
       
  2442     if( pList->pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
       
  2443       i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
       
  2444       char *pData = CODEC2(pPager, pList->pData, pList->pgno, 6);
       
  2445       PAGERTRACE4("STORE %d page %d hash(%08x)\n",
       
  2446                    PAGERID(pPager), pList->pgno, pager_pagehash(pList));
       
  2447       IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
       
  2448       rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
       
  2449       PAGER_INCR(sqlite3_pager_writedb_count);
       
  2450       PAGER_INCR(pPager->nWrite);
       
  2451       if( pList->pgno==1 ){
       
  2452         memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
       
  2453       }
       
  2454     }
       
  2455 #ifndef NDEBUG
       
  2456     else{
       
  2457       PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
       
  2458     }
       
  2459 #endif
       
  2460     if( rc ) return rc;
       
  2461 #ifdef SQLITE_CHECK_PAGES
       
  2462     pList->pageHash = pager_pagehash(pList);
       
  2463 #endif
       
  2464     pList = pList->pDirty;
       
  2465   }
       
  2466 
       
  2467   return SQLITE_OK;
       
  2468 }
       
  2469 
       
  2470 /*
       
  2471 ** This function is called by the pcache layer when it has reached some
       
  2472 ** soft memory limit. The argument is a pointer to a purgeable Pager 
       
  2473 ** object. This function attempts to make a single dirty page that has no
       
  2474 ** outstanding references (if one exists) clean so that it can be recycled 
       
  2475 ** by the pcache layer.
       
  2476 */
       
  2477 static int pagerStress(void *p, PgHdr *pPg){
       
  2478   Pager *pPager = (Pager *)p;
       
  2479   int rc = SQLITE_OK;
       
  2480 
       
  2481   if( pPager->doNotSync ){
       
  2482     return SQLITE_OK;
       
  2483   }
       
  2484 
       
  2485   assert( pPg->flags&PGHDR_DIRTY );
       
  2486   if( pPager->errCode==SQLITE_OK ){
       
  2487     if( pPg->flags&PGHDR_NEED_SYNC ){
       
  2488       rc = syncJournal(pPager);
       
  2489       if( rc==SQLITE_OK && pPager->fullSync && 
       
  2490         !(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
       
  2491       ){
       
  2492         pPager->nRec = 0;
       
  2493         rc = writeJournalHdr(pPager);
       
  2494       }
       
  2495     }
       
  2496     if( rc==SQLITE_OK ){
       
  2497       pPg->pDirty = 0;
       
  2498       rc = pager_write_pagelist(pPg);
       
  2499     }
       
  2500     if( rc!=SQLITE_OK ){
       
  2501       pager_error(pPager, rc);
       
  2502     }
       
  2503   }
       
  2504 
       
  2505   if( rc==SQLITE_OK ){
       
  2506     sqlite3PcacheMakeClean(pPg);
       
  2507   }
       
  2508   return rc;
       
  2509 }
       
  2510 
       
  2511 
       
  2512 /*
       
  2513 ** Return 1 if there is a hot journal on the given pager.
       
  2514 ** A hot journal is one that needs to be played back.
       
  2515 **
       
  2516 ** If the current size of the database file is 0 but a journal file
       
  2517 ** exists, that is probably an old journal left over from a prior
       
  2518 ** database with the same name.  Just delete the journal.
       
  2519 **
       
  2520 ** Return negative if unable to determine the status of the journal.
       
  2521 **
       
  2522 ** This routine does not open the journal file to examine its
       
  2523 ** content.  Hence, the journal might contain the name of a master
       
  2524 ** journal file that has been deleted, and hence not be hot.  Or
       
  2525 ** the header of the journal might be zeroed out.  This routine
       
  2526 ** does not discover these cases of a non-hot journal - if the
       
  2527 ** journal file exists and is not empty this routine assumes it
       
  2528 ** is hot.  The pager_playback() routine will discover that the
       
  2529 ** journal file is not really hot and will no-op.
       
  2530 */
       
  2531 static int hasHotJournal(Pager *pPager, int *pExists){
       
  2532   sqlite3_vfs *pVfs = pPager->pVfs;
       
  2533   int rc = SQLITE_OK;
       
  2534   int exists;
       
  2535   int locked;
       
  2536   assert( pPager!=0 );
       
  2537   assert( pPager->useJournal );
       
  2538   assert( pPager->fd->pMethods );
       
  2539   *pExists = 0;
       
  2540   rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
       
  2541   if( rc==SQLITE_OK && exists ){
       
  2542     rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
       
  2543   }
       
  2544   if( rc==SQLITE_OK && exists && !locked ){
       
  2545     int nPage;
       
  2546     rc = sqlite3PagerPagecount(pPager, &nPage);
       
  2547     if( rc==SQLITE_OK ){
       
  2548      if( nPage==0 ){
       
  2549         sqlite3OsDelete(pVfs, pPager->zJournal, 0);
       
  2550       }else{
       
  2551         *pExists = 1;
       
  2552       }
       
  2553     }
       
  2554   }
       
  2555   return rc;
       
  2556 }
       
  2557 
       
  2558 /*
       
  2559 ** Read the content of page pPg out of the database file.
       
  2560 */
       
  2561 static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
       
  2562   int rc;
       
  2563   i64 offset;
       
  2564   assert( MEMDB==0 );
       
  2565   assert(pPager->fd->pMethods||pPager->tempFile);
       
  2566   if( !pPager->fd->pMethods ){
       
  2567     return SQLITE_IOERR_SHORT_READ;
       
  2568   }
       
  2569   offset = (pgno-1)*(i64)pPager->pageSize;
       
  2570   rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, offset);
       
  2571   PAGER_INCR(sqlite3_pager_readdb_count);
       
  2572   PAGER_INCR(pPager->nRead);
       
  2573   IOTRACE(("PGIN %p %d\n", pPager, pgno));
       
  2574   if( pgno==1 ){
       
  2575     memcpy(&pPager->dbFileVers, &((u8*)pPg->pData)[24],
       
  2576                                               sizeof(pPager->dbFileVers));
       
  2577   }
       
  2578   CODEC1(pPager, pPg->pData, pPg->pgno, 3);
       
  2579   PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
       
  2580                PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
       
  2581   return rc;
       
  2582 }
       
  2583 
       
  2584 
       
  2585 /*
       
  2586 ** This function is called to obtain the shared lock required before
       
  2587 ** data may be read from the pager cache. If the shared lock has already
       
  2588 ** been obtained, this function is a no-op.
       
  2589 **
       
  2590 ** Immediately after obtaining the shared lock (if required), this function
       
  2591 ** checks for a hot-journal file. If one is found, an emergency rollback
       
  2592 ** is performed immediately.
       
  2593 */
       
  2594 static int pagerSharedLock(Pager *pPager){
       
  2595   int rc = SQLITE_OK;
       
  2596   int isErrorReset = 0;
       
  2597 
       
  2598   /* If this database is opened for exclusive access, has no outstanding 
       
  2599   ** page references and is in an error-state, now is the chance to clear
       
  2600   ** the error. Discard the contents of the pager-cache and treat any
       
  2601   ** open journal file as a hot-journal.
       
  2602   */
       
  2603   if( !MEMDB && pPager->exclusiveMode 
       
  2604    && sqlite3PcacheRefCount(pPager->pPCache)==0 && pPager->errCode 
       
  2605   ){
       
  2606     if( pPager->journalOpen ){
       
  2607       isErrorReset = 1;
       
  2608     }
       
  2609     pPager->errCode = SQLITE_OK;
       
  2610     pager_reset(pPager);
       
  2611   }
       
  2612 
       
  2613   /* If the pager is still in an error state, do not proceed. The error 
       
  2614   ** state will be cleared at some point in the future when all page 
       
  2615   ** references are dropped and the cache can be discarded.
       
  2616   */
       
  2617   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  2618     return pPager->errCode;
       
  2619   }
       
  2620 
       
  2621   if( pPager->state==PAGER_UNLOCK || isErrorReset ){
       
  2622     sqlite3_vfs *pVfs = pPager->pVfs;
       
  2623     if( !MEMDB ){
       
  2624       int isHotJournal;
       
  2625       assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
       
  2626       if( !pPager->noReadlock ){
       
  2627         rc = pager_wait_on_lock(pPager, SHARED_LOCK);
       
  2628         if( rc!=SQLITE_OK ){
       
  2629           assert( pPager->state==PAGER_UNLOCK );
       
  2630           return pager_error(pPager, rc);
       
  2631         }
       
  2632         assert( pPager->state>=SHARED_LOCK );
       
  2633       }
       
  2634   
       
  2635       /* If a journal file exists, and there is no RESERVED lock on the
       
  2636       ** database file, then it either needs to be played back or deleted.
       
  2637       */
       
  2638       if( !isErrorReset ){
       
  2639         rc = hasHotJournal(pPager, &isHotJournal);
       
  2640         if( rc!=SQLITE_OK ){
       
  2641           goto failed;
       
  2642         }
       
  2643       }
       
  2644       if( isErrorReset || isHotJournal ){
       
  2645         /* Get an EXCLUSIVE lock on the database file. At this point it is
       
  2646         ** important that a RESERVED lock is not obtained on the way to the
       
  2647         ** EXCLUSIVE lock. If it were, another process might open the
       
  2648         ** database file, detect the RESERVED lock, and conclude that the
       
  2649         ** database is safe to read while this process is still rolling it 
       
  2650         ** back.
       
  2651         ** 
       
  2652         ** Because the intermediate RESERVED lock is not requested, the
       
  2653         ** second process will get to this point in the code and fail to
       
  2654         ** obtain its own EXCLUSIVE lock on the database file.
       
  2655         */
       
  2656         if( pPager->state<EXCLUSIVE_LOCK ){
       
  2657           rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
       
  2658           if( rc!=SQLITE_OK ){
       
  2659             rc = pager_error(pPager, rc);
       
  2660             goto failed;
       
  2661           }
       
  2662           pPager->state = PAGER_EXCLUSIVE;
       
  2663         }
       
  2664  
       
  2665         /* Open the journal for read/write access. This is because in 
       
  2666         ** exclusive-access mode the file descriptor will be kept open and
       
  2667         ** possibly used for a transaction later on. On some systems, the
       
  2668         ** OsTruncate() call used in exclusive-access mode also requires
       
  2669         ** a read/write file handle.
       
  2670         */
       
  2671         if( !isErrorReset && pPager->journalOpen==0 ){
       
  2672           int res;
       
  2673           rc = sqlite3OsAccess(pVfs,pPager->zJournal,SQLITE_ACCESS_EXISTS,&res);
       
  2674           if( rc==SQLITE_OK ){
       
  2675             if( res ){
       
  2676               int fout = 0;
       
  2677               int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
       
  2678               assert( !pPager->tempFile );
       
  2679               rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
       
  2680               assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
       
  2681               if( fout&SQLITE_OPEN_READONLY ){
       
  2682                 rc = SQLITE_BUSY;
       
  2683                 sqlite3OsClose(pPager->jfd);
       
  2684               }
       
  2685             }else{
       
  2686               /* If the journal does not exist, that means some other process
       
  2687               ** has already rolled it back */
       
  2688               rc = SQLITE_BUSY;
       
  2689             }
       
  2690           }
       
  2691         }
       
  2692         if( rc!=SQLITE_OK ){
       
  2693           if( rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_UNLOCK 
       
  2694            && rc!=SQLITE_IOERR_NOMEM 
       
  2695           ){
       
  2696             rc = SQLITE_BUSY;
       
  2697           }
       
  2698           goto failed;
       
  2699         }
       
  2700         pPager->journalOpen = 1;
       
  2701         pPager->journalStarted = 0;
       
  2702         pPager->journalOff = 0;
       
  2703         pPager->setMaster = 0;
       
  2704         pPager->journalHdr = 0;
       
  2705  
       
  2706         /* Playback and delete the journal.  Drop the database write
       
  2707         ** lock and reacquire the read lock.  Purge the cache before
       
  2708         ** playing back the hot-journal so that we don't end up with
       
  2709         */
       
  2710         sqlite3PcacheClear(pPager->pPCache);
       
  2711         rc = pager_playback(pPager, 1);
       
  2712         if( rc!=SQLITE_OK ){
       
  2713           rc = pager_error(pPager, rc);
       
  2714           goto failed;
       
  2715         }
       
  2716         assert(pPager->state==PAGER_SHARED || 
       
  2717             (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
       
  2718         );
       
  2719       }
       
  2720 
       
  2721       if( sqlite3PcachePagecount(pPager->pPCache)>0 ){
       
  2722         /* The shared-lock has just been acquired on the database file
       
  2723         ** and there are already pages in the cache (from a previous
       
  2724         ** read or write transaction).  Check to see if the database
       
  2725         ** has been modified.  If the database has changed, flush the
       
  2726         ** cache.
       
  2727         **
       
  2728         ** Database changes is detected by looking at 15 bytes beginning
       
  2729         ** at offset 24 into the file.  The first 4 of these 16 bytes are
       
  2730         ** a 32-bit counter that is incremented with each change.  The
       
  2731         ** other bytes change randomly with each file change when
       
  2732         ** a codec is in use.
       
  2733         ** 
       
  2734         ** There is a vanishingly small chance that a change will not be 
       
  2735         ** detected.  The chance of an undetected change is so small that
       
  2736         ** it can be neglected.
       
  2737         */
       
  2738         char dbFileVers[sizeof(pPager->dbFileVers)];
       
  2739         sqlite3PagerPagecount(pPager, 0);
       
  2740 
       
  2741         if( pPager->errCode ){
       
  2742           rc = pPager->errCode;
       
  2743           goto failed;
       
  2744         }
       
  2745 
       
  2746         if( pPager->dbSize>0 ){
       
  2747           IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
       
  2748           rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
       
  2749           if( rc!=SQLITE_OK ){
       
  2750             goto failed;
       
  2751           }
       
  2752         }else{
       
  2753           memset(dbFileVers, 0, sizeof(dbFileVers));
       
  2754         }
       
  2755 
       
  2756         if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
       
  2757           pager_reset(pPager);
       
  2758         }
       
  2759       }
       
  2760     }
       
  2761     assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
       
  2762     if( pPager->state==PAGER_UNLOCK ){
       
  2763       pPager->state = PAGER_SHARED;
       
  2764     }
       
  2765   }
       
  2766 
       
  2767  failed:
       
  2768   if( rc!=SQLITE_OK ){
       
  2769     /* pager_unlock() is a no-op for exclusive mode and in-memory databases. */
       
  2770     pager_unlock(pPager);
       
  2771   }
       
  2772   return rc;
       
  2773 }
       
  2774 
       
  2775 /*
       
  2776 ** Make sure we have the content for a page.  If the page was
       
  2777 ** previously acquired with noContent==1, then the content was
       
  2778 ** just initialized to zeros instead of being read from disk.
       
  2779 ** But now we need the real data off of disk.  So make sure we
       
  2780 ** have it.  Read it in if we do not have it already.
       
  2781 */
       
  2782 static int pager_get_content(PgHdr *pPg){
       
  2783   if( pPg->flags&PGHDR_NEED_READ ){
       
  2784     int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
       
  2785     if( rc==SQLITE_OK ){
       
  2786       pPg->flags &= ~PGHDR_NEED_READ;
       
  2787     }else{
       
  2788       return rc;
       
  2789     }
       
  2790   }
       
  2791   return SQLITE_OK;
       
  2792 }
       
  2793 
       
  2794 /*
       
  2795 ** If the reference count has reached zero, and the pager is not in the
       
  2796 ** middle of a write transaction or opened in exclusive mode, unlock it.
       
  2797 */ 
       
  2798 static void pagerUnlockIfUnused(Pager *pPager){
       
  2799   if( (sqlite3PcacheRefCount(pPager->pPCache)==0)
       
  2800     && (!pPager->exclusiveMode || pPager->journalOff>0) 
       
  2801   ){
       
  2802     pagerUnlockAndRollback(pPager);
       
  2803   }
       
  2804 }
       
  2805 
       
  2806 /*
       
  2807 ** Drop a page from the cache using sqlite3PcacheDrop().
       
  2808 **
       
  2809 ** If this means there are now no pages with references to them, a rollback
       
  2810 ** occurs and the lock on the database is removed.
       
  2811 */
       
  2812 static void pagerDropPage(DbPage *pPg){
       
  2813   Pager *pPager = pPg->pPager;
       
  2814   sqlite3PcacheDrop(pPg);
       
  2815   pagerUnlockIfUnused(pPager);
       
  2816 }
       
  2817 
       
  2818 /*
       
  2819 ** Acquire a page.
       
  2820 **
       
  2821 ** A read lock on the disk file is obtained when the first page is acquired. 
       
  2822 ** This read lock is dropped when the last page is released.
       
  2823 **
       
  2824 ** This routine works for any page number greater than 0.  If the database
       
  2825 ** file is smaller than the requested page, then no actual disk
       
  2826 ** read occurs and the memory image of the page is initialized to
       
  2827 ** all zeros.  The extra data appended to a page is always initialized
       
  2828 ** to zeros the first time a page is loaded into memory.
       
  2829 **
       
  2830 ** The acquisition might fail for several reasons.  In all cases,
       
  2831 ** an appropriate error code is returned and *ppPage is set to NULL.
       
  2832 **
       
  2833 ** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
       
  2834 ** to find a page in the in-memory cache first.  If the page is not already
       
  2835 ** in memory, this routine goes to disk to read it in whereas Lookup()
       
  2836 ** just returns 0.  This routine acquires a read-lock the first time it
       
  2837 ** has to go to disk, and could also playback an old journal if necessary.
       
  2838 ** Since Lookup() never goes to disk, it never has to deal with locks
       
  2839 ** or journal files.
       
  2840 **
       
  2841 ** If noContent is false, the page contents are actually read from disk.
       
  2842 ** If noContent is true, it means that we do not care about the contents
       
  2843 ** of the page at this time, so do not do a disk read.  Just fill in the
       
  2844 ** page content with zeros.  But mark the fact that we have not read the
       
  2845 ** content by setting the PgHdr.needRead flag.  Later on, if 
       
  2846 ** sqlite3PagerWrite() is called on this page or if this routine is
       
  2847 ** called again with noContent==0, that means that the content is needed
       
  2848 ** and the disk read should occur at that point.
       
  2849 */
       
  2850 int sqlite3PagerAcquire(
       
  2851   Pager *pPager,      /* The pager open on the database file */
       
  2852   Pgno pgno,          /* Page number to fetch */
       
  2853   DbPage **ppPage,    /* Write a pointer to the page here */
       
  2854   int noContent       /* Do not bother reading content from disk if true */
       
  2855 ){
       
  2856   PgHdr *pPg = 0;
       
  2857   int rc;
       
  2858 
       
  2859   assert( pPager->state==PAGER_UNLOCK 
       
  2860        || sqlite3PcacheRefCount(pPager->pPCache)>0 
       
  2861        || pgno==1 
       
  2862   );
       
  2863 
       
  2864   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
       
  2865   ** number greater than this, or zero, is requested.
       
  2866   */
       
  2867   if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
       
  2868     return SQLITE_CORRUPT_BKPT;
       
  2869   }
       
  2870 
       
  2871   /* Make sure we have not hit any critical errors.
       
  2872   */ 
       
  2873   assert( pPager!=0 );
       
  2874   *ppPage = 0;
       
  2875 
       
  2876   /* If this is the first page accessed, then get a SHARED lock
       
  2877   ** on the database file. pagerSharedLock() is a no-op if 
       
  2878   ** a database lock is already held.
       
  2879   */
       
  2880   rc = pagerSharedLock(pPager);
       
  2881   if( rc!=SQLITE_OK ){
       
  2882     return rc;
       
  2883   }
       
  2884   assert( pPager->state!=PAGER_UNLOCK );
       
  2885 
       
  2886   rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, &pPg);
       
  2887   if( rc!=SQLITE_OK ){
       
  2888     return rc;
       
  2889   }
       
  2890   if( pPg->pPager==0 ){
       
  2891     /* The pager cache has created a new page. Its content needs to 
       
  2892     ** be initialized.
       
  2893     */
       
  2894     int nMax;
       
  2895     PAGER_INCR(pPager->nMiss);
       
  2896     pPg->pPager = pPager;
       
  2897     if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
       
  2898       assert( !MEMDB );
       
  2899       pPg->flags |= PGHDR_IN_JOURNAL;
       
  2900     }
       
  2901     memset(pPg->pExtra, 0, pPager->nExtra);
       
  2902 
       
  2903     rc = sqlite3PagerPagecount(pPager, &nMax);
       
  2904     if( rc!=SQLITE_OK ){
       
  2905       sqlite3PagerUnref(pPg);
       
  2906       return rc;
       
  2907     }
       
  2908 
       
  2909     if( nMax<(int)pgno || MEMDB || noContent ){
       
  2910       if( pgno>pPager->mxPgno ){
       
  2911         sqlite3PagerUnref(pPg);
       
  2912         return SQLITE_FULL;
       
  2913       }
       
  2914       memset(pPg->pData, 0, pPager->pageSize);
       
  2915       if( noContent ){
       
  2916         pPg->flags |= PGHDR_NEED_READ;
       
  2917       }
       
  2918       IOTRACE(("ZERO %p %d\n", pPager, pgno));
       
  2919     }else{
       
  2920       rc = readDbPage(pPager, pPg, pgno);
       
  2921       if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
       
  2922         /* sqlite3PagerUnref(pPg); */
       
  2923         pagerDropPage(pPg);
       
  2924         return rc;
       
  2925       }
       
  2926     }
       
  2927 #ifdef SQLITE_CHECK_PAGES
       
  2928     pPg->pageHash = pager_pagehash(pPg);
       
  2929 #endif
       
  2930   }else{
       
  2931     /* The requested page is in the page cache. */
       
  2932     assert(sqlite3PcacheRefCount(pPager->pPCache)>0 || pgno==1);
       
  2933     PAGER_INCR(pPager->nHit);
       
  2934     if( !noContent ){
       
  2935       rc = pager_get_content(pPg);
       
  2936       if( rc ){
       
  2937         sqlite3PagerUnref(pPg);
       
  2938         return rc;
       
  2939       }
       
  2940     }
       
  2941   }
       
  2942 
       
  2943   *ppPage = pPg;
       
  2944   return SQLITE_OK;
       
  2945 }
       
  2946 
       
  2947 /*
       
  2948 ** Acquire a page if it is already in the in-memory cache.  Do
       
  2949 ** not read the page from disk.  Return a pointer to the page,
       
  2950 ** or 0 if the page is not in cache.
       
  2951 **
       
  2952 ** See also sqlite3PagerGet().  The difference between this routine
       
  2953 ** and sqlite3PagerGet() is that _get() will go to the disk and read
       
  2954 ** in the page if the page is not already in cache.  This routine
       
  2955 ** returns NULL if the page is not in cache or if a disk I/O error 
       
  2956 ** has ever happened.
       
  2957 */
       
  2958 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
       
  2959   PgHdr *pPg = 0;
       
  2960   assert( pPager!=0 );
       
  2961   assert( pgno!=0 );
       
  2962 
       
  2963   if( (pPager->state!=PAGER_UNLOCK)
       
  2964    && (pPager->errCode==SQLITE_OK || pPager->errCode==SQLITE_FULL)
       
  2965   ){
       
  2966     sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
       
  2967   }
       
  2968 
       
  2969   return pPg;
       
  2970 }
       
  2971 
       
  2972 /*
       
  2973 ** Release a page.
       
  2974 **
       
  2975 ** If the number of references to the page drop to zero, then the
       
  2976 ** page is added to the LRU list.  When all references to all pages
       
  2977 ** are released, a rollback occurs and the lock on the database is
       
  2978 ** removed.
       
  2979 */
       
  2980 int sqlite3PagerUnref(DbPage *pPg){
       
  2981   if( pPg ){
       
  2982     Pager *pPager = pPg->pPager;
       
  2983     sqlite3PcacheRelease(pPg);
       
  2984     pagerUnlockIfUnused(pPager);
       
  2985   }
       
  2986   return SQLITE_OK;
       
  2987 }
       
  2988 
       
  2989 /*
       
  2990 ** Create a journal file for pPager.  There should already be a RESERVED
       
  2991 ** or EXCLUSIVE lock on the database file when this routine is called.
       
  2992 **
       
  2993 ** Return SQLITE_OK if everything.  Return an error code and release the
       
  2994 ** write lock if anything goes wrong.
       
  2995 */
       
  2996 static int pager_open_journal(Pager *pPager){
       
  2997   sqlite3_vfs *pVfs = pPager->pVfs;
       
  2998   int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
       
  2999 
       
  3000   int rc;
       
  3001   assert( !MEMDB );
       
  3002   assert( pPager->state>=PAGER_RESERVED );
       
  3003   assert( pPager->useJournal );
       
  3004   assert( pPager->pInJournal==0 );
       
  3005   sqlite3PagerPagecount(pPager, 0);
       
  3006   pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
       
  3007   if( pPager->pInJournal==0 ){
       
  3008     rc = SQLITE_NOMEM;
       
  3009     goto failed_to_open_journal;
       
  3010   }
       
  3011 
       
  3012   if( pPager->journalOpen==0 ){
       
  3013     if( pPager->tempFile ){
       
  3014       flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
       
  3015     }else{
       
  3016       flags |= (SQLITE_OPEN_MAIN_JOURNAL);
       
  3017     }
       
  3018 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  3019     rc = sqlite3JournalOpen(
       
  3020         pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
       
  3021     );
       
  3022 #else
       
  3023     rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
       
  3024 #endif
       
  3025     assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
       
  3026     pPager->journalOff = 0;
       
  3027     pPager->setMaster = 0;
       
  3028     pPager->journalHdr = 0;
       
  3029     if( rc!=SQLITE_OK ){
       
  3030       if( rc==SQLITE_NOMEM ){
       
  3031         sqlite3OsDelete(pVfs, pPager->zJournal, 0);
       
  3032       }
       
  3033       goto failed_to_open_journal;
       
  3034     }
       
  3035   }
       
  3036   pPager->journalOpen = 1;
       
  3037   pPager->journalStarted = 0;
       
  3038   pPager->needSync = 0;
       
  3039   pPager->nRec = 0;
       
  3040   if( pPager->errCode ){
       
  3041     rc = pPager->errCode;
       
  3042     goto failed_to_open_journal;
       
  3043   }
       
  3044   pPager->origDbSize = pPager->dbSize;
       
  3045 
       
  3046   rc = writeJournalHdr(pPager);
       
  3047 
       
  3048   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
       
  3049     rc = sqlite3PagerStmtBegin(pPager);
       
  3050   }
       
  3051   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
       
  3052     rc = pager_end_transaction(pPager, 0);
       
  3053     if( rc==SQLITE_OK ){
       
  3054       rc = SQLITE_FULL;
       
  3055     }
       
  3056   }
       
  3057   return rc;
       
  3058 
       
  3059 failed_to_open_journal:
       
  3060   sqlite3BitvecDestroy(pPager->pInJournal);
       
  3061   pPager->pInJournal = 0;
       
  3062   return rc;
       
  3063 }
       
  3064 
       
  3065 /*
       
  3066 ** Acquire a write-lock on the database.  The lock is removed when
       
  3067 ** the any of the following happen:
       
  3068 **
       
  3069 **   *  sqlite3PagerCommitPhaseTwo() is called.
       
  3070 **   *  sqlite3PagerRollback() is called.
       
  3071 **   *  sqlite3PagerClose() is called.
       
  3072 **   *  sqlite3PagerUnref() is called to on every outstanding page.
       
  3073 **
       
  3074 ** The first parameter to this routine is a pointer to any open page of the
       
  3075 ** database file.  Nothing changes about the page - it is used merely to
       
  3076 ** acquire a pointer to the Pager structure and as proof that there is
       
  3077 ** already a read-lock on the database.
       
  3078 **
       
  3079 ** The second parameter indicates how much space in bytes to reserve for a
       
  3080 ** master journal file-name at the start of the journal when it is created.
       
  3081 **
       
  3082 ** A journal file is opened if this is not a temporary file.  For temporary
       
  3083 ** files, the opening of the journal file is deferred until there is an
       
  3084 ** actual need to write to the journal.
       
  3085 **
       
  3086 ** If the database is already reserved for writing, this routine is a no-op.
       
  3087 **
       
  3088 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
       
  3089 ** immediately instead of waiting until we try to flush the cache.  The
       
  3090 ** exFlag is ignored if a transaction is already active.
       
  3091 */
       
  3092 int sqlite3PagerBegin(DbPage *pPg, int exFlag){
       
  3093   Pager *pPager = pPg->pPager;
       
  3094   int rc = SQLITE_OK;
       
  3095   assert( pPg->nRef>0 );
       
  3096   assert( pPager->state!=PAGER_UNLOCK );
       
  3097   if( pPager->state==PAGER_SHARED ){
       
  3098     assert( pPager->pInJournal==0 );
       
  3099     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
       
  3100     if( MEMDB ){
       
  3101       pPager->state = PAGER_EXCLUSIVE;
       
  3102       pPager->origDbSize = pPager->dbSize;
       
  3103     }else{
       
  3104       rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
       
  3105       if( rc==SQLITE_OK ){
       
  3106         pPager->state = PAGER_RESERVED;
       
  3107         if( exFlag ){
       
  3108           rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  3109         }
       
  3110       }
       
  3111       if( rc!=SQLITE_OK ){
       
  3112         return rc;
       
  3113       }
       
  3114       pPager->dirtyCache = 0;
       
  3115       PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
       
  3116       if( pPager->useJournal && !pPager->tempFile
       
  3117              && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
       
  3118         rc = pager_open_journal(pPager);
       
  3119       }
       
  3120     }
       
  3121   }else if( pPager->journalOpen && pPager->journalOff==0 ){
       
  3122     /* This happens when the pager was in exclusive-access mode the last
       
  3123     ** time a (read or write) transaction was successfully concluded
       
  3124     ** by this connection. Instead of deleting the journal file it was 
       
  3125     ** kept open and either was truncated to 0 bytes or its header was
       
  3126     ** overwritten with zeros.
       
  3127     */
       
  3128     assert( pPager->nRec==0 );
       
  3129     assert( pPager->origDbSize==0 );
       
  3130     assert( pPager->pInJournal==0 );
       
  3131     sqlite3PagerPagecount(pPager, 0);
       
  3132     pPager->pInJournal = sqlite3BitvecCreate( pPager->dbSize );
       
  3133     if( !pPager->pInJournal ){
       
  3134       rc = SQLITE_NOMEM;
       
  3135     }else{
       
  3136       pPager->origDbSize = pPager->dbSize;
       
  3137       rc = writeJournalHdr(pPager);
       
  3138     }
       
  3139   }
       
  3140   assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
       
  3141   return rc;
       
  3142 }
       
  3143 
       
  3144 
       
  3145 /*
       
  3146 ** Mark a data page as writeable.  The page is written into the journal 
       
  3147 ** if it is not there already.  This routine must be called before making
       
  3148 ** changes to a page.
       
  3149 **
       
  3150 ** The first time this routine is called, the pager creates a new
       
  3151 ** journal and acquires a RESERVED lock on the database.  If the RESERVED
       
  3152 ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
       
  3153 ** calling routine must check for that return value and be careful not to
       
  3154 ** change any page data until this routine returns SQLITE_OK.
       
  3155 **
       
  3156 ** If the journal file could not be written because the disk is full,
       
  3157 ** then this routine returns SQLITE_FULL and does an immediate rollback.
       
  3158 ** All subsequent write attempts also return SQLITE_FULL until there
       
  3159 ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
       
  3160 ** reset.
       
  3161 */
       
  3162 static int pager_write(PgHdr *pPg){
       
  3163   void *pData = pPg->pData;
       
  3164   Pager *pPager = pPg->pPager;
       
  3165   int rc = SQLITE_OK;
       
  3166 
       
  3167   /* Check for errors
       
  3168   */
       
         /* A pager with a pending error code rejects the write with that same
         ** code; a read-only pager rejects it with SQLITE_PERM. */
  3169   if( pPager->errCode ){ 
       
  3170     return pPager->errCode;
       
  3171   }
       
  3172   if( pPager->readOnly ){
       
  3173     return SQLITE_PERM;
       
  3174   }
       
  3175 
       
  3176   assert( !pPager->setMaster );
       
  3177 
       
  3178   CHECK_PAGE(pPg);
       
  3179 
       
  3180   /* If this page was previously acquired with noContent==1, that means
       
  3181   ** we didn't really read in the content of the page.  This can happen
       
  3182   ** (for example) when the page is being moved to the freelist.  But
       
  3183   ** now we are (perhaps) moving the page off of the freelist for
       
  3184   ** reuse and we need to know its original content so that content
       
  3185   ** can be stored in the rollback journal.  So do the read at this
       
  3186   ** time.
       
  3187   */
       
  3188   rc = pager_get_content(pPg);
       
  3189   if( rc ){
       
  3190     return rc;
       
  3191   }
       
  3192 
       
  3193   /* Mark the page as dirty.  If the page has already been written
       
  3194   ** to the journal then we can return right away.
       
  3195   */
       
         /* Note that the page is marked dirty before any journal I/O is
         ** attempted, so it stays dirty even if one of the error returns
         ** below is taken. */
  3196   sqlite3PcacheMakeDirty(pPg);
       
  3197   if( (pPg->flags&PGHDR_IN_JOURNAL)
       
  3198    && (pageInStatement(pPg) || pPager->stmtInUse==0) 
       
  3199   ){
       
           /* Fast path: already in the main journal and either already in the
           ** statement journal or no statement is active.  Only the pager-wide
           ** flags need to be raised. */
  3200     pPager->dirtyCache = 1;
       
  3201     pPager->dbModified = 1;
       
  3202   }else{
       
  3203 
       
  3204     /* If we get this far, it means that the page needs to be
       
  3205     ** written to the transaction journal or the checkpoint journal
       
  3206     ** or both.
       
  3207     **
       
  3208     ** First check to see that the transaction journal exists and
       
  3209     ** create it if it does not.
       
  3210     */
       
  3211     assert( pPager->state!=PAGER_UNLOCK );
       
  3212     rc = sqlite3PagerBegin(pPg, 0);
       
  3213     if( rc!=SQLITE_OK ){
       
  3214       return rc;
       
  3215     }
       
  3216     assert( pPager->state>=PAGER_RESERVED );
       
           /* The journal file is only opened when journaling is enabled for
           ** this pager (useJournal) and the journal mode is not OFF. */
  3217     if( !pPager->journalOpen && pPager->useJournal
       
  3218           && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
       
  3219       rc = pager_open_journal(pPager);
       
  3220       if( rc!=SQLITE_OK ) return rc;
       
  3221     }
       
  3222     pPager->dirtyCache = 1;
       
  3223     pPager->dbModified = 1;
       
  3224   
       
  3225     /* The transaction journal now exists and we have a RESERVED or an
       
  3226     ** EXCLUSIVE lock on the main database file.  Write the current page to
       
  3227     ** the transaction journal if it is not there already.
       
  3228     */
       
  3229     if( !(pPg->flags&PGHDR_IN_JOURNAL) && (pPager->journalOpen || MEMDB) ){
       
             /* Only pages that existed when the transaction started
             ** (pgno<=origDbSize) have original content worth saving. */
  3230       if( (int)pPg->pgno <= pPager->origDbSize ){
       
  3231         if( MEMDB ){
       
  3232           PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3233           rc = sqlite3PcachePreserve(pPg, 0);
       
  3234           if( rc!=SQLITE_OK ){
       
  3235             return rc;
       
  3236           }
       
  3237         }else{
       
  3238           u32 cksum;
       
  3239           char *pData2;
       
  3240 
       
  3241           /* We should never write to the journal file the page that
       
  3242           ** contains the database locks.  The following assert verifies
       
  3243           ** that we do not. */
       
  3244           assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
       
                 /* Journal record layout as written below: 4-byte page number,
                 ** pageSize bytes of content (after CODEC2), 4-byte checksum. */
  3245           pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
       
  3246           cksum = pager_cksum(pPager, (u8*)pData2);
       
  3247           rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
       
  3248           if( rc==SQLITE_OK ){
       
  3249             rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
       
  3250                                 pPager->journalOff + 4);
       
                   /* journalOff is advanced whether or not the content write
                   ** succeeded; a failure is reported through rc below. */
  3251             pPager->journalOff += pPager->pageSize+4;
       
  3252           }
       
  3253           if( rc==SQLITE_OK ){
       
  3254             rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
       
  3255             pPager->journalOff += 4;
       
  3256           }
       
  3257           IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
       
  3258                    pPager->journalOff, pPager->pageSize));
       
  3259           PAGER_INCR(sqlite3_pager_writej_count);
       
  3260           PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
       
  3261                PAGERID(pPager), pPg->pgno, 
       
  3262                ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg));
       
  3263 
       
  3264           /* An error has occurred writing to the journal file. The 
       
  3265           ** transaction will be rolled back by the layer above.
       
  3266           */
       
  3267           if( rc!=SQLITE_OK ){
       
  3268             return rc;
       
  3269           }
       
  3270 
       
                 /* NOTE(review): the result of sqlite3BitvecSet() is ignored
                 ** here and below, whereas sqlite3PagerDontWrite() checks it.
                 ** Confirm whether an allocation failure can go unnoticed. */
  3271           pPager->nRec++;
       
  3272           assert( pPager->pInJournal!=0 );
       
  3273           sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
       
  3274           if( !pPager->noSync ){
       
  3275             pPg->flags |= PGHDR_NEED_SYNC;
       
  3276           }
       
  3277           if( pPager->stmtInUse ){
       
  3278             sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
       
  3279           }
       
  3280         }
       
  3281       }else{
       
               /* Page lies beyond the original end of the database, so no
               ** journal record is written for it; only the sync flag is
               ** adjusted. */
  3282         if( !pPager->journalStarted && !pPager->noSync ){
       
  3283           pPg->flags |= PGHDR_NEED_SYNC;
       
  3284         }
       
  3285         PAGERTRACE4("APPEND %d page %d needSync=%d\n",
       
  3286                 PAGERID(pPager), pPg->pgno,
       
  3287                ((pPg->flags&PGHDR_NEED_SYNC)?1:0));
       
  3288       }
       
  3289       if( pPg->flags&PGHDR_NEED_SYNC ){
       
  3290         pPager->needSync = 1;
       
  3291       }
       
  3292       pPg->flags |= PGHDR_IN_JOURNAL;
       
  3293     }
       
  3294   
       
  3295     /* If the statement journal is open and the page is not in it,
       
  3296     ** then write the current page to the statement journal.  Note that
       
  3297     ** the statement journal format differs from the standard journal format
       
  3298     ** in that it omits the checksums and the header.
       
  3299     */
       
  3300     if( pPager->stmtInUse 
       
  3301      && !pageInStatement(pPg) 
       
  3302      && (int)pPg->pgno<=pPager->stmtSize 
       
  3303     ){
       
  3304       assert( (pPg->flags&PGHDR_IN_JOURNAL) 
       
  3305                  || (int)pPg->pgno>pPager->origDbSize );
       
  3306       if( MEMDB ){
       
  3307         rc = sqlite3PcachePreserve(pPg, 1);
       
  3308         if( rc!=SQLITE_OK ){
       
  3309           return rc;
       
  3310         }
       
  3311         PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3312       }else{
       
               /* Statement-journal records are fixed size (4-byte page number
               ** plus pageSize bytes), so the write offset is derived from the
               ** record count stmtNRec. */
  3313         i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
       
  3314         char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
       
  3315         rc = write32bits(pPager->stfd, offset, pPg->pgno);
       
  3316         if( rc==SQLITE_OK ){
       
  3317           rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
       
  3318         }
       
  3319         PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3320         if( rc!=SQLITE_OK ){
       
  3321           return rc;
       
  3322         }
       
  3323         pPager->stmtNRec++;
       
  3324         assert( pPager->pInStmt!=0 );
       
  3325         sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
       
  3326       }
       
  3327     }
       
  3328   }
       
  3329 
       
  3330   /* Update the database size and return.
       
  3331   */
       
         /* Writing a page past the current end grows dbSize to that page
         ** number.  For a non-MEMDB pager, a size that lands exactly on
         ** PENDING_BYTE/pageSize is bumped by one more page. */
  3332   assert( pPager->state>=PAGER_SHARED );
       
  3333   if( pPager->dbSize<(int)pPg->pgno ){
       
  3334     pPager->dbSize = pPg->pgno;
       
  3335     if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
       
  3336       pPager->dbSize++;
       
  3337     }
       
  3338   }
       
  3339   return rc;
       
  3340 }
       
  3341 
       
  3342 /*
       
  3343 ** This function is used to mark a data-page as writable. It uses 
       
  3344 ** pager_write() to open a journal file (if it is not already open)
       
  3345 ** and write the page *pData to the journal.
       
  3346 **
       
  3347 ** The difference between this function and pager_write() is that this
       
  3348 ** function also deals with the special case where 2 or more pages
       
  3349 ** fit on a single disk sector. In this case all co-resident pages
       
  3350 ** must have been written to the journal file before returning.
       
  3351 */
       
  3352 int sqlite3PagerWrite(DbPage *pDbPage){
       
  3353   int rc = SQLITE_OK;
       
  3354 
       
  3355   PgHdr *pPg = pDbPage;
       
  3356   Pager *pPager = pPg->pPager;
       
  3357   Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
       
  3358 
       
  3359   if( !MEMDB && nPagePerSector>1 ){
       
  3360     Pgno nPageCount;          /* Total number of pages in database file */
       
  3361     Pgno pg1;                 /* First page of the sector pPg is located on. */
       
  3362     int nPage;                /* Number of pages starting at pg1 to journal */
       
  3363     int ii;
       
  3364     int needSync = 0;
       
  3365 
       
  3366     /* Set the doNotSync flag to 1. This is because we cannot allow a journal
       
  3367     ** header to be written between the pages journaled by this function.
       
  3368     */
       
  3369     assert( pPager->doNotSync==0 );
       
  3370     pPager->doNotSync = 1;
       
  3371 
       
  3372     /* This trick assumes that both the page-size and sector-size are
       
  3373     ** an integer power of 2. It sets variable pg1 to the identifier
       
  3374     ** of the first page of the sector pPg is located on.
       
  3375     */
       
  3376     pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
       
  3377 
       
  3378     sqlite3PagerPagecount(pPager, (int *)&nPageCount);
       
  3379     if( pPg->pgno>nPageCount ){
       
  3380       nPage = (pPg->pgno - pg1)+1;
       
  3381     }else if( (pg1+nPagePerSector-1)>nPageCount ){
       
  3382       nPage = nPageCount+1-pg1;
       
  3383     }else{
       
  3384       nPage = nPagePerSector;
       
  3385     }
       
  3386     assert(nPage>0);
       
  3387     assert(pg1<=pPg->pgno);
       
  3388     assert((pg1+nPage)>pPg->pgno);
       
  3389 
       
  3390     for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
       
  3391       Pgno pg = pg1+ii;
       
  3392       PgHdr *pPage;
       
  3393       if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
       
  3394         if( pg!=PAGER_MJ_PGNO(pPager) ){
       
  3395           rc = sqlite3PagerGet(pPager, pg, &pPage);
       
  3396           if( rc==SQLITE_OK ){
       
  3397             rc = pager_write(pPage);
       
  3398             if( pPage->flags&PGHDR_NEED_SYNC ){
       
  3399               needSync = 1;
       
  3400             }
       
  3401             sqlite3PagerUnref(pPage);
       
  3402           }
       
  3403         }
       
  3404       }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
       
  3405         if( pPage->flags&PGHDR_NEED_SYNC ){
       
  3406           needSync = 1;
       
  3407         }
       
  3408         sqlite3PagerUnref(pPage);
       
  3409       }
       
  3410     }
       
  3411 
       
  3412     /* If the PgHdr.needSync flag is set for any of the nPage pages 
       
  3413     ** starting at pg1, then it needs to be set for all of them. Because
       
  3414     ** writing to any of these nPage pages may damage the others, the
       
  3415     ** journal file must contain sync()ed copies of all of them
       
  3416     ** before any of them can be written out to the database file.
       
  3417     */
       
  3418     if( needSync ){
       
  3419       assert( !MEMDB && pPager->noSync==0 );
       
  3420       for(ii=0; ii<nPage && needSync; ii++){
       
  3421         PgHdr *pPage = pager_lookup(pPager, pg1+ii);
       
  3422         if( pPage ) pPage->flags |= PGHDR_NEED_SYNC;
       
  3423         sqlite3PagerUnref(pPage);
       
  3424       }
       
  3425       assert(pPager->needSync);
       
  3426     }
       
  3427 
       
  3428     assert( pPager->doNotSync==1 );
       
  3429     pPager->doNotSync = 0;
       
  3430   }else{
       
  3431     rc = pager_write(pDbPage);
       
  3432   }
       
  3433   return rc;
       
  3434 }
       
  3435 
       
  3436 /*
       
  3437 ** Return TRUE if the page given in the argument was previously passed
       
  3438 ** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
       
  3439 ** to change the content of the page.
       
  3440 */
       
  3441 #ifndef NDEBUG
       
  3442 int sqlite3PagerIswriteable(DbPage *pPg){
       
  3443   return pPg->flags&PGHDR_DIRTY;
       
  3444 }
       
  3445 #endif
       
  3446 
       
  3447 /*
       
  3448 ** A call to this routine tells the pager that it is not necessary to
       
  3449 ** write the information on page pPg back to the disk, even though
       
  3450 ** that page might be marked as dirty.
       
  3451 **
       
  3452 ** The overlying software layer calls this routine when all of the data
       
  3453 ** on the given page is unused.  The pager marks the page as clean so
       
  3454 ** that it does not get written to disk.
       
  3455 **
       
  3456 ** Tests show that this optimization, together with the
       
  3457 ** sqlite3PagerDontRollback() below, more than double the speed
       
  3458 ** of large INSERT operations and quadruple the speed of large DELETEs.
       
  3459 **
       
  3460 ** When this routine is called, set the alwaysRollback flag to true.
       
  3461 ** Subsequent calls to sqlite3PagerDontRollback() for the same page
       
  3462 ** will thereafter be ignored.  This is necessary to avoid a problem
       
  3463 ** where a page with data is added to the freelist during one part of
       
  3464 ** a transaction then removed from the freelist during a later part
       
  3465 ** of the same transaction and reused for some other purpose.  When it
       
  3466 ** is first added to the freelist, this routine is called.  When reused,
       
  3467 ** the sqlite3PagerDontRollback() routine is called.  But because the
       
  3468 ** page contains critical data, we still need to be sure it gets
       
  3469 ** rolled back in spite of the sqlite3PagerDontRollback() call.
       
  3470 */
       
  3471 int sqlite3PagerDontWrite(DbPage *pDbPage){
       
  3472   PgHdr *pPg = pDbPage;
       
  3473   Pager *pPager = pPg->pPager;
       
  3474   int rc;
       
  3475 
       
  3476   if( MEMDB || pPg->pgno>pPager->origDbSize ){
       
  3477     return SQLITE_OK;
       
  3478   }
       
  3479   if( pPager->pAlwaysRollback==0 ){
       
  3480     assert( pPager->pInJournal );
       
  3481     pPager->pAlwaysRollback = sqlite3BitvecCreate(pPager->origDbSize);
       
  3482     if( !pPager->pAlwaysRollback ){
       
  3483       return SQLITE_NOMEM;
       
  3484     }
       
  3485   }
       
  3486   rc = sqlite3BitvecSet(pPager->pAlwaysRollback, pPg->pgno);
       
  3487 
       
  3488   if( rc==SQLITE_OK && (pPg->flags&PGHDR_DIRTY) && !pPager->stmtInUse ){
       
  3489     assert( pPager->state>=PAGER_SHARED );
       
  3490     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
       
  3491       /* If this pages is the last page in the file and the file has grown
       
  3492       ** during the current transaction, then do NOT mark the page as clean.
       
  3493       ** When the database file grows, we must make sure that the last page
       
  3494       ** gets written at least once so that the disk file will be the correct
       
  3495       ** size. If you do not write this page and the size of the file
       
  3496       ** on the disk ends up being too small, that can lead to database
       
  3497       ** corruption during the next transaction.
       
  3498       */
       
  3499     }else{
       
  3500       PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
       
  3501       IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
       
  3502       pPg->flags |= PGHDR_DONT_WRITE;
       
  3503 #ifdef SQLITE_CHECK_PAGES
       
  3504       pPg->pageHash = pager_pagehash(pPg);
       
  3505 #endif
       
  3506     }
       
  3507   }
       
  3508   return rc;
       
  3509 }
       
  3510 
       
  3511 /*
       
  3512 ** A call to this routine tells the pager that if a rollback occurs,
       
  3513 ** it is not necessary to restore the data on the given page.  This
       
  3514 ** means that the pager does not have to record the given page in the
       
  3515 ** rollback journal.
       
  3516 **
       
  3517 ** If we have not yet actually read the content of this page (if
       
  3518 ** the PgHdr.needRead flag is set) then this routine acts as a promise
       
  3519 ** that we will never need to read the page content in the future.
       
  3520 ** so the needRead flag can be cleared at this point.
       
  3521 */
       
  3522 void sqlite3PagerDontRollback(DbPage *pPg){
       
  3523   Pager *pPager = pPg->pPager;
       
  3524 
       
  3525   assert( pPager->state>=PAGER_RESERVED );
       
  3526 
       
  3527   /* If the journal file is not open, or DontWrite() has been called on
       
  3528   ** this page (DontWrite() sets the alwaysRollback flag), then this
       
  3529   ** function is a no-op.
       
  3530   */
       
  3531   if( pPager->journalOpen==0 
       
  3532    || sqlite3BitvecTest(pPager->pAlwaysRollback, pPg->pgno)
       
  3533    || pPg->pgno>pPager->origDbSize
       
  3534   ){
       
  3535     return;
       
  3536   }
       
  3537   assert( !MEMDB );    /* For a memdb, pPager->journalOpen is always 0 */
       
  3538 
       
  3539 #ifdef SQLITE_SECURE_DELETE
       
  3540   if( (pPg->flags & PGHDR_IN_JOURNAL)!=0 || (int)pPg->pgno>pPager->origDbSize ){
       
  3541     return;
       
  3542   }
       
  3543 #endif
       
  3544 
       
  3545   /* If SECURE_DELETE is disabled, then there is no way that this
       
  3546   ** routine can be called on a page for which sqlite3PagerDontWrite()
       
  3547   ** has not been previously called during the same transaction.
       
  3548   ** And if DontWrite() has previously been called, the following
       
  3549   ** conditions must be met.
       
  3550   **
       
  3551   ** (Later:)  Not true.  If the database is corrupted by having duplicate
       
  3552   ** pages on the freelist (ex: corrupt9.test) then the following is not
       
  3553   ** necessarily true:
       
  3554   */
       
  3555   /* assert( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ); */
       
  3556 
       
  3557   assert( pPager->pInJournal!=0 );
       
  3558   sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
       
  3559   pPg->flags |= PGHDR_IN_JOURNAL;
       
  3560   pPg->flags &= ~PGHDR_NEED_READ;
       
  3561   if( pPager->stmtInUse ){
       
  3562     assert( pPager->stmtSize >= pPager->origDbSize );
       
  3563     sqlite3BitvecSet(pPager->pInStmt, pPg->pgno);
       
  3564   }
       
  3565   PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
       
  3566   IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
       
  3567 }
       
  3568 
       
  3569 
       
  3570 /*
       
  3571 ** This routine is called to increment the database file change-counter,
       
  3572 ** stored at byte 24 of the pager file.
       
  3573 */
       
  3574 static int pager_incr_changecounter(Pager *pPager, int isDirect){
       
  3575   PgHdr *pPgHdr;
       
  3576   u32 change_counter;
       
  3577   int rc = SQLITE_OK;
       
  3578 
       
  3579 #ifndef SQLITE_ENABLE_ATOMIC_WRITE
       
  3580   assert( isDirect==0 );  /* isDirect is only true for atomic writes */
       
  3581 #endif
       
  3582   if( !pPager->changeCountDone ){
       
  3583     /* Open page 1 of the file for writing. */
       
  3584     rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
       
  3585     if( rc!=SQLITE_OK ) return rc;
       
  3586 
       
  3587     if( !isDirect ){
       
  3588       rc = sqlite3PagerWrite(pPgHdr);
       
  3589       if( rc!=SQLITE_OK ){
       
  3590         sqlite3PagerUnref(pPgHdr);
       
  3591         return rc;
       
  3592       }
       
  3593     }
       
  3594 
       
  3595     /* Increment the value just read and write it back to byte 24. */
       
  3596     change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
       
  3597     change_counter++;
       
  3598     put32bits(((char*)pPgHdr->pData)+24, change_counter);
       
  3599 
       
  3600 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  3601     if( isDirect && pPager->fd->pMethods ){
       
  3602       const void *zBuf = pPgHdr->pData;
       
  3603       rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
       
  3604     }
       
  3605 #endif
       
  3606 
       
  3607     /* Release the page reference. */
       
  3608     sqlite3PagerUnref(pPgHdr);
       
  3609     pPager->changeCountDone = 1;
       
  3610   }
       
  3611   return rc;
       
  3612 }
       
  3613 
       
  3614 /*
       
  3615 ** Sync the pager file to disk.
       
  3616 */
       
  3617 int sqlite3PagerSync(Pager *pPager){
       
  3618   int rc;
       
  3619   if( MEMDB ){
       
  3620     rc = SQLITE_OK;
       
  3621   }else{
       
  3622     rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
       
  3623   }
       
  3624   return rc;
       
  3625 }
       
  3626 
       
  3627 /*
       
  3628 ** Sync the database file for the pager pPager. zMaster points to the name
       
  3629 ** of a master journal file that should be written into the individual
       
  3630 ** journal file. zMaster may be NULL, which is interpreted as no master
       
  3631 ** journal (a single database transaction).
       
  3632 **
       
  3633 ** This routine ensures that the journal is synced, all dirty pages written
       
  3634 ** to the database file and the database file synced. The only thing that
       
  3635 ** remains to commit the transaction is to delete the journal file (or
       
  3636 ** master journal file if specified).
       
  3637 **
       
  3638 ** Note that if zMaster==NULL, this does not overwrite a previous value
       
  3639 ** passed to an sqlite3PagerCommitPhaseOne() call.
       
  3640 **
       
  3641 ** If parameter nTrunc is non-zero, then the pager file is truncated to
       
  3642 ** nTrunc pages (this is used by auto-vacuum databases).
       
  3643 **
       
  3644 ** If the final parameter - noSync - is true, then the database file itself
       
  3645 ** is not synced. The caller must call sqlite3PagerSync() directly to
       
  3646 ** sync the database file before calling CommitPhaseTwo() to delete the
       
  3647 ** journal file in this case.
       
  3648 */
       
  3649 int sqlite3PagerCommitPhaseOne(
       
  3650   Pager *pPager, 
       
  3651   const char *zMaster, 
       
  3652   Pgno nTrunc,
       
  3653   int noSync
       
  3654 ){
       
  3655   int rc = SQLITE_OK;
       
  3656 
       
         /* A pager that is already in an error state returns that error. */
  3657   if( pPager->errCode ){
       
  3658     return pPager->errCode;
       
  3659   }
       
  3660 
       
  3661   /* If no changes have been made, we can leave the transaction early.
       
  3662   */
       
         /* The early exit is not taken in DELETE journal mode unless the pager
         ** is in exclusive mode; sqlite3PagerCommitPhaseTwo() applies the same
         ** test. */
  3663   if( pPager->dbModified==0 &&
       
  3664         (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
       
  3665           pPager->exclusiveMode!=0) ){
       
  3666     assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
       
  3667     return SQLITE_OK;
       
  3668   }
       
  3669 
       
  3670   PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
       
  3671       pPager->zFilename, zMaster, nTrunc);
       
  3672 
       
  3673   /* If this is an in-memory db, or no pages have been written to, or this
       
  3674   ** function has already been called, it is a no-op.
       
  3675   */
       
  3676   if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
       
  3677     PgHdr *pPg;
       
  3678 
       
  3679 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  3680     /* The atomic-write optimization can be used if all of the
       
  3681     ** following are true:
       
  3682     **
       
  3683     **    + The file-system supports the atomic-write property for
       
  3684     **      blocks of size page-size, and
       
  3685     **    + This commit is not part of a multi-file transaction, and
       
  3686     **    + Exactly one page has been modified and stored in the journal file.
       
  3687     **
       
  3688     ** If the optimization can be used, then the journal file will never
       
  3689     ** be created for this transaction.
       
  3690     */
       
  3691     int useAtomicWrite;
       
  3692     pPg = sqlite3PcacheDirtyList(pPager->pPCache);
       
           /* As coded, the test is: no master journal, journal open with its
           ** offset equal to jrnlBufferSize(), no truncation requested, and at
           ** most one page on the dirty list. */
  3693     useAtomicWrite = (
       
  3694         !zMaster && 
       
  3695         pPager->journalOpen &&
       
  3696         pPager->journalOff==jrnlBufferSize(pPager) && 
       
  3697         nTrunc==0 && 
       
  3698         (pPg==0 || pPg->pDirty==0)
       
  3699     );
       
  3700     assert( pPager->journalOpen || pPager->journalMode==PAGER_JOURNALMODE_OFF );
       
  3701     if( useAtomicWrite ){
       
  3702       /* Update the nRec field in the journal file. */
       
  3703       int offset = pPager->journalHdr + sizeof(aJournalMagic);
       
  3704       assert(pPager->nRec==1);
       
  3705       rc = write32bits(pPager->jfd, offset, pPager->nRec);
       
  3706 
       
  3707       /* Update the db file change counter. The following call will modify
       
  3708       ** the in-memory representation of page 1 to include the updated
       
  3709       ** change counter and then write page 1 directly to the database
       
  3710       ** file. Because of the atomic-write property of the host file-system, 
       
  3711       ** this is safe.
       
  3712       */
       
  3713       if( rc==SQLITE_OK ){
       
  3714         rc = pager_incr_changecounter(pPager, 1);
       
  3715       }
       
  3716     }else{
       
  3717       rc = sqlite3JournalCreate(pPager->jfd);
       
  3718     }
       
  3719 
       
           /* CAUTION: the "if" on the next line has no body of its own.  When
           ** SQLITE_ENABLE_ATOMIC_WRITE is defined it governs the
           ** "if( !pPager->setMaster )" statement that follows the #endif. */
  3720     if( !useAtomicWrite && rc==SQLITE_OK )
       
  3721 #endif
       
  3722 
       
  3723     /* If a master journal file name has already been written to the
       
  3724     ** journal file, then no sync is required. This happens when it is
       
  3725     ** written, then the process fails to upgrade from a RESERVED to an
       
  3726     ** EXCLUSIVE lock. The next time the process tries to commit the
       
  3727     ** transaction the m-j name will have already been written.
       
  3728     */
       
  3729     if( !pPager->setMaster ){
       
  3730       rc = pager_incr_changecounter(pPager, 0);
       
  3731       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3732       if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
       
  3733 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3734         if( nTrunc!=0 ){
       
  3735           /* If this transaction has made the database smaller, then all pages
       
  3736           ** being discarded by the truncation must be written to the journal
       
  3737           ** file.
       
  3738           */
       
  3739           Pgno i;
       
  3740           int iSkip = PAGER_MJ_PGNO(pPager);
       
                 /* Journal every not-yet-journaled page in (nTrunc, origDbSize],
                 ** except the page at PAGER_MJ_PGNO. */
  3741           for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
       
  3742             if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
       
  3743               rc = sqlite3PagerGet(pPager, i, &pPg);
       
  3744               if( rc!=SQLITE_OK ) goto sync_exit;
       
  3745               rc = sqlite3PagerWrite(pPg);
       
  3746               sqlite3PagerUnref(pPg);
       
  3747               if( rc!=SQLITE_OK ) goto sync_exit;
       
  3748             }
       
  3749           } 
       
  3750         }
       
  3751 #endif
       
  3752         rc = writeMasterJournal(pPager, zMaster);
       
  3753         if( rc!=SQLITE_OK ) goto sync_exit;
       
  3754         rc = syncJournal(pPager);
       
  3755       }
       
  3756     }
       
           /* Catches a failure from syncJournal() above and, in the atomic-write
           ** build, a failure from the atomic-write branch. */
  3757     if( rc!=SQLITE_OK ) goto sync_exit;
       
  3758 
       
  3759 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3760     if( nTrunc!=0 ){
       
  3761       rc = sqlite3PagerTruncate(pPager, nTrunc);
       
  3762       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3763     }
       
  3764 #endif
       
  3765 
       
  3766     /* Write all dirty pages to the database file */
       
  3767     pPg = sqlite3PcacheDirtyList(pPager->pPCache);
       
  3768     rc = pager_write_pagelist(pPg);
       
  3769     if( rc!=SQLITE_OK ){
       
  3770       assert( rc!=SQLITE_IOERR_BLOCKED );
       
  3771       /* The error might have left the dirty list all fouled up here,
       
  3772       ** but that does not matter because if the dirty list did
       
  3773       ** get corrupted, then the transaction will roll back and
       
  3774       ** discard the dirty list.  There is an assert in
       
  3775       ** pager_get_all_dirty_pages() that verifies that no attempt
       
  3776       ** is made to use an invalid dirty list.
       
  3777       */
       
  3778       goto sync_exit;
       
  3779     }
       
  3780     sqlite3PcacheCleanAll(pPager->pPCache);
       
  3781 
       
  3782     /* Sync the database file. */
       
           /* The sync is skipped if either the pager-wide noSync setting or the
           ** noSync argument of this call is true. */
  3783     if( !pPager->noSync && !noSync ){
       
  3784       rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
       
  3785     }
       
  3786     IOTRACE(("DBSYNC %p\n", pPager))
       
  3787 
       
           /* NOTE(review): state becomes PAGER_SYNCED even when the
           ** sqlite3OsSync() call above returned an error; the error is still
           ** returned to the caller through rc.  Confirm this is intended. */
  3788     pPager->state = PAGER_SYNCED;
       
  3789   }else if( MEMDB && nTrunc!=0 ){
       
  3790     rc = sqlite3PagerTruncate(pPager, nTrunc);
       
  3791   }
       
  3792 
       
  3793 sync_exit:
       
  3794   if( rc==SQLITE_IOERR_BLOCKED ){
       
  3795     /* pager_incr_changecounter() may attempt to obtain an exclusive
       
  3796      * lock to spill the cache and return IOERR_BLOCKED. But since 
       
  3797      * there is no chance the cache is inconsistent, it is
       
  3798      * better to return SQLITE_BUSY.
       
  3799      */
       
  3800     rc = SQLITE_BUSY;
       
  3801   }
       
  3802   return rc;
       
  3803 }
       
  3804 
       
  3805 
       
  3806 /*
       
  3807 ** Commit all changes to the database and release the write lock.
       
  3808 **
       
  3809 ** If the commit fails for any reason, a rollback attempt is made
       
  3810 ** and an error code is returned.  If the commit worked, SQLITE_OK
       
  3811 ** is returned.
       
  3812 */
       
  3813 int sqlite3PagerCommitPhaseTwo(Pager *pPager){
       
  3814   int rc = SQLITE_OK;
       
  3815 
       
  3816   if( pPager->errCode ){
       
  3817     return pPager->errCode;
       
  3818   }
       
  3819   if( pPager->state<PAGER_RESERVED ){
       
  3820     return SQLITE_ERROR;
       
  3821   }
       
  3822   if( pPager->dbModified==0 &&
       
  3823         (pPager->journalMode!=PAGER_JOURNALMODE_DELETE ||
       
  3824           pPager->exclusiveMode!=0) ){
       
  3825     assert( pPager->dirtyCache==0 || pPager->journalOpen==0 );
       
  3826     return SQLITE_OK;
       
  3827   }
       
  3828   PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
       
  3829   if( MEMDB ){
       
  3830     sqlite3PcacheCommit(pPager->pPCache, 0);
       
  3831     sqlite3PcacheCleanAll(pPager->pPCache);
       
  3832     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
       
  3833     pPager->state = PAGER_SHARED;
       
  3834   }else{
       
  3835     assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
       
  3836     rc = pager_end_transaction(pPager, pPager->setMaster);
       
  3837     rc = pager_error(pPager, rc);
       
  3838   }
       
  3839   return rc;
       
  3840 }
       
  3841 
       
  3842 /*
       
  3843 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
       
  3844 ** All in-memory cache pages revert to their original data contents.
       
  3845 ** The journal is deleted.
       
  3846 **
       
  3847 ** This routine cannot fail unless some other process is not following
       
  3848 ** the correct locking protocol or unless some other
       
  3849 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
       
  3850 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
       
  3851 ** codes are returned for all these occasions.  Otherwise,
       
  3852 ** SQLITE_OK is returned.
       
  3853 */
       
  3854 int sqlite3PagerRollback(Pager *pPager){
       
  3855   int rc = SQLITE_OK;
       
  3856   PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
       
  3857   if( MEMDB ){
       
  3858     sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
       
  3859     sqlite3PcacheRollback(pPager->pPCache, 0, pPager->xReiniter);
       
  3860     sqlite3PcacheCleanAll(pPager->pPCache);
       
  3861     sqlite3PcacheAssertFlags(pPager->pPCache, 0, PGHDR_IN_JOURNAL);
       
  3862     pPager->dbSize = pPager->origDbSize;
       
  3863     pager_truncate_cache(pPager);
       
  3864     pPager->stmtInUse = 0;
       
  3865     pPager->state = PAGER_SHARED;
       
  3866   }else if( !pPager->dirtyCache || !pPager->journalOpen ){
       
  3867     rc = pager_end_transaction(pPager, pPager->setMaster);
       
  3868   }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  3869     if( pPager->state>=PAGER_EXCLUSIVE ){
       
  3870       pager_playback(pPager, 0);
       
  3871     }
       
  3872     rc = pPager->errCode;
       
  3873   }else{
       
  3874     if( pPager->state==PAGER_RESERVED ){
       
  3875       int rc2;
       
  3876       rc = pager_playback(pPager, 0);
       
  3877       rc2 = pager_end_transaction(pPager, pPager->setMaster);
       
  3878       if( rc==SQLITE_OK ){
       
  3879         rc = rc2;
       
  3880       }
       
  3881     }else{
       
  3882       rc = pager_playback(pPager, 0);
       
  3883     }
       
  3884 
       
  3885     pPager->dbSize = -1;
       
  3886 
       
  3887     /* If an error occurs during a ROLLBACK, we can no longer trust the pager
       
  3888     ** cache. So call pager_error() on the way out to make any error 
       
  3889     ** persistent.
       
  3890     */
       
  3891     rc = pager_error(pPager, rc);
       
  3892   }
       
  3893   return rc;
       
  3894 }
       
  3895 
       
  3896 /*
       
  3897 ** Return TRUE if the database file is opened read-only.  Return FALSE
       
  3898 ** if the database is (in theory) writable.
       
  3899 */
       
  3900 int sqlite3PagerIsreadonly(Pager *pPager){
       
  3901   return pPager->readOnly;
       
  3902 }
       
  3903 
       
  3904 /*
       
  3905 ** Return the number of references to the pager.
       
  3906 */
       
  3907 int sqlite3PagerRefcount(Pager *pPager){
       
  3908   return sqlite3PcacheRefCount(pPager->pPCache);
       
  3909 }
       
  3910 
       
  3911 /*
       
  3912 ** Return the number of references to the specified page.
       
  3913 */
       
  3914 int sqlite3PagerPageRefcount(DbPage *pPage){
       
  3915   return sqlite3PcachePageRefcount(pPage);
       
  3916 }
       
  3917 
       
  #ifdef SQLITE_TEST
  /*
  ** Testing and analysis aid.  Fill a static 11-element array with a
  ** snapshot of pager counters and return a pointer to it.  The same
  ** array is reused (and overwritten) by every call.
  **
  **    [0]  page cache reference count
  **    [1]  page cache page count
  **    [2]  page cache configured size
  **    [3]  Pager.dbSize
  **    [4]  Pager.state
  **    [5]  Pager.errCode
  **    [6]  Pager.nHit
  **    [7]  Pager.nMiss
  **    [8]  always 0 (used to be Pager.nOvfl)
  **    [9]  Pager.nRead
  **    [10] Pager.nWrite
  */
  int *sqlite3PagerStats(Pager *pPager){
    static int aStat[11];

    /* Values obtained from the page cache */
    aStat[0] = sqlite3PcacheRefCount(pPager->pPCache);
    aStat[1] = sqlite3PcachePagecount(pPager->pPCache);
    aStat[2] = sqlite3PcacheGetCachesize(pPager->pPCache);

    /* Values read directly from the Pager object */
    aStat[3] = pPager->dbSize;
    aStat[4] = pPager->state;
    aStat[5] = pPager->errCode;
    aStat[6] = pPager->nHit;
    aStat[7] = pPager->nMiss;
    aStat[8] = 0;  /* Used to be pPager->nOvfl */
    aStat[9] = pPager->nRead;
    aStat[10] = pPager->nWrite;

    return aStat;
  }

  /*
  ** Testing aid.  Return the value of the MEMDB macro for this pager.
  */
  int sqlite3PagerIsMemdb(Pager *pPager){
    return MEMDB;
  }
  #endif
       
  3941 
       
  3942 /*
       
  3943 ** Set the statement rollback point.
       
  3944 **
       
  3945 ** This routine should be called with the transaction journal already
       
  3946 ** open.  A new statement journal is created that can be used to rollback
       
  3947 ** changes of a single SQL command within a larger transaction.
       
  3948 */
       
  3949 static int pagerStmtBegin(Pager *pPager){
       
  3950   int rc;
       
  3951   assert( !pPager->stmtInUse );
       
  3952   assert( pPager->state>=PAGER_SHARED );
       
  3953   assert( pPager->dbSize>=0 );
       
  3954   PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
       
  3955   if( MEMDB ){
       
  3956     pPager->stmtInUse = 1;
       
  3957     pPager->stmtSize = pPager->dbSize;
       
  3958     return SQLITE_OK;
       
  3959   }
       
  3960   if( !pPager->journalOpen ){
       
  3961     pPager->stmtAutoopen = 1;
       
  3962     return SQLITE_OK;
       
  3963   }
       
  3964   assert( pPager->journalOpen );
       
  3965   assert( pPager->pInStmt==0 );
       
  3966   pPager->pInStmt = sqlite3BitvecCreate(pPager->dbSize);
       
  3967   if( pPager->pInStmt==0 ){
       
  3968     /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
       
  3969     return SQLITE_NOMEM;
       
  3970   }
       
  3971   pPager->stmtJSize = pPager->journalOff;
       
  3972   pPager->stmtSize = pPager->dbSize;
       
  3973   pPager->stmtHdrOff = 0;
       
  3974   pPager->stmtCksum = pPager->cksumInit;
       
  3975   if( !pPager->stmtOpen ){
       
  3976     rc = sqlite3PagerOpentemp(pPager, pPager->stfd, SQLITE_OPEN_SUBJOURNAL);
       
  3977     if( rc ){
       
  3978       goto stmt_begin_failed;
       
  3979     }
       
  3980     pPager->stmtOpen = 1;
       
  3981     pPager->stmtNRec = 0;
       
  3982   }
       
  3983   pPager->stmtInUse = 1;
       
  3984   return SQLITE_OK;
       
  3985  
       
  3986 stmt_begin_failed:
       
  3987   if( pPager->pInStmt ){
       
  3988     sqlite3BitvecDestroy(pPager->pInStmt);
       
  3989     pPager->pInStmt = 0;
       
  3990   }
       
  3991   return rc;
       
  3992 }
       
  3993 int sqlite3PagerStmtBegin(Pager *pPager){
       
  3994   int rc;
       
  3995   rc = pagerStmtBegin(pPager);
       
  3996   return rc;
       
  3997 }
       
  3998 
       
  3999 /*
       
  4000 ** Commit a statement.
       
  4001 */
       
  4002 int sqlite3PagerStmtCommit(Pager *pPager){
       
  4003   if( pPager->stmtInUse ){
       
  4004     PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
       
  4005     if( !MEMDB ){
       
  4006       sqlite3BitvecDestroy(pPager->pInStmt);
       
  4007       pPager->pInStmt = 0;
       
  4008     }else{
       
  4009       sqlite3PcacheCommit(pPager->pPCache, 1);
       
  4010     }
       
  4011     pPager->stmtNRec = 0;
       
  4012     pPager->stmtInUse = 0;
       
  4013   }
       
  4014   pPager->stmtAutoopen = 0;
       
  4015   return SQLITE_OK;
       
  4016 }
       
  4017 
       
  4018 /*
       
  4019 ** Rollback a statement.
       
  4020 */
       
  4021 int sqlite3PagerStmtRollback(Pager *pPager){
       
  4022   int rc;
       
  4023   if( pPager->stmtInUse ){
       
  4024     PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
       
  4025     if( MEMDB ){
       
  4026       sqlite3PcacheRollback(pPager->pPCache, 1, pPager->xReiniter);
       
  4027       pPager->dbSize = pPager->stmtSize;
       
  4028       pager_truncate_cache(pPager);
       
  4029       rc = SQLITE_OK;
       
  4030     }else{
       
  4031       rc = pager_stmt_playback(pPager);
       
  4032     }
       
  4033     sqlite3PagerStmtCommit(pPager);
       
  4034   }else{
       
  4035     rc = SQLITE_OK;
       
  4036   }
       
  4037   pPager->stmtAutoopen = 0;
       
  4038   return rc;
       
  4039 }
       
  4040 
       
  4041 /*
       
  4042 ** Return the full pathname of the database file.
       
  4043 */
       
  4044 const char *sqlite3PagerFilename(Pager *pPager){
       
  4045   return pPager->zFilename;
       
  4046 }
       
  4047 
       
  4048 /*
       
  4049 ** Return the VFS structure for the pager.
       
  4050 */
       
  4051 const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
       
  4052   return pPager->pVfs;
       
  4053 }
       
  4054 
       
  4055 /*
       
  4056 ** Return the file handle for the database file associated
       
  4057 ** with the pager.  This might return NULL if the file has
       
  4058 ** not yet been opened.
       
  4059 */
       
  4060 sqlite3_file *sqlite3PagerFile(Pager *pPager){
       
  4061   return pPager->fd;
       
  4062 }
       
  4063 
       
  4064 /*
       
  4065 ** Return the directory of the database file.
       
  4066 */
       
  4067 const char *sqlite3PagerDirname(Pager *pPager){
       
  4068   return pPager->zDirectory;
       
  4069 }
       
  4070 
       
  4071 /*
       
  4072 ** Return the full pathname of the journal file.
       
  4073 */
       
  4074 const char *sqlite3PagerJournalname(Pager *pPager){
       
  4075   return pPager->zJournal;
       
  4076 }
       
  4077 
       
  4078 /*
       
  4079 ** Return true if fsync() calls are disabled for this pager.  Return FALSE
       
  4080 ** if fsync()s are executed normally.
       
  4081 */
       
  4082 int sqlite3PagerNosync(Pager *pPager){
       
  4083   return pPager->noSync;
       
  4084 }
       
  4085 
       
  #ifdef SQLITE_HAS_CODEC
  /*
  ** Install a codec on this pager.  The callback xCodec and its
  ** argument pCodecArg are simply stored in the Pager object.
  */
  void sqlite3PagerSetCodec(
    Pager *pPager,                            /* Pager to configure */
    void *(*xCodec)(void*,void*,Pgno,int),    /* Codec callback */
    void *pCodecArg                           /* Argument kept for xCodec */
  ){
    pPager->pCodecArg = pCodecArg;
    pPager->xCodec = xCodec;
  }
  #endif
       
  4099 
       
  4100 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4101 /*
       
  4102 ** Move the page pPg to location pgno in the file.
       
  4103 **
       
  4104 ** There must be no references to the page previously located at
       
  4105 ** pgno (which we call pPgOld) though that page is allowed to be
       
  4106 ** in cache.  If the page previously located at pgno is not already
       
  4107 ** in the rollback journal, it is not put there by by this routine.
       
  4108 **
       
  4109 ** References to the page pPg remain valid. Updating any
       
  4110 ** meta-data associated with pPg (i.e. data stored in the nExtra bytes
       
  4111 ** allocated along with the page) is the responsibility of the caller.
       
  4112 **
       
  4113 ** A transaction must be active when this routine is called. It used to be
       
  4114 ** required that a statement transaction was not active, but this restriction
       
  4115 ** has been removed (CREATE INDEX needs to move a page when a statement
       
  4116 ** transaction is active).
       
  4117 **
       
  4118 ** If the fourth argument, isCommit, is non-zero, then this page is being
       
  4119 ** moved as part of a database reorganization just before the transaction 
       
  4120 ** is being committed. In this case, it is guaranteed that the database page 
       
  4121 ** pPg refers to will not be written to again within this transaction.
       
  4122 */
       
  4123 int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
       
  4124   PgHdr *pPgOld;  /* The page being overwritten. */
       
  4125   Pgno needSyncPgno = 0;
       
  4126 
       
  4127   assert( pPg->nRef>0 );
       
  4128 
       
  4129   PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
       
  4130       PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno);
       
  4131   IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
       
  4132 
       
  4133   pager_get_content(pPg);
       
  4134 
       
  4135   /* If the journal needs to be sync()ed before page pPg->pgno can
       
  4136   ** be written to, store pPg->pgno in local variable needSyncPgno.
       
  4137   **
       
  4138   ** If the isCommit flag is set, there is no need to remember that
       
  4139   ** the journal needs to be sync()ed before database page pPg->pgno 
       
  4140   ** can be written to. The caller has already promised not to write to it.
       
  4141   */
       
  4142   if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
       
  4143     needSyncPgno = pPg->pgno;
       
  4144     assert( (pPg->flags&PGHDR_IN_JOURNAL) || (int)pgno>pPager->origDbSize );
       
  4145     assert( pPg->flags&PGHDR_DIRTY );
       
  4146     assert( pPager->needSync );
       
  4147   }
       
  4148 
       
  4149   /* If the cache contains a page with page-number pgno, remove it
       
  4150   ** from its hash chain. Also, if the PgHdr.needSync was set for 
       
  4151   ** page pgno before the 'move' operation, it needs to be retained 
       
  4152   ** for the page moved there.
       
  4153   */
       
  4154   pPg->flags &= ~(PGHDR_NEED_SYNC|PGHDR_IN_JOURNAL);
       
  4155   pPgOld = pager_lookup(pPager, pgno);
       
  4156   assert( !pPgOld || pPgOld->nRef==1 );
       
  4157   if( pPgOld ){
       
  4158     pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
       
  4159   }
       
  4160   if( sqlite3BitvecTest(pPager->pInJournal, pgno) ){
       
  4161     assert( !MEMDB );
       
  4162     pPg->flags |= PGHDR_IN_JOURNAL;
       
  4163   }
       
  4164 
       
  4165   sqlite3PcacheMove(pPg, pgno);
       
  4166   if( pPgOld ){
       
  4167     sqlite3PcacheMove(pPgOld, 0);
       
  4168     sqlite3PcacheRelease(pPgOld);
       
  4169   }
       
  4170 
       
  4171   sqlite3PcacheMakeDirty(pPg);
       
  4172   pPager->dirtyCache = 1;
       
  4173   pPager->dbModified = 1;
       
  4174 
       
  4175   if( needSyncPgno ){
       
  4176     /* If needSyncPgno is non-zero, then the journal file needs to be 
       
  4177     ** sync()ed before any data is written to database file page needSyncPgno.
       
  4178     ** Currently, no such page exists in the page-cache and the 
       
  4179     ** "is journaled" bitvec flag has been set. This needs to be remedied by
       
  4180     ** loading the page into the pager-cache and setting the PgHdr.needSync 
       
  4181     ** flag.
       
  4182     **
       
  4183     ** If the attempt to load the page into the page-cache fails, (due
       
  4184     ** to a malloc() or IO failure), clear the bit in the pInJournal[]
       
  4185     ** array. Otherwise, if the page is loaded and written again in
       
  4186     ** this transaction, it may be written to the database file before
       
  4187     ** it is synced into the journal file. This way, it may end up in
       
  4188     ** the journal file twice, but that is not a problem.
       
  4189     **
       
  4190     ** The sqlite3PagerGet() call may cause the journal to sync. So make
       
  4191     ** sure the Pager.needSync flag is set too.
       
  4192     */
       
  4193     int rc;
       
  4194     PgHdr *pPgHdr;
       
  4195     assert( pPager->needSync );
       
  4196     rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
       
  4197     if( rc!=SQLITE_OK ){
       
  4198       if( pPager->pInJournal && (int)needSyncPgno<=pPager->origDbSize ){
       
  4199         sqlite3BitvecClear(pPager->pInJournal, needSyncPgno);
       
  4200       }
       
  4201       return rc;
       
  4202     }
       
  4203     pPager->needSync = 1;
       
  4204     assert( pPager->noSync==0 && !MEMDB );
       
  4205     pPgHdr->flags |= PGHDR_NEED_SYNC;
       
  4206     pPgHdr->flags |= PGHDR_IN_JOURNAL;
       
  4207     sqlite3PcacheMakeDirty(pPgHdr);
       
  4208     sqlite3PagerUnref(pPgHdr);
       
  4209   }
       
  4210 
       
  4211   return SQLITE_OK;
       
  4212 }
       
  4213 #endif
       
  4214 
       
  4215 /*
       
  4216 ** Return a pointer to the data for the specified page.
       
  4217 */
       
  4218 void *sqlite3PagerGetData(DbPage *pPg){
       
  4219   assert( pPg->nRef>0 || pPg->pPager->memDb );
       
  4220   return pPg->pData;
       
  4221 }
       
  4222 
       
  4223 /*
       
  4224 ** Return a pointer to the Pager.nExtra bytes of "extra" space 
       
  4225 ** allocated along with the specified page.
       
  4226 */
       
  4227 void *sqlite3PagerGetExtra(DbPage *pPg){
       
  4228   Pager *pPager = pPg->pPager;
       
  4229   return (pPager?pPg->pExtra:0);
       
  4230 }
       
  4231 
       
  4232 /*
       
  4233 ** Get/set the locking-mode for this pager. Parameter eMode must be one
       
  4234 ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
       
  4235 ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
       
  4236 ** the locking-mode is set to the value specified.
       
  4237 **
       
  4238 ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
       
  4239 ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
       
  4240 ** locking-mode.
       
  4241 */
       
  4242 int sqlite3PagerLockingMode(Pager *pPager, int eMode){
       
  4243   assert( eMode==PAGER_LOCKINGMODE_QUERY
       
  4244             || eMode==PAGER_LOCKINGMODE_NORMAL
       
  4245             || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
       
  4246   assert( PAGER_LOCKINGMODE_QUERY<0 );
       
  4247   assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
       
  4248   if( eMode>=0 && !pPager->tempFile ){
       
  4249     pPager->exclusiveMode = eMode;
       
  4250   }
       
  4251   return (int)pPager->exclusiveMode;
       
  4252 }
       
  4253 
       
  4254 /*
       
  4255 ** Get/set the journal-mode for this pager. Parameter eMode must be one of:
       
  4256 **
       
  4257 **    PAGER_JOURNALMODE_QUERY
       
  4258 **    PAGER_JOURNALMODE_DELETE
       
  4259 **    PAGER_JOURNALMODE_TRUNCATE
       
  4260 **    PAGER_JOURNALMODE_PERSIST
       
  4261 **    PAGER_JOURNALMODE_OFF
       
  4262 **
       
  4263 ** If the parameter is not _QUERY, then the journal-mode is set to the
       
  4264 ** value specified.
       
  4265 **
       
  4266 ** The returned indicate the current (possibly updated)
       
  4267 ** journal-mode.
       
  4268 */
       
  4269 int sqlite3PagerJournalMode(Pager *pPager, int eMode){
       
  4270   assert( eMode==PAGER_JOURNALMODE_QUERY
       
  4271             || eMode==PAGER_JOURNALMODE_DELETE
       
  4272             || eMode==PAGER_JOURNALMODE_TRUNCATE
       
  4273             || eMode==PAGER_JOURNALMODE_PERSIST
       
  4274             || eMode==PAGER_JOURNALMODE_OFF );
       
  4275   assert( PAGER_JOURNALMODE_QUERY<0 );
       
  4276   if( eMode>=0 ){
       
  4277     pPager->journalMode = eMode;
       
  4278   }else{
       
  4279     assert( eMode==PAGER_JOURNALMODE_QUERY );
       
  4280   }
       
  4281   return (int)pPager->journalMode;
       
  4282 }
       
  4283 
       
  4284 /*
       
  4285 ** Get/set the size-limit used for persistent journal files.
       
  4286 */
       
  4287 i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
       
  4288   if( iLimit>=-1 ){
       
  4289     pPager->journalSizeLimit = iLimit;
       
  4290   }
       
  4291   return pPager->journalSizeLimit;
       
  4292 }
       
  4293 
       
  4294 #endif /* SQLITE_OMIT_DISKIO */