webengine/webkitutils/SqliteSymbian/pager.c
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 ** 2001 September 15
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** This is the implementation of the page cache subsystem or "pager".
       
    13 ** 
       
    14 ** The pager is used to access a database disk file.  It implements
       
    15 ** atomic commit and rollback through the use of a journal file that
       
    16 ** is separate from the database file.  The pager also implements file
       
    17 ** locking to prevent two processes from writing the same database
       
    18 ** file simultaneously, or one process from reading the database while
       
    19 ** another is writing.
       
    20 **
       
    21 ** @(#) $Id: pager.c,v 1.271 2006/08/08 13:51:43 drh Exp $
       
    22 */
       
    23 #ifndef SQLITE_OMIT_DISKIO
       
    24 #include "sqliteInt.h"
       
    25 #include "os.h"
       
    26 #include "pager.h"
       
    27 #include <assert.h>
       
    28 #include <string.h>
       
    29 
       
/*
** Macros for troubleshooting.  Normally turned off.
**
** Change the "#if 0" below to "#if 1" to route every TRACEn() call to
** sqlite3DebugPrintf().  The digit n is the number of macro arguments,
** counting the format string.  While tracing is disabled each TRACEn()
** expands to nothing, so its arguments are never evaluated.
*/
#if 0
#define TRACE1(X)       sqlite3DebugPrintf(X)
#define TRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
#define TRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
#define TRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
#else
#define TRACE1(X)
#define TRACE2(X,Y)
#define TRACE3(X,Y,Z)
#define TRACE4(X,Y,Z,W)
#define TRACE5(X,Y,Z,W,V)
#endif
       
    46 
       
    47 /*
       
    48 ** The following two macros are used within the TRACEX() macros above
       
    49 ** to print out file-descriptors. 
       
    50 **
       
    51 ** PAGERID() takes a pointer to a Pager struct as it's argument. The
       
    52 ** associated file-descriptor is returned. FILEHANDLEID() takes an OsFile
       
    53 ** struct as it's argument.
       
    54 */
       
    55 #define PAGERID(p) ((int)(p->fd))
       
    56 #define FILEHANDLEID(fd) ((int)fd)
       
    57 
       
/*
** The page cache as a whole is always in one of the following
** states:
**
**   PAGER_UNLOCK        The page cache is not currently reading or 
**                       writing the database file.  There is no
**                       data held in memory.  This is the initial
**                       state.
**
**   PAGER_SHARED        The page cache is reading the database.
**                       Writing is not permitted.  There can be
**                       multiple readers accessing the same database
**                       file at the same time.
**
**   PAGER_RESERVED      This process has reserved the database for writing
**                       but has not yet made any changes.  Only one process
**                       at a time can reserve the database.  The original
**                       database file has not been modified so other
**                       processes may still be reading the on-disk
**                       database file.
**
**   PAGER_EXCLUSIVE     The page cache is writing the database.
**                       Access is exclusive.  No other processes or
**                       threads can be reading or writing while one
**                       process is writing.
**
**   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
**                       after all dirty pages have been written to the
**                       database file and the file has been synced to
**                       disk. All that remains to do is to remove the
**                       journal file and the transaction will be
**                       committed.
**
** The page cache comes up in PAGER_UNLOCK.  The first time a
** sqlite3pager_get() occurs, the state transitions to PAGER_SHARED.
** After all pages have been released using sqlite_page_unref(),
** the state transitions back to PAGER_UNLOCK.  The first time
** that sqlite3pager_write() is called, the state transitions to
** PAGER_RESERVED.  (Note that sqlite_page_write() can only be
** called on an outstanding page which means that the pager must
** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
** The transition to PAGER_EXCLUSIVE occurs before any changes
** are made to the database file.  After an sqlite3pager_rollback()
** or sqlite_pager_commit(), the state goes back to PAGER_SHARED.
**
** The first four values are chosen to equal the lock-level constants
** named in the per-line comments, so a pager state can be compared with
** a lock level directly.  The value 3 is not used by any pager state.
** NOTE(review): presumably 3 is a lock level the pager never records as
** a state; confirm against the lock constants in os.h.
*/
#define PAGER_UNLOCK      0
#define PAGER_SHARED      1   /* same as SHARED_LOCK */
#define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
#define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
#define PAGER_SYNCED      5
       
   108 
       
/*
** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
** then failed attempts to get a reserved lock will invoke the busy callback.
** This is off by default.  To see why, consider the following scenario:
** 
** Suppose thread A already has a shared lock and wants a reserved lock.
** Thread B already has a reserved lock and wants an exclusive lock.  If
** both threads are using their busy callbacks, it might be a long time
** before one of the threads gives up and allows the other to proceed.
** But if the thread trying to get the reserved lock gives up quickly
** (if it never invokes its busy callback) then the contention will be
** resolved quickly.
**
** The definition below only supplies the default of 0; a value given on
** the compiler command line takes precedence.
*/
#ifndef SQLITE_BUSY_RESERVED_LOCK
# define SQLITE_BUSY_RESERVED_LOCK 0
#endif
       
   125 
       
/*
** This macro rounds values up so that if the value is an address it
** is guaranteed to be an address that is aligned to an 8-byte boundary.
**
** X must be an integer expression.  The result is X itself when X is
** already a multiple of 8, otherwise the next larger multiple of 8
** (for example 1 -> 8, 8 -> 8, 9 -> 16).
*/
#define FORCE_ALIGNMENT(X)   (((X)+7)&~7)
       
   131 
       
   132 /*
       
   133 ** Each in-memory image of a page begins with the following header.
       
   134 ** This header is only visible to this pager module.  The client
       
   135 ** code that calls pager sees only the data that follows the header.
       
   136 **
       
   137 ** Client code should call sqlite3pager_write() on a page prior to making
       
   138 ** any modifications to that page.  The first time sqlite3pager_write()
       
   139 ** is called, the original page contents are written into the rollback
       
   140 ** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
       
   141 ** the journal page has made it onto the disk surface, PgHdr.needSync
       
   142 ** is cleared.  The modified page cannot be written back into the original
       
   143 ** database file until the journal pages has been synced to disk and the
       
   144 ** PgHdr.needSync has been cleared.
       
   145 **
       
   146 ** The PgHdr.dirty flag is set when sqlite3pager_write() is called and
       
   147 ** is cleared again when the page content is written back to the original
       
   148 ** database file.
       
   149 */
       
   150 typedef struct PgHdr PgHdr;
       
   151 struct PgHdr {
       
   152   Pager *pPager;                 /* The pager to which this page belongs */
       
   153   Pgno pgno;                     /* The page number for this page */
       
   154   PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
       
   155   PgHdr *pNextFree, *pPrevFree;  /* Freelist of pages where nRef==0 */
       
   156   PgHdr *pNextAll;               /* A list of all pages */
       
   157   PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
       
   158   u8 inJournal;                  /* TRUE if has been written to journal */
       
   159   u8 inStmt;                     /* TRUE if in the statement subjournal */
       
   160   u8 dirty;                      /* TRUE if we need to write back changes */
       
   161   u8 needSync;                   /* Sync journal before writing this page */
       
   162   u8 alwaysRollback;             /* Disable dont_rollback() for this page */
       
   163   short int nRef;                /* Number of users of this page */
       
   164   PgHdr *pDirty, *pPrevDirty;    /* Dirty pages */
       
   165   u32 notUsed;                   /* Buffer space */
       
   166 #ifdef SQLITE_CHECK_PAGES
       
   167   u32 pageHash;
       
   168 #endif
       
   169   /* pPager->pageSize bytes of page data follow this header */
       
   170   /* Pager.nExtra bytes of local data follow the page data */
       
   171 };
       
   172 
       
/*
** For an in-memory only database, some extra information is recorded about
** each page so that changes can be rolled back.  (Journal files are not
** used for in-memory databases.)  The following information is added to
** the end of every EXTRA block for in-memory databases.
**
** This information could have been added directly to the PgHdr structure.
** But then it would take up an extra 8 bytes of storage on every PgHdr
** even for disk-based databases.  Splitting it out saves 8 bytes.  This
** is only a savings of 0.8% but those percentages add up.
**
** The PGHDR_TO_HIST() macro locates this structure for a given page: it
** sits pageSize+nExtra bytes past the end of the page's PgHdr.
*/
typedef struct PgHistory PgHistory;
struct PgHistory {
  u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
  u8 *pStmt;     /* Text as it was at the beginning of the current statement */
};
       
   189 
       
   190 /*
       
   191 ** A macro used for invoking the codec if there is one
       
   192 */
       
   193 #ifdef SQLITE_HAS_CODEC
       
   194 # define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
       
   195 # define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
       
   196 #else
       
   197 # define CODEC1(P,D,N,X) /* NO-OP */
       
   198 # define CODEC2(P,D,N,X) ((char*)D)
       
   199 #endif
       
   200 
       
/*
** Convert a pointer to a PgHdr into a pointer to its data
** and back again.
**
**   PGHDR_TO_DATA(P)      P is a PgHdr*.  Yields the address immediately
**                         after the header, as a void*.
**
**   DATA_TO_PGHDR(D)      D is a pointer previously obtained from
**                         PGHDR_TO_DATA().  Yields the PgHdr* located
**                         immediately before it.
**
**   PGHDR_TO_EXTRA(G,P)   G is a PgHdr* and P is a pointer to a structure
**                         with a pageSize member (the Pager).  Yields the
**                         address P->pageSize bytes past the page data.
**
**   PGHDR_TO_HIST(P,PGR)  P is a PgHdr* and PGR is the Pager*.  Yields a
**                         PgHistory* located PGR->pageSize+PGR->nExtra
**                         bytes past the page data.
**
** Note that the parameter named P is the pager in PGHDR_TO_EXTRA but the
** page header in the other three macros.
*/
#define PGHDR_TO_DATA(P)  ((void*)(&(P)[1]))
#define DATA_TO_PGHDR(D)  (&((PgHdr*)(D))[-1])
#define PGHDR_TO_EXTRA(G,P) ((void*)&((char*)(&(G)[1]))[(P)->pageSize])
#define PGHDR_TO_HIST(P,PGR)  \
            ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->pageSize+(PGR)->nExtra])
       
   210 
       
   211 /*
       
   212 ** A open page cache is an instance of the following structure.
       
   213 **
       
   214 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT, SQLITE_PROTOCOL
       
   215 ** or SQLITE_FULL. Once one of the first three errors occurs, it persists
       
   216 ** and is returned as the result of every major pager API call.  The
       
   217 ** SQLITE_FULL return code is slightly different. It persists only until the
       
   218 ** next successful rollback is performed on the pager cache. Also,
       
   219 ** SQLITE_FULL does not affect the sqlite3pager_get() and sqlite3pager_lookup()
       
   220 ** APIs, they may still be used successfully.
       
   221 */
       
   222 struct Pager {
       
   223   u8 journalOpen;             /* True if journal file descriptors is valid */
       
   224   u8 journalStarted;          /* True if header of journal is synced */
       
   225   u8 useJournal;              /* Use a rollback journal on this file */
       
   226   u8 noReadlock;              /* Do not bother to obtain readlocks */
       
   227   u8 stmtOpen;                /* True if the statement subjournal is open */
       
   228   u8 stmtInUse;               /* True we are in a statement subtransaction */
       
   229   u8 stmtAutoopen;            /* Open stmt journal when main journal is opened*/
       
   230   u8 noSync;                  /* Do not sync the journal if true */
       
   231   u8 fullSync;                /* Do extra syncs of the journal for robustness */
       
   232   u8 full_fsync;              /* Use F_FULLFSYNC when available */
       
   233   u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
       
   234   u8 errCode;                 /* One of several kinds of errors */
       
   235   u8 tempFile;                /* zFilename is a temporary file */
       
   236   u8 readOnly;                /* True for a read-only database */
       
   237   u8 needSync;                /* True if an fsync() is needed on the journal */
       
   238   u8 dirtyCache;              /* True if cached pages have changed */
       
   239   u8 alwaysRollback;          /* Disable dont_rollback() for all pages */
       
   240   u8 memDb;                   /* True to inhibit all file I/O */
       
   241   u8 setMaster;               /* True if a m-j name has been written to jrnl */
       
   242   int dbSize;                 /* Number of pages in the file */
       
   243   int origDbSize;             /* dbSize before the current change */
       
   244   int stmtSize;               /* Size of database (in pages) at stmt_begin() */
       
   245   int nRec;                   /* Number of pages written to the journal */
       
   246   u32 cksumInit;              /* Quasi-random value added to every checksum */
       
   247   int stmtNRec;               /* Number of records in stmt subjournal */
       
   248   int nExtra;                 /* Add this many bytes to each in-memory page */
       
   249   int pageSize;               /* Number of bytes in a page */
       
   250   int nPage;                  /* Total number of in-memory pages */
       
   251   int nMaxPage;               /* High water mark of nPage */
       
   252   int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
       
   253   int mxPage;                 /* Maximum number of pages to hold in cache */
       
   254   u8 *aInJournal;             /* One bit for each page in the database file */
       
   255   u8 *aInStmt;                /* One bit for each page in the database */
       
   256   char *zFilename;            /* Name of the database file */
       
   257   char *zJournal;             /* Name of the journal file */
       
   258   char *zDirectory;           /* Directory hold database and journal files */
       
   259   OsFile *fd, *jfd;           /* File descriptors for database and journal */
       
   260   OsFile *stfd;               /* File descriptor for the statement subjournal*/
       
   261   BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
       
   262   PgHdr *pFirst, *pLast;      /* List of free pages */
       
   263   PgHdr *pFirstSynced;        /* First free page with PgHdr.needSync==0 */
       
   264   PgHdr *pAll;                /* List of all pages */
       
   265   PgHdr *pStmt;               /* List of pages in the statement subjournal */
       
   266   PgHdr *pDirty;              /* List of all dirty pages */
       
   267   i64 journalOff;             /* Current byte offset in the journal file */
       
   268   i64 journalHdr;             /* Byte offset to previous journal header */
       
   269   i64 stmtHdrOff;             /* First journal header written this statement */
       
   270   i64 stmtCksum;              /* cksumInit when statement was started */
       
   271   i64 stmtJSize;              /* Size of journal at stmt_begin() */
       
   272   int sectorSize;             /* Assumed sector size during rollback */
       
   273 #ifdef SQLITE_TEST
       
   274   int nHit, nMiss, nOvfl;     /* Cache hits, missing, and LRU overflows */
       
   275   int nRead,nWrite;           /* Database pages read/written */
       
   276 #endif
       
   277   void (*xDestructor)(void*,int); /* Call this routine when freeing pages */
       
   278   void (*xReiniter)(void*,int);   /* Call this routine when reloading pages */
       
   279   void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
       
   280   void *pCodecArg;            /* First argument to xCodec() */
       
   281   int nHash;                  /* Size of the pager hash table */
       
   282   PgHdr **aHash;              /* Hash table to map page number to PgHdr */
       
   283 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   284   Pager *pNext;               /* Linked list of pagers in this thread */
       
   285 #endif
       
   286 };
       
   287 
       
/*
** If SQLITE_TEST is defined then increment the variable given in
** the argument.  Otherwise the macro expands to nothing and the
** argument is not evaluated.
**
** The expansion is "x++" with no added parentheses or semicolon, so
** the argument should be a plain lvalue.
*/
#ifdef SQLITE_TEST
# define TEST_INCR(x)  x++
#else
# define TEST_INCR(x)
#endif
       
   297 
       
/*
** Journal files begin with the following magic string.  The data
** was obtained from /dev/random.  It is used only as a sanity check.
**
** Since version 2.8.0, the journal format contains additional sanity
** checking information.  If the power fails while the journal is being
** written, semi-random garbage data might appear in the journal
** file after power is restored.  If an attempt is then made
** to roll the journal back, the database could be corrupted.  The additional
** sanity checking data is an attempt to discover the garbage in the
** journal and ignore it.
**
** The sanity checking information for the new journal format consists
** of a 32-bit checksum on each page of data.  The checksum covers both
** the page number and the pPager->pageSize bytes of data for the page.
** This cksum is initialized to a 32-bit random value that appears in the
** journal file right after the header.  The random initializer is important,
** because garbage data that appears at the end of a journal is likely
** data that was once in other files that have now been deleted.  If the
** garbage data came from an obsolete journal file, the checksums might
** be correct.  But by initializing the checksum to a random value which
** is different for every journal, we minimize that risk.
**
** The array is exactly 8 bytes long.  readMasterJournal() compares the
** final 8 bytes of a journal file against it.
*/
static const unsigned char aJournalMagic[] = {
  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
};
       
   324 
       
   325 /*
       
   326 ** The size of the header and of each page in the journal is determined
       
   327 ** by the following macros.
       
   328 */
       
   329 #define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)
       
   330 
       
   331 /*
       
   332 ** The journal header size for this pager. In the future, this could be
       
   333 ** set to some value read from the disk controller. The important
       
   334 ** characteristic is that it is the same size as a disk sector.
       
   335 */
       
   336 #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
       
   337 
       
/*
** The macro MEMDB is true if we are dealing with an in-memory database.
** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
** the value of MEMDB will be a constant and the compiler will optimize
** out code that would never execute.
**
** When SQLITE_OMIT_MEMORYDB is not set, MEMDB expands to pPager->memDb,
** so it can only be used where a Pager pointer named "pPager" is in scope.
*/
#ifdef SQLITE_OMIT_MEMORYDB
# define MEMDB 0
#else
# define MEMDB pPager->memDb
#endif
       
   349 
       
/*
** The default size of a disk sector, in bytes.
** NOTE(review): presumably the initial value of Pager.sectorSize; the
** assignment is not visible in this part of the file.
*/
#define PAGER_SECTOR_SIZE 512
       
   354 
       
/*
** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
** reserved for working around a windows/posix incompatibility). It is
** used in the journal to signify that the remainder of the journal file 
** is devoted to storing a master journal name - there are no more pages to
** roll back. See comments for function writeMasterJournal() for details.
**
** The argument x is a pointer to a structure with a pageSize member (the
** Pager).  The value is PENDING_BYTE divided by the page size, plus one.
** The commented-out definition below is the earlier form without the "+1".
*/
/* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
#define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
       
   364 
       
/*
** The maximum legal page number is (2^31 - 1), which is also the largest
** value that fits in a signed 32-bit integer.
*/
#define PAGER_MAX_PGNO 2147483647
       
   369 
       
   370 /*
       
   371 ** Enable reference count tracking (for debugging) here:
       
   372 */
       
   373 #ifdef SQLITE_TEST
       
   374   int pager3_refinfo_enable = 0;
       
   375   static void pager_refinfo(PgHdr *p){
       
   376     static int cnt = 0;
       
   377     if( !pager3_refinfo_enable ) return;
       
   378     sqlite3DebugPrintf(
       
   379        "REFCNT: %4d addr=%p nRef=%d\n",
       
   380        p->pgno, PGHDR_TO_DATA(p), p->nRef
       
   381     );
       
   382     cnt++;   /* Something to set a breakpoint on */
       
   383   }
       
   384 # define REFINFO(X)  pager_refinfo(X)
       
   385 #else
       
   386 # define REFINFO(X)
       
   387 #endif
       
   388 
       
   389 
       
   390 /*
       
   391 ** Change the size of the pager hash table to N.  N must be a power
       
   392 ** of two.
       
   393 */
       
   394 static void pager_resize_hash_table(Pager *pPager, int N){
       
   395   PgHdr **aHash, *pPg;
       
   396   assert( N>0 && (N&(N-1))==0 );
       
   397   aHash = sqliteMalloc( sizeof(aHash[0])*N );
       
   398   if( aHash==0 ){
       
   399     /* Failure to rehash is not an error.  It is only a performance hit. */
       
   400     return;
       
   401   }
       
   402   sqliteFree(pPager->aHash);
       
   403   pPager->nHash = N;
       
   404   pPager->aHash = aHash;
       
   405   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
   406     int h;
       
   407     if( pPg->pgno==0 ){
       
   408       assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
       
   409       continue;
       
   410     }
       
   411     h = pPg->pgno & (N-1);
       
   412     pPg->pNextHash = aHash[h];
       
   413     if( aHash[h] ){
       
   414       aHash[h]->pPrevHash = pPg;
       
   415     }
       
   416     aHash[h] = pPg;
       
   417     pPg->pPrevHash = 0;
       
   418   }
       
   419 }
       
   420 
       
   421 /*
       
   422 ** Read a 32-bit integer from the given file descriptor.  Store the integer
       
   423 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
       
   424 ** error code is something goes wrong.
       
   425 **
       
   426 ** All values are stored on disk as big-endian.
       
   427 */
       
   428 static int read32bits(OsFile *fd, u32 *pRes){
       
   429   unsigned char ac[4];
       
   430   int rc = sqlite3OsRead(fd, ac, sizeof(ac));
       
   431   if( rc==SQLITE_OK ){
       
   432     *pRes = (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
       
   433   }
       
   434   return rc;
       
   435 }
       
   436 
       
   437 /*
       
   438 ** Write a 32-bit integer into a string buffer in big-endian byte order.
       
   439 */
       
   440 static void put32bits(char *ac, u32 val){
       
   441   ac[0] = (val>>24) & 0xff;
       
   442   ac[1] = (val>>16) & 0xff;
       
   443   ac[2] = (val>>8) & 0xff;
       
   444   ac[3] = val & 0xff;
       
   445 }
       
   446 
       
   447 /*
       
   448 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
       
   449 ** on success or an error code is something goes wrong.
       
   450 */
       
   451 static int write32bits(OsFile *fd, u32 val){
       
   452   char ac[4];
       
   453   put32bits(ac, val);
       
   454   return sqlite3OsWrite(fd, ac, 4);
       
   455 }
       
   456 
       
   457 /*
       
   458 ** Read a 32-bit integer at offset 'offset' from the page identified by
       
   459 ** page header 'p'.
       
   460 */
       
   461 static u32 retrieve32bits(PgHdr *p, int offset){
       
   462   unsigned char *ac;
       
   463   ac = &((unsigned char*)PGHDR_TO_DATA(p))[offset];
       
   464   return (ac[0]<<24) | (ac[1]<<16) | (ac[2]<<8) | ac[3];
       
   465 }
       
   466 
       
   467 
       
   468 /*
       
   469 ** This function should be called when an error occurs within the pager
       
   470 ** code. The first argument is a pointer to the pager structure, the
       
   471 ** second the error-code about to be returned by a pager API function. 
       
   472 ** The value returned is a copy of the second argument to this function. 
       
   473 **
       
   474 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT or SQLITE_PROTOCOL,
       
   475 ** the error becomes persistent. All subsequent API calls on this Pager
       
   476 ** will immediately return the same error code.
       
   477 */
       
   478 static int pager_error(Pager *pPager, int rc){
       
   479   assert( pPager->errCode==SQLITE_FULL || pPager->errCode==SQLITE_OK );
       
   480   if( 
       
   481     rc==SQLITE_FULL ||
       
   482     rc==SQLITE_IOERR ||
       
   483     rc==SQLITE_CORRUPT ||
       
   484     rc==SQLITE_PROTOCOL
       
   485   ){
       
   486     pPager->errCode = rc;
       
   487   }
       
   488   return rc;
       
   489 }
       
   490 
       
   491 #ifdef SQLITE_CHECK_PAGES
       
   492 /*
       
   493 ** Return a 32-bit hash of the page data for pPage.
       
   494 */
       
   495 static u32 pager_pagehash(PgHdr *pPage){
       
   496   u32 hash = 0;
       
   497   int i;
       
   498   unsigned char *pData = (unsigned char *)PGHDR_TO_DATA(pPage);
       
   499   for(i=0; i<pPage->pPager->pageSize; i++){
       
   500     hash = (hash+i)^pData[i];
       
   501   }
       
   502   return hash;
       
   503 }
       
   504 
       
   505 /*
       
   506 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
       
   507 ** is defined, and NDEBUG is not defined, an assert() statement checks
       
   508 ** that the page is either dirty or still matches the calculated page-hash.
       
   509 */
       
   510 #define CHECK_PAGE(x) checkPage(x)
       
   511 static void checkPage(PgHdr *pPg){
       
   512   Pager *pPager = pPg->pPager;
       
   513   assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty || 
       
   514       pPg->pageHash==pager_pagehash(pPg) );
       
   515 }
       
   516 
       
   517 #else
       
   518 #define CHECK_PAGE(x)
       
   519 #endif
       
   520 
       
   521 /*
       
   522 ** When this is called the journal file for pager pPager must be open.
       
   523 ** The master journal file name is read from the end of the file and 
       
   524 ** written into memory obtained from sqliteMalloc(). *pzMaster is
       
   525 ** set to point at the memory and SQLITE_OK returned. The caller must
       
   526 ** sqliteFree() *pzMaster.
       
   527 **
       
   528 ** If no master journal file name is present *pzMaster is set to 0 and
       
   529 ** SQLITE_OK returned.
       
   530 */
       
   531 static int readMasterJournal(OsFile *pJrnl, char **pzMaster){
       
   532   int rc;
       
   533   u32 len;
       
   534   i64 szJ;
       
   535   u32 cksum;
       
   536   int i;
       
   537   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
   538 
       
   539   *pzMaster = 0;
       
   540 
       
   541   rc = sqlite3OsFileSize(pJrnl, &szJ);
       
   542   if( rc!=SQLITE_OK || szJ<16 ) return rc;
       
   543 
       
   544   rc = sqlite3OsSeek(pJrnl, szJ-16);
       
   545   if( rc!=SQLITE_OK ) return rc;
       
   546  
       
   547   rc = read32bits(pJrnl, &len);
       
   548   if( rc!=SQLITE_OK ) return rc;
       
   549 
       
   550   rc = read32bits(pJrnl, &cksum);
       
   551   if( rc!=SQLITE_OK ) return rc;
       
   552 
       
   553   rc = sqlite3OsRead(pJrnl, aMagic, 8);
       
   554   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
       
   555 
       
   556   rc = sqlite3OsSeek(pJrnl, szJ-16-len);
       
   557   if( rc!=SQLITE_OK ) return rc;
       
   558 
       
   559   *pzMaster = (char *)sqliteMalloc(len+1);
       
   560   if( !*pzMaster ){
       
   561     return SQLITE_NOMEM;
       
   562   }
       
   563   rc = sqlite3OsRead(pJrnl, *pzMaster, len);
       
   564   if( rc!=SQLITE_OK ){
       
   565     sqliteFree(*pzMaster);
       
   566     *pzMaster = 0;
       
   567     return rc;
       
   568   }
       
   569 
       
   570   /* See if the checksum matches the master journal name */
       
   571   for(i=0; i<len; i++){
       
   572     cksum -= (*pzMaster)[i];
       
   573   }
       
   574   if( cksum ){
       
   575     /* If the checksum doesn't add up, then one or more of the disk sectors
       
   576     ** containing the master journal filename is corrupted. This means
       
   577     ** definitely roll back, so just return SQLITE_OK and report a (nul)
       
   578     ** master-journal filename.
       
   579     */
       
   580     sqliteFree(*pzMaster);
       
   581     *pzMaster = 0;
       
   582   }else{
       
   583     (*pzMaster)[len] = '\0';
       
   584   }
       
   585    
       
   586   return SQLITE_OK;
       
   587 }
       
   588 
       
   589 /*
       
   590 ** Seek the journal file descriptor to the next sector boundary where a
       
   591 ** journal header may be read or written. Pager.journalOff is updated with
       
   592 ** the new seek offset.
       
   593 **
       
   594 ** i.e for a sector size of 512:
       
   595 **
       
   596 ** Input Offset              Output Offset
       
   597 ** ---------------------------------------
       
   598 ** 0                         0
       
   599 ** 512                       512
       
   600 ** 100                       512
       
   601 ** 2000                      2048
       
   602 ** 
       
   603 */
       
   604 static int seekJournalHdr(Pager *pPager){
       
   605   i64 offset = 0;
       
   606   i64 c = pPager->journalOff;
       
   607   if( c ){
       
   608     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
       
   609   }
       
   610   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
       
   611   assert( offset>=c );
       
   612   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
       
   613   pPager->journalOff = offset;
       
   614   return sqlite3OsSeek(pPager->jfd, pPager->journalOff);
       
   615 }
       
   616 
       
   617 /*
       
   618 ** The journal file must be open when this routine is called. A journal
       
   619 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
       
   620 ** current location.
       
   621 **
       
   622 ** The format for the journal header is as follows:
       
   623 ** - 8 bytes: Magic identifying journal format.
       
   624 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
       
   625 ** - 4 bytes: Random number used for page hash.
       
   626 ** - 4 bytes: Initial database page count.
       
   627 ** - 4 bytes: Sector size used by the process that wrote this journal.
       
   628 ** 
       
   629 ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
       
   630 */
       
   631 static int writeJournalHdr(Pager *pPager){
       
   632   char zHeader[sizeof(aJournalMagic)+16];
       
   633 
       
   634   int rc = seekJournalHdr(pPager);
       
   635   if( rc ) return rc;
       
   636 
       
   637   pPager->journalHdr = pPager->journalOff;
       
   638   if( pPager->stmtHdrOff==0 ){
       
   639     pPager->stmtHdrOff = pPager->journalHdr;
       
   640   }
       
   641   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
       
   642 
       
   643   /* FIX ME: 
       
   644   **
       
   645   ** Possibly for a pager not in no-sync mode, the journal magic should not
       
   646   ** be written until nRec is filled in as part of next syncJournal(). 
       
   647   **
       
   648   ** Actually maybe the whole journal header should be delayed until that
       
   649   ** point. Think about this.
       
   650   */
       
   651   memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
       
   652   /* The nRec Field. 0xFFFFFFFF for no-sync journals. */
       
   653   put32bits(&zHeader[sizeof(aJournalMagic)], pPager->noSync ? 0xffffffff : 0);
       
   654   /* The random check-hash initialiser */ 
       
   655   sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
       
   656   put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
       
   657   /* The initial database size */
       
   658   put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
       
   659   /* The assumed sector size for this process */
       
   660   put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
       
   661   rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader));
       
   662 
       
   663   /* The journal header has been written successfully. Seek the journal
       
   664   ** file descriptor to the end of the journal header sector.
       
   665   */
       
   666   if( rc==SQLITE_OK ){
       
   667     rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff-1);
       
   668     if( rc==SQLITE_OK ){
       
   669       rc = sqlite3OsWrite(pPager->jfd, "\000", 1);
       
   670     }
       
   671   }
       
   672   return rc;
       
   673 }
       
   674 
       
   675 /*
       
   676 ** The journal file must be open when this is called. A journal header file
       
   677 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
       
   678 ** file. See comments above function writeJournalHdr() for a description of
       
   679 ** the journal header format.
       
   680 **
       
   681 ** If the header is read successfully, *nRec is set to the number of
       
   682 ** page records following this header and *dbSize is set to the size of the
       
   683 ** database before the transaction began, in pages. Also, pPager->cksumInit
       
   684 ** is set to the value read from the journal header. SQLITE_OK is returned
       
   685 ** in this case.
       
   686 **
       
   687 ** If the journal header file appears to be corrupted, SQLITE_DONE is
       
   688 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
       
   689 ** cannot be read from the journal file an error code is returned.
       
   690 */
       
   691 static int readJournalHdr(
       
   692   Pager *pPager, 
       
   693   i64 journalSize,
       
   694   u32 *pNRec, 
       
   695   u32 *pDbSize
       
   696 ){
       
   697   int rc;
       
   698   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
   699 
       
   700   rc = seekJournalHdr(pPager);
       
   701   if( rc ) return rc;
       
   702 
       
   703   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
       
   704     return SQLITE_DONE;
       
   705   }
       
   706 
       
   707   rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic));
       
   708   if( rc ) return rc;
       
   709 
       
   710   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
       
   711     return SQLITE_DONE;
       
   712   }
       
   713 
       
   714   rc = read32bits(pPager->jfd, pNRec);
       
   715   if( rc ) return rc;
       
   716 
       
   717   rc = read32bits(pPager->jfd, &pPager->cksumInit);
       
   718   if( rc ) return rc;
       
   719 
       
   720   rc = read32bits(pPager->jfd, pDbSize);
       
   721   if( rc ) return rc;
       
   722 
       
   723   /* Update the assumed sector-size to match the value used by 
       
   724   ** the process that created this journal. If this journal was
       
   725   ** created by a process other than this one, then this routine
       
   726   ** is being called from within pager_playback(). The local value
       
   727   ** of Pager.sectorSize is restored at the end of that routine.
       
   728   */
       
   729   rc = read32bits(pPager->jfd, (u32 *)&pPager->sectorSize);
       
   730   if( rc ) return rc;
       
   731 
       
   732   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
       
   733   rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
       
   734   return rc;
       
   735 }
       
   736 
       
   737 
       
   738 /*
       
   739 ** Write the supplied master journal name into the journal file for pager
       
   740 ** pPager at the current location. The master journal name must be the last
       
   741 ** thing written to a journal file. If the pager is in full-sync mode, the
       
   742 ** journal file descriptor is advanced to the next sector boundary before
       
   743 ** anything is written. The format is:
       
   744 **
       
   745 ** + 4 bytes: PAGER_MJ_PGNO.
       
   746 ** + N bytes: length of master journal name.
       
   747 ** + 4 bytes: N
       
   748 ** + 4 bytes: Master journal name checksum.
       
   749 ** + 8 bytes: aJournalMagic[].
       
   750 **
       
   751 ** The master journal page checksum is the sum of the bytes in the master
       
   752 ** journal name.
       
   753 **
       
   754 ** If zMaster is a NULL pointer (occurs for a single database transaction), 
       
   755 ** this call is a no-op.
       
   756 */
       
   757 static int writeMasterJournal(Pager *pPager, const char *zMaster){
       
   758   int rc;
       
   759   int len; 
       
   760   int i; 
       
   761   u32 cksum = 0;
       
   762   char zBuf[sizeof(aJournalMagic)+2*4];
       
   763 
       
   764   if( !zMaster || pPager->setMaster) return SQLITE_OK;
       
   765   pPager->setMaster = 1;
       
   766 
       
   767   len = strlen(zMaster);
       
   768   for(i=0; i<len; i++){
       
   769     cksum += zMaster[i];
       
   770   }
       
   771 
       
   772   /* If in full-sync mode, advance to the next disk sector before writing
       
   773   ** the master journal name. This is in case the previous page written to
       
   774   ** the journal has already been synced.
       
   775   */
       
   776   if( pPager->fullSync ){
       
   777     rc = seekJournalHdr(pPager);
       
   778     if( rc!=SQLITE_OK ) return rc;
       
   779   }
       
   780   pPager->journalOff += (len+20);
       
   781 
       
   782   rc = write32bits(pPager->jfd, PAGER_MJ_PGNO(pPager));
       
   783   if( rc!=SQLITE_OK ) return rc;
       
   784 
       
   785   rc = sqlite3OsWrite(pPager->jfd, zMaster, len);
       
   786   if( rc!=SQLITE_OK ) return rc;
       
   787 
       
   788   put32bits(zBuf, len);
       
   789   put32bits(&zBuf[4], cksum);
       
   790   memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
       
   791   rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic));
       
   792   pPager->needSync = !pPager->noSync;
       
   793   return rc;
       
   794 }
       
   795 
       
   796 /*
       
   797 ** Add or remove a page from the list of all pages that are in the
       
   798 ** statement journal.
       
   799 **
       
   800 ** The Pager keeps a separate list of pages that are currently in
       
   801 ** the statement journal.  This helps the sqlite3pager_stmt_commit()
       
   802 ** routine run MUCH faster for the common case where there are many
       
   803 ** pages in memory but only a few are in the statement journal.
       
   804 */
       
   805 static void page_add_to_stmt_list(PgHdr *pPg){
       
   806   Pager *pPager = pPg->pPager;
       
   807   if( pPg->inStmt ) return;
       
   808   assert( pPg->pPrevStmt==0 && pPg->pNextStmt==0 );
       
   809   pPg->pPrevStmt = 0;
       
   810   if( pPager->pStmt ){
       
   811     pPager->pStmt->pPrevStmt = pPg;
       
   812   }
       
   813   pPg->pNextStmt = pPager->pStmt;
       
   814   pPager->pStmt = pPg;
       
   815   pPg->inStmt = 1;
       
   816 }
       
   817 static void page_remove_from_stmt_list(PgHdr *pPg){
       
   818   if( !pPg->inStmt ) return;
       
   819   if( pPg->pPrevStmt ){
       
   820     assert( pPg->pPrevStmt->pNextStmt==pPg );
       
   821     pPg->pPrevStmt->pNextStmt = pPg->pNextStmt;
       
   822   }else{
       
   823     assert( pPg->pPager->pStmt==pPg );
       
   824     pPg->pPager->pStmt = pPg->pNextStmt;
       
   825   }
       
   826   if( pPg->pNextStmt ){
       
   827     assert( pPg->pNextStmt->pPrevStmt==pPg );
       
   828     pPg->pNextStmt->pPrevStmt = pPg->pPrevStmt;
       
   829   }
       
   830   pPg->pNextStmt = 0;
       
   831   pPg->pPrevStmt = 0;
       
   832   pPg->inStmt = 0;
       
   833 }
       
   834 
       
   835 /*
       
   836 ** Find a page in the hash table given its page number.  Return
       
   837 ** a pointer to the page or NULL if not found.
       
   838 */
       
   839 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
       
   840   PgHdr *p;
       
   841   if( pPager->aHash==0 ) return 0;
       
   842   p = pPager->aHash[pgno & (pPager->nHash-1)];
       
   843   while( p && p->pgno!=pgno ){
       
   844     p = p->pNextHash;
       
   845   }
       
   846   return p;
       
   847 }
       
   848 
       
   849 /*
       
   850 ** Unlock the database and clear the in-memory cache.  This routine
       
   851 ** sets the state of the pager back to what it was when it was first
       
   852 ** opened.  Any outstanding pages are invalidated and subsequent attempts
       
   853 ** to access those pages will likely result in a coredump.
       
   854 */
       
   855 static void pager_reset(Pager *pPager){
       
   856   PgHdr *pPg, *pNext;
       
   857   if( pPager->errCode ) return;
       
   858   for(pPg=pPager->pAll; pPg; pPg=pNext){
       
   859     pNext = pPg->pNextAll;
       
   860     sqliteFree(pPg);
       
   861   }
       
   862   pPager->pFirst = 0;
       
   863   pPager->pFirstSynced = 0;
       
   864   pPager->pLast = 0;
       
   865   pPager->pAll = 0;
       
   866   pPager->nHash = 0;
       
   867   sqliteFree(pPager->aHash);
       
   868   pPager->nPage = 0;
       
   869   pPager->aHash = 0;
       
   870   if( pPager->state>=PAGER_RESERVED ){
       
   871     sqlite3pager_rollback(pPager);
       
   872   }
       
   873   sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
   874   pPager->state = PAGER_UNLOCK;
       
   875   pPager->dbSize = -1;
       
   876   pPager->nRef = 0;
       
   877   assert( pPager->journalOpen==0 );
       
   878 }
       
   879 
       
   880 /*
       
   881 ** When this routine is called, the pager has the journal file open and
       
   882 ** a RESERVED or EXCLUSIVE lock on the database.  This routine releases
       
   883 ** the database lock and acquires a SHARED lock in its place.  The journal
       
   884 ** file is deleted and closed.
       
   885 **
       
   886 ** TODO: Consider keeping the journal file open for temporary databases.
       
   887 ** This might give a performance improvement on windows where opening
       
   888 ** a file is an expensive operation.
       
   889 */
       
   890 static int pager_unwritelock(Pager *pPager){
       
   891   PgHdr *pPg;
       
   892   int rc;
       
   893   assert( !MEMDB );
       
   894   if( pPager->state<PAGER_RESERVED ){
       
   895     return SQLITE_OK;
       
   896   }
       
   897   sqlite3pager_stmt_commit(pPager);
       
   898   if( pPager->stmtOpen ){
       
   899     sqlite3OsClose(&pPager->stfd);
       
   900     pPager->stmtOpen = 0;
       
   901   }
       
   902   if( pPager->journalOpen ){
       
   903     sqlite3OsClose(&pPager->jfd);
       
   904     pPager->journalOpen = 0;
       
   905     sqlite3OsDelete(pPager->zJournal);
       
   906     sqliteFree( pPager->aInJournal );
       
   907     pPager->aInJournal = 0;
       
   908     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
   909       pPg->inJournal = 0;
       
   910       pPg->dirty = 0;
       
   911       pPg->needSync = 0;
       
   912 #ifdef SQLITE_CHECK_PAGES
       
   913       pPg->pageHash = pager_pagehash(pPg);
       
   914 #endif
       
   915     }
       
   916     pPager->pDirty = 0;
       
   917     pPager->dirtyCache = 0;
       
   918     pPager->nRec = 0;
       
   919   }else{
       
   920     assert( pPager->aInJournal==0 );
       
   921     assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
       
   922   }
       
   923   rc = sqlite3OsUnlock(pPager->fd, SHARED_LOCK);
       
   924   pPager->state = PAGER_SHARED;
       
   925   pPager->origDbSize = 0;
       
   926   pPager->setMaster = 0;
       
   927   pPager->needSync = 0;
       
   928   pPager->pFirstSynced = pPager->pFirst;
       
   929   return rc;
       
   930 }
       
   931 
       
   932 /*
       
   933 ** Compute and return a checksum for the page of data.
       
   934 **
       
   935 ** This is not a real checksum.  It is really just the sum of the 
       
   936 ** random initial value and the page number.  We experimented with
       
   937 ** a checksum of the entire data, but that was found to be too slow.
       
   938 **
       
   939 ** Note that the page number is stored at the beginning of data and
       
   940 ** the checksum is stored at the end.  This is important.  If journal
       
   941 ** corruption occurs due to a power failure, the most likely scenario
       
   942 ** is that one end or the other of the record will be changed.  It is
       
   943 ** much less likely that the two ends of the journal record will be
       
   944 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
       
   945 ** though fast and simple, catches the mostly likely kind of corruption.
       
   946 **
       
   947 ** FIX ME:  Consider adding every 200th (or so) byte of the data to the
       
   948 ** checksum.  That way if a single page spans 3 or more disk sectors and
       
   949 ** only the middle sector is corrupt, we will still have a reasonable
       
   950 ** chance of failing the checksum and thus detecting the problem.
       
   951 */
       
   952 static u32 pager_cksum(Pager *pPager, const u8 *aData){
       
   953   u32 cksum = pPager->cksumInit;
       
   954   int i = pPager->pageSize-200;
       
   955   while( i>0 ){
       
   956     cksum += aData[i];
       
   957     i -= 200;
       
   958   }
       
   959   return cksum;
       
   960 }
       
   961 
       
/* Forward declaration.  makeClean() is declared here so that
** pager_playback_one_page() below can call it. */
static void makeClean(PgHdr*);
       
   964 
       
/*
** Read a single page record from the journal file opened on file
** descriptor jfd and play it back.
**
** Parameters:
**   pPager    The pager being rolled back.
**   jfd       The journal to read from: pPager->jfd or pPager->stfd.
**   useCksum  True if each record is followed by a 4-byte checksum.
**
** If useCksum==0 it means this journal does not use checksums.  Checksums
** are not used in statement journals because statement journals do not
** need to survive power failures.
**
** Return values:
**   SQLITE_OK    The record was played back, or it was skipped because
**                its page number lies beyond Pager.dbSize.
**   SQLITE_DONE  The record is not a valid page record (page number 0 or
**                PAGER_MJ_PGNO, or a checksum mismatch).
**   other        An error code from reading the journal or writing the
**                database file.
**
** Pager.journalOff is advanced by the number of journal bytes consumed.
*/
static int pager_playback_one_page(Pager *pPager, OsFile *jfd, int useCksum){
  int rc;
  PgHdr *pPg;                   /* An existing page in the cache */
  Pgno pgno;                    /* The page number of a page in journal */
  u32 cksum;                    /* Checksum used for sanity checking */
  u8 aData[SQLITE_MAX_PAGE_SIZE];  /* Temp storage for a page */

  /* useCksum should be true for the main journal and false for
  ** statement journals.  Verify that this is always the case
  */
  assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );


  /* A record starts with a 4-byte page number followed by one page of
  ** data. */
  rc = read32bits(jfd, &pgno);
  if( rc!=SQLITE_OK ) return rc;
  rc = sqlite3OsRead(jfd, &aData, pPager->pageSize);
  if( rc!=SQLITE_OK ) return rc;
  pPager->journalOff += pPager->pageSize + 4;

  /* Sanity checking on the page.  This is more important than I originally
  ** thought.  If a power failure occurs while the journal is being written,
  ** it could cause invalid data to be written into the journal.  We need to
  ** detect this invalid data (with high probability) and ignore it.
  */
  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
    return SQLITE_DONE;
  }
  /* NOTE(review): when useCksum is true this early return leaves the
  ** record's 4-byte checksum unread and Pager.journalOff not advanced
  ** past it -- confirm that callers tolerate this. */
  if( pgno>(unsigned)pPager->dbSize ){
    return SQLITE_OK;
  }
  if( useCksum ){
    rc = read32bits(jfd, &cksum);
    if( rc ) return rc;
    pPager->journalOff += 4;
    if( pager_cksum(pPager, aData)!=cksum ){
      return SQLITE_DONE;
    }
  }

  assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );

  /* If the pager is in RESERVED state, then there must be a copy of this
  ** page in the pager cache. In this case just update the pager cache,
  ** not the database file. The page is left marked dirty in this case.
  **
  ** If in EXCLUSIVE state, then we update the pager cache if it exists
  ** and the main file. The page is then marked not dirty.
  **
  ** Ticket #1171:  The statement journal might contain page content that is
  ** different from the page content at the start of the transaction.
  ** This occurs when a page is changed prior to the start of a statement
  ** then changed again within the statement.  When rolling back such a
  ** statement we must not write to the original database unless we know
  ** for certain that original page contents are in the main rollback
  ** journal.  Otherwise, if a full ROLLBACK occurs after the statement
  ** rollback the full ROLLBACK will not restore the page to its original
  ** content.  Two conditions must be met before writing to the database
  ** files. (1) the database must be locked.  (2) we know that the original
  ** page content is in the main journal either because the page is not in
  ** cache or else it is marked as needSync==0.
  */
  pPg = pager_lookup(pPager, pgno);
  assert( pPager->state>=PAGER_EXCLUSIVE || pPg!=0 );
  TRACE3("PLAYBACK %d page %d\n", PAGERID(pPager), pgno);
  if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
    /* Write the journalled content back into the database file.  A seek
    ** or write failure is remembered in rc and returned at the end; the
    ** cached copy (if any) is still updated below. */
    rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
    if( rc==SQLITE_OK ){
      rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize);
    }
    if( pPg ){
      makeClean(pPg);
    }
  }
  if( pPg ){
    /* No page should ever be explicitly rolled back that is in use, except
    ** for page 1 which is held in use in order to keep the lock on the
    ** database active. However such a page may be rolled back as a result
    ** of an internal error resulting in an automatic call to
    ** sqlite3pager_rollback().
    */
    void *pData;
    /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
    /* Overwrite the cached copy with the journalled content. */
    pData = PGHDR_TO_DATA(pPg);
    memcpy(pData, aData, pPager->pageSize);
    if( pPager->xDestructor ){  /*** FIX ME:  Should this be xReinit? ***/
      pPager->xDestructor(pData, pPager->pageSize);
    }
#ifdef SQLITE_CHECK_PAGES
    pPg->pageHash = pager_pagehash(pPg);
#endif
    CODEC1(pPager, pData, pPg->pgno, 3);
  }
  return rc;
}
       
  1067 
       
  1068 /*
       
  1069 ** Parameter zMaster is the name of a master journal file. A single journal
       
  1070 ** file that referred to the master journal file has just been rolled back.
       
  1071 ** This routine checks if it is possible to delete the master journal file,
       
  1072 ** and does so if it is.
       
  1073 **
       
  1074 ** The master journal file contains the names of all child journals.
       
  1075 ** To tell if a master journal can be deleted, check to each of the
       
  1076 ** children.  If all children are either missing or do not refer to
       
  1077 ** a different master journal, then this master journal can be deleted.
       
  1078 */
       
  1079 static int pager_delmaster(const char *zMaster){
       
  1080   int rc;
       
  1081   int master_open = 0;
       
  1082   OsFile *master = 0;
       
  1083   char *zMasterJournal = 0; /* Contents of master journal file */
       
  1084   i64 nMasterJournal;       /* Size of master journal file */
       
  1085 
       
  1086   /* Open the master journal file exclusively in case some other process
       
  1087   ** is running this routine also. Not that it makes too much difference.
       
  1088   */
       
  1089   rc = sqlite3OsOpenReadOnly(zMaster, &master);
       
  1090   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1091   master_open = 1;
       
  1092   rc = sqlite3OsFileSize(master, &nMasterJournal);
       
  1093   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1094 
       
  1095   if( nMasterJournal>0 ){
       
  1096     char *zJournal;
       
  1097     char *zMasterPtr = 0;
       
  1098 
       
  1099     /* Load the entire master journal file into space obtained from
       
  1100     ** sqliteMalloc() and pointed to by zMasterJournal. 
       
  1101     */
       
  1102     zMasterJournal = (char *)sqliteMalloc(nMasterJournal);
       
  1103     if( !zMasterJournal ){
       
  1104       rc = SQLITE_NOMEM;
       
  1105       goto delmaster_out;
       
  1106     }
       
  1107     rc = sqlite3OsRead(master, zMasterJournal, nMasterJournal);
       
  1108     if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1109 
       
  1110     zJournal = zMasterJournal;
       
  1111     while( (zJournal-zMasterJournal)<nMasterJournal ){
       
  1112       if( sqlite3OsFileExists(zJournal) ){
       
  1113         /* One of the journals pointed to by the master journal exists.
       
  1114         ** Open it and check if it points at the master journal. If
       
  1115         ** so, return without deleting the master journal file.
       
  1116         */
       
  1117         OsFile *journal = 0;
       
  1118         int c;
       
  1119 
       
  1120         rc = sqlite3OsOpenReadOnly(zJournal, &journal);
       
  1121         if( rc!=SQLITE_OK ){
       
  1122           goto delmaster_out;
       
  1123         }
       
  1124 
       
  1125         rc = readMasterJournal(journal, &zMasterPtr);
       
  1126         sqlite3OsClose(&journal);
       
  1127         if( rc!=SQLITE_OK ){
       
  1128           goto delmaster_out;
       
  1129         }
       
  1130 
       
  1131         c = zMasterPtr!=0 && strcmp(zMasterPtr, zMaster)==0;
       
  1132         sqliteFree(zMasterPtr);
       
  1133         if( c ){
       
  1134           /* We have a match. Do not delete the master journal file. */
       
  1135           goto delmaster_out;
       
  1136         }
       
  1137       }
       
  1138       zJournal += (strlen(zJournal)+1);
       
  1139     }
       
  1140   }
       
  1141   
       
  1142   sqlite3OsDelete(zMaster);
       
  1143 
       
  1144 delmaster_out:
       
  1145   if( zMasterJournal ){
       
  1146     sqliteFree(zMasterJournal);
       
  1147   }  
       
  1148   if( master_open ){
       
  1149     sqlite3OsClose(&master);
       
  1150   }
       
  1151   return rc;
       
  1152 }
       
  1153 
       
  1154 /*
       
  1155 ** Make every page in the cache agree with what is on disk.  In other words,
       
  1156 ** reread the disk to reset the state of the cache.
       
  1157 **
       
  1158 ** This routine is called after a rollback in which some of the dirty cache
       
  1159 ** pages had never been written out to disk.  We need to roll back the
       
  1160 ** cache content and the easiest way to do that is to reread the old content
       
  1161 ** back from the disk.
       
  1162 */
       
  1163 static int pager_reload_cache(Pager *pPager){
       
  1164   PgHdr *pPg;
       
  1165   int rc = SQLITE_OK;
       
  1166   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  1167     char zBuf[SQLITE_MAX_PAGE_SIZE];
       
  1168     if( !pPg->dirty ) continue;
       
  1169     if( (int)pPg->pgno <= pPager->origDbSize ){
       
  1170       rc = sqlite3OsSeek(pPager->fd, pPager->pageSize*(i64)(pPg->pgno-1));
       
  1171       if( rc==SQLITE_OK ){
       
  1172         rc = sqlite3OsRead(pPager->fd, zBuf, pPager->pageSize);
       
  1173       }
       
  1174       TRACE3("REFETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  1175       if( rc ) break;
       
  1176       CODEC1(pPager, zBuf, pPg->pgno, 2);
       
  1177     }else{
       
  1178       memset(zBuf, 0, pPager->pageSize);
       
  1179     }
       
  1180     if( pPg->nRef==0 || memcmp(zBuf, PGHDR_TO_DATA(pPg), pPager->pageSize) ){
       
  1181       memcpy(PGHDR_TO_DATA(pPg), zBuf, pPager->pageSize);
       
  1182       if( pPager->xReiniter ){
       
  1183         pPager->xReiniter(PGHDR_TO_DATA(pPg), pPager->pageSize);
       
  1184       }else{
       
  1185         memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
       
  1186       }
       
  1187     }
       
  1188     pPg->needSync = 0;
       
  1189     pPg->dirty = 0;
       
  1190 #ifdef SQLITE_CHECK_PAGES
       
  1191     pPg->pageHash = pager_pagehash(pPg);
       
  1192 #endif
       
  1193   }
       
  1194   pPager->pDirty = 0;
       
  1195   return rc;
       
  1196 }
       
  1197 
       
  1198 /*
       
  1199 ** Truncate the main file of the given pager to the number of pages
       
  1200 ** indicated.
       
  1201 */
       
  1202 static int pager_truncate(Pager *pPager, int nPage){
       
  1203   assert( pPager->state>=PAGER_EXCLUSIVE );
       
  1204   return sqlite3OsTruncate(pPager->fd, pPager->pageSize*(i64)nPage);
       
  1205 }
       
  1206 
       
  1207 /*
       
  1208 ** Playback the journal and thus restore the database file to
       
  1209 ** the state it was in before we started making changes.  
       
  1210 **
       
  1211 ** The journal file format is as follows: 
       
  1212 **
       
  1213 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
       
  1214 **  (2)  4 byte big-endian integer which is the number of valid page records
       
  1215 **       in the journal.  If this value is 0xffffffff, then compute the
       
  1216 **       number of page records from the journal size.
       
  1217 **  (3)  4 byte big-endian integer which is the initial value for the 
       
  1218 **       sanity checksum.
       
  1219 **  (4)  4 byte integer which is the number of pages to truncate the
       
  1220 **       database to during a rollback.
       
  1221 **  (5)  4 byte integer which is the number of bytes in the master journal
       
  1222 **       name.  The value may be zero (indicate that there is no master
       
  1223 **       journal.)
       
  1224 **  (6)  N bytes of the master journal name.  The name will be nul-terminated
       
  1225 **       and might be shorter than the value read from (5).  If the first byte
       
  1226 **       of the name is \000 then there is no master journal.  The master
       
  1227 **       journal name is stored in UTF-8.
       
  1228 **  (7)  Zero or more page instances, each as follows:
       
  1229 **        +  4 byte page number.
       
  1230 **        +  pPager->pageSize bytes of data.
       
  1231 **        +  4 byte checksum
       
  1232 **
       
  1233 ** When we speak of the journal header, we mean the first 6 items above.
       
  1234 ** Each entry in the journal is an instance of the 7th item.
       
  1235 **
       
  1236 ** Call the value from the second bullet "nRec".  nRec is the number of
       
  1237 ** valid page entries in the journal.  In most cases, you can compute the
       
  1238 ** value of nRec from the size of the journal file.  But if a power
       
  1239 ** failure occurred while the journal was being written, it could be the
       
  1240 ** case that the size of the journal file had already been increased but
       
  1241 ** the extra entries had not yet made it safely to disk.  In such a case,
       
  1242 ** the value of nRec computed from the file size would be too large.  For
       
  1243 ** that reason, we always use the nRec value in the header.
       
  1244 **
       
  1245 ** If the nRec value is 0xffffffff it means that nRec should be computed
       
  1246 ** from the file size.  This value is used when the user selects the
       
  1247 ** no-sync option for the journal.  A power failure could lead to corruption
       
  1248 ** in this case.  But for things like temporary table (which will be
       
  1249 ** deleted when the power is restored) we don't care.  
       
  1250 **
       
  1251 ** If the file opened as the journal file is not a well-formed
       
  1252 ** journal file then all pages up to the first corrupted page are rolled
       
  1253 ** back (or no pages if the journal header is corrupted). The journal file
       
  1254 ** is then deleted and SQLITE_OK returned, just as if no corruption had
       
  1255 ** been encountered.
       
  1256 **
       
  1257 ** If an I/O or malloc() error occurs, the journal-file is not deleted
       
  1258 ** and an error code is returned.
       
  1259 */
       
  1260 static int pager_playback(Pager *pPager){
       
  1261   i64 szJ;                 /* Size of the journal file in bytes */
       
  1262   u32 nRec;                /* Number of Records in the journal */
       
  1263   int i;                   /* Loop counter */
       
  1264   Pgno mxPg = 0;           /* Size of the original file in pages */
       
  1265   int rc;                  /* Result code of a subroutine */
       
  1266   char *zMaster = 0;       /* Name of master journal file if any */
       
  1267 
       
  1268   /* Figure out how many records are in the journal.  Abort early if
       
  1269   ** the journal is empty.
       
  1270   */
       
  1271   assert( pPager->journalOpen );
       
  1272   rc = sqlite3OsFileSize(pPager->jfd, &szJ);
       
  1273   if( rc!=SQLITE_OK ){
       
  1274     goto end_playback;
       
  1275   }
       
  1276 
       
  1277   /* Read the master journal name from the journal, if it is present.
       
  1278   ** If a master journal file name is specified, but the file is not
       
  1279   ** present on disk, then the journal is not hot and does not need to be
       
  1280   ** played back.
       
  1281   */
       
  1282   rc = readMasterJournal(pPager->jfd, &zMaster);
       
  1283   assert( rc!=SQLITE_DONE );
       
  1284   if( rc!=SQLITE_OK || (zMaster && !sqlite3OsFileExists(zMaster)) ){
       
  1285     sqliteFree(zMaster);
       
  1286     zMaster = 0;
       
  1287     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
       
  1288     goto end_playback;
       
  1289   }
       
  1290   sqlite3OsSeek(pPager->jfd, 0);
       
  1291   pPager->journalOff = 0;
       
  1292 
       
  1293   /* This loop terminates either when the readJournalHdr() call returns
       
  1294   ** SQLITE_DONE or an IO error occurs. */
       
  1295   while( 1 ){
       
  1296 
       
  1297     /* Read the next journal header from the journal file.  If there are
       
  1298     ** not enough bytes left in the journal file for a complete header, or
       
  1299     ** it is corrupted, then a process must of failed while writing it.
       
  1300     ** This indicates nothing more needs to be rolled back.
       
  1301     */
       
  1302     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
       
  1303     if( rc!=SQLITE_OK ){ 
       
  1304       if( rc==SQLITE_DONE ){
       
  1305         rc = SQLITE_OK;
       
  1306       }
       
  1307       goto end_playback;
       
  1308     }
       
  1309 
       
  1310     /* If nRec is 0xffffffff, then this journal was created by a process
       
  1311     ** working in no-sync mode. This means that the rest of the journal
       
  1312     ** file consists of pages, there are no more journal headers. Compute
       
  1313     ** the value of nRec based on this assumption.
       
  1314     */
       
  1315     if( nRec==0xffffffff ){
       
  1316       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
       
  1317       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
       
  1318     }
       
  1319 
       
  1320     /* If this is the first header read from the journal, truncate the
       
  1321     ** database file back to it's original size.
       
  1322     */
       
  1323     if( pPager->state>=PAGER_EXCLUSIVE && 
       
  1324         pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
       
  1325       assert( pPager->origDbSize==0 || pPager->origDbSize==mxPg );
       
  1326       rc = pager_truncate(pPager, mxPg);
       
  1327       if( rc!=SQLITE_OK ){
       
  1328         goto end_playback;
       
  1329       }
       
  1330       pPager->dbSize = mxPg;
       
  1331     }
       
  1332 
       
  1333     /* Copy original pages out of the journal and back into the database file.
       
  1334     */
       
  1335     for(i=0; i<nRec; i++){
       
  1336       rc = pager_playback_one_page(pPager, pPager->jfd, 1);
       
  1337       if( rc!=SQLITE_OK ){
       
  1338         if( rc==SQLITE_DONE ){
       
  1339           rc = SQLITE_OK;
       
  1340           pPager->journalOff = szJ;
       
  1341           break;
       
  1342         }else{
       
  1343           goto end_playback;
       
  1344         }
       
  1345       }
       
  1346     }
       
  1347   }
       
  1348   /*NOTREACHED*/
       
  1349   assert( 0 );
       
  1350 
       
  1351 end_playback:
       
  1352   if( rc==SQLITE_OK ){
       
  1353     rc = pager_unwritelock(pPager);
       
  1354   }
       
  1355   if( zMaster ){
       
  1356     /* If there was a master journal and this routine will return true,
       
  1357     ** see if it is possible to delete the master journal.
       
  1358     */
       
  1359     if( rc==SQLITE_OK ){
       
  1360       rc = pager_delmaster(zMaster);
       
  1361     }
       
  1362     sqliteFree(zMaster);
       
  1363   }
       
  1364 
       
  1365   /* The Pager.sectorSize variable may have been updated while rolling
       
  1366   ** back a journal created by a process with a different PAGER_SECTOR_SIZE
       
  1367   ** value. Reset it to the correct value for this process.
       
  1368   */
       
  1369   pPager->sectorSize = PAGER_SECTOR_SIZE;
       
  1370   return rc;
       
  1371 }
       
  1372 
       
  1373 /*
       
  1374 ** Playback the statement journal.
       
  1375 **
       
  1376 ** This is similar to playing back the transaction journal but with
       
  1377 ** a few extra twists.
       
  1378 **
       
  1379 **    (1)  The number of pages in the database file at the start of
       
  1380 **         the statement is stored in pPager->stmtSize, not in the
       
  1381 **         journal file itself.
       
  1382 **
       
  1383 **    (2)  In addition to playing back the statement journal, also
       
  1384 **         playback all pages of the transaction journal beginning
       
  1385 **         at offset pPager->stmtJSize.
       
  1386 */
       
  1387 static int pager_stmt_playback(Pager *pPager){
       
  1388   i64 szJ;                 /* Size of the full journal */
       
  1389   i64 hdrOff;
       
  1390   int nRec;                /* Number of Records */
       
  1391   int i;                   /* Loop counter */
       
  1392   int rc;
       
  1393 
       
  1394   szJ = pPager->journalOff;
       
  1395 #ifndef NDEBUG 
       
  1396   {
       
  1397     i64 os_szJ;
       
  1398     rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
       
  1399     if( rc!=SQLITE_OK ) return rc;
       
  1400     assert( szJ==os_szJ );
       
  1401   }
       
  1402 #endif
       
  1403 
       
  1404   /* Set hdrOff to be the offset to the first journal header written
       
  1405   ** this statement transaction, or the end of the file if no journal
       
  1406   ** header was written.
       
  1407   */
       
  1408   hdrOff = pPager->stmtHdrOff;
       
  1409   assert( pPager->fullSync || !hdrOff );
       
  1410   if( !hdrOff ){
       
  1411     hdrOff = szJ;
       
  1412   }
       
  1413   
       
  1414   /* Truncate the database back to its original size.
       
  1415   */
       
  1416   if( pPager->state>=PAGER_EXCLUSIVE ){
       
  1417     rc = pager_truncate(pPager, pPager->stmtSize);
       
  1418   }
       
  1419   pPager->dbSize = pPager->stmtSize;
       
  1420 
       
  1421   /* Figure out how many records are in the statement journal.
       
  1422   */
       
  1423   assert( pPager->stmtInUse && pPager->journalOpen );
       
  1424   sqlite3OsSeek(pPager->stfd, 0);
       
  1425   nRec = pPager->stmtNRec;
       
  1426   
       
  1427   /* Copy original pages out of the statement journal and back into the
       
  1428   ** database file.  Note that the statement journal omits checksums from
       
  1429   ** each record since power-failure recovery is not important to statement
       
  1430   ** journals.
       
  1431   */
       
  1432   for(i=nRec-1; i>=0; i--){
       
  1433     rc = pager_playback_one_page(pPager, pPager->stfd, 0);
       
  1434     assert( rc!=SQLITE_DONE );
       
  1435     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1436   }
       
  1437 
       
  1438   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
       
  1439   ** was the size of the journal file when this statement was started, so
       
  1440   ** everything after that needs to be rolled back, either into the
       
  1441   ** database, the memory cache, or both.
       
  1442   **
       
  1443   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
       
  1444   ** of the first journal header written during this statement transaction.
       
  1445   */
       
  1446   rc = sqlite3OsSeek(pPager->jfd, pPager->stmtJSize);
       
  1447   if( rc!=SQLITE_OK ){
       
  1448     goto end_stmt_playback;
       
  1449   }
       
  1450   pPager->journalOff = pPager->stmtJSize;
       
  1451   pPager->cksumInit = pPager->stmtCksum;
       
  1452   assert( JOURNAL_HDR_SZ(pPager)<(pPager->pageSize+8) );
       
  1453   while( pPager->journalOff <= (hdrOff-(pPager->pageSize+8)) ){
       
  1454     rc = pager_playback_one_page(pPager, pPager->jfd, 1);
       
  1455     assert( rc!=SQLITE_DONE );
       
  1456     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1457   }
       
  1458 
       
  1459   while( pPager->journalOff < szJ ){
       
  1460     u32 nJRec;         /* Number of Journal Records */
       
  1461     u32 dummy;
       
  1462     rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
       
  1463     if( rc!=SQLITE_OK ){
       
  1464       assert( rc!=SQLITE_DONE );
       
  1465       goto end_stmt_playback;
       
  1466     }
       
  1467     if( nJRec==0 ){
       
  1468       nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
       
  1469     }
       
  1470     for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
       
  1471       rc = pager_playback_one_page(pPager, pPager->jfd, 1);
       
  1472       assert( rc!=SQLITE_DONE );
       
  1473       if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1474     }
       
  1475   }
       
  1476 
       
  1477   pPager->journalOff = szJ;
       
  1478   
       
  1479 end_stmt_playback:
       
  1480   if( rc==SQLITE_OK) {
       
  1481     pPager->journalOff = szJ;
       
  1482     /* pager_reload_cache(pPager); */
       
  1483   }
       
  1484   return rc;
       
  1485 }
       
  1486 
       
  1487 /*
       
  1488 ** Change the maximum number of in-memory pages that are allowed.
       
  1489 */
       
  1490 void sqlite3pager_set_cachesize(Pager *pPager, int mxPage){
       
  1491   if( mxPage>10 ){
       
  1492     pPager->mxPage = mxPage;
       
  1493   }else{
       
  1494     pPager->mxPage = 10;
       
  1495   }
       
  1496 }
       
  1497 
       
  1498 /*
       
  1499 ** Adjust the robustness of the database to damage due to OS crashes
       
** or power failures by changing the number of sync()s when writing
       
  1501 ** the rollback journal.  There are three levels:
       
  1502 **
       
  1503 **    OFF       sqlite3OsSync() is never called.  This is the default
       
  1504 **              for temporary and transient files.
       
  1505 **
       
  1506 **    NORMAL    The journal is synced once before writes begin on the
       
  1507 **              database.  This is normally adequate protection, but
       
  1508 **              it is theoretically possible, though very unlikely,
       
**              that an inopportune power failure could leave the journal
       
  1510 **              in a state which would cause damage to the database
       
  1511 **              when it is rolled back.
       
  1512 **
       
  1513 **    FULL      The journal is synced twice before writes begin on the
       
  1514 **              database (with some additional information - the nRec field
       
  1515 **              of the journal header - being written in between the two
       
  1516 **              syncs).  If we assume that writing a
       
  1517 **              single disk sector is atomic, then this mode provides
       
  1518 **              assurance that the journal will not be corrupted to the
       
  1519 **              point of causing damage to the database during rollback.
       
  1520 **
       
  1521 ** Numeric values associated with these states are OFF==1, NORMAL=2,
       
  1522 ** and FULL=3.
       
  1523 */
       
  1524 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
       
  1525 void sqlite3pager_set_safety_level(Pager *pPager, int level, int full_fsync){
       
  1526   pPager->noSync =  level==1 || pPager->tempFile;
       
  1527   pPager->fullSync = level==3 && !pPager->tempFile;
       
  1528   pPager->full_fsync = full_fsync;
       
  1529   if( pPager->noSync ) pPager->needSync = 0;
       
  1530 }
       
  1531 #endif
       
  1532 
       
/*
** The following global variable is incremented whenever the library
** attempts to open a temporary file (see sqlite3pager_opentemp()).  It
** is only defined when SQLITE_TEST is defined, and the information is
** used for testing and analysis only.
*/
#ifdef SQLITE_TEST
int sqlite3_opentemp_count = 0;
#endif
       
  1541 
       
  1542 /*
       
  1543 ** Open a temporary file.  Write the name of the file into zFile
       
  1544 ** (zFile must be at least SQLITE_TEMPNAME_SIZE bytes long.)  Write
       
  1545 ** the file descriptor into *fd.  Return SQLITE_OK on success or some
       
  1546 ** other error code if we fail.
       
  1547 **
       
  1548 ** The OS will automatically delete the temporary file when it is
       
  1549 ** closed.
       
  1550 */
       
  1551 static int sqlite3pager_opentemp(char *zFile, OsFile **pFd){
       
  1552   int cnt = 8;
       
  1553   int rc;
       
  1554 #ifdef SQLITE_TEST
       
  1555   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
       
  1556 #endif
       
  1557   do{
       
  1558     cnt--;
       
  1559     sqlite3OsTempFileName(zFile);
       
  1560     rc = sqlite3OsOpenExclusive(zFile, pFd, 1);
       
  1561   }while( cnt>0 && rc!=SQLITE_OK && rc!=SQLITE_NOMEM );
       
  1562   return rc;
       
  1563 }
       
  1564 
       
  1565 /*
       
  1566 ** Create a new page cache and put a pointer to the page cache in *ppPager.
       
  1567 ** The file to be cached need not exist.  The file is not locked until
       
  1568 ** the first call to sqlite3pager_get() and is only held open until the
       
  1569 ** last page is released using sqlite3pager_unref().
       
  1570 **
       
  1571 ** If zFilename is NULL then a randomly-named temporary file is created
       
  1572 ** and used as the file to be cached.  The file will be deleted
       
  1573 ** automatically when it is closed.
       
  1574 **
       
  1575 ** If zFilename is ":memory:" then all information is held in cache.
       
  1576 ** It is never written to disk.  This can be used to implement an
       
  1577 ** in-memory database.
       
  1578 */
       
  1579 int sqlite3pager_open(
       
  1580   Pager **ppPager,         /* Return the Pager structure here */
       
  1581   const char *zFilename,   /* Name of the database file to open */
       
  1582   int nExtra,              /* Extra bytes append to each in-memory page */
       
  1583   int flags                /* flags controlling this file */
       
  1584 ){
       
  1585   Pager *pPager = 0;
       
  1586   char *zFullPathname = 0;
       
  1587   int nameLen;  /* Compiler is wrong. This is always initialized before use */
       
  1588   OsFile *fd;
       
  1589   int rc = SQLITE_OK;
       
  1590   int i;
       
  1591   int tempFile = 0;
       
  1592   int memDb = 0;
       
  1593   int readOnly = 0;
       
  1594   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
       
  1595   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
       
  1596   char zTemp[SQLITE_TEMPNAME_SIZE];
       
  1597 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  1598   /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to 
       
  1599   ** malloc() must have already been made by this thread before it gets
       
  1600   ** to this point. This means the ThreadData must have been allocated already
       
  1601   ** so that ThreadData.nAlloc can be set. It would be nice to assert
       
  1602   ** that ThreadData.nAlloc is non-zero, but alas this breaks test cases 
       
  1603   ** written to invoke the pager directly.
       
  1604   */
       
  1605   ThreadData *pTsd = sqlite3ThreadData();
       
  1606   assert( pTsd );
       
  1607 #endif
       
  1608 
       
  1609   /* If malloc() has already failed return SQLITE_NOMEM. Before even
       
  1610   ** testing for this, set *ppPager to NULL so the caller knows the pager
       
  1611   ** structure was never allocated. 
       
  1612   */
       
  1613   *ppPager = 0;
       
  1614   if( sqlite3MallocFailed() ){
       
  1615     return SQLITE_NOMEM;
       
  1616   }
       
  1617   memset(&fd, 0, sizeof(fd));
       
  1618 
       
  1619   /* Open the pager file and set zFullPathname to point at malloc()ed 
       
  1620   ** memory containing the complete filename (i.e. including the directory).
       
  1621   */
       
  1622   if( zFilename && zFilename[0] ){
       
  1623 #ifndef SQLITE_OMIT_MEMORYDB
       
  1624     if( strcmp(zFilename,":memory:")==0 ){
       
  1625       memDb = 1;
       
  1626       zFullPathname = sqliteStrDup("");
       
  1627     }else
       
  1628 #endif
       
  1629     {
       
  1630       zFullPathname = sqlite3OsFullPathname(zFilename);
       
  1631       if( zFullPathname ){
       
  1632         rc = sqlite3OsOpenReadWrite(zFullPathname, &fd, &readOnly);
       
  1633       }
       
  1634     }
       
  1635   }else{
       
  1636     rc = sqlite3pager_opentemp(zTemp, &fd);
       
  1637     zFilename = zTemp;
       
  1638     zFullPathname = sqlite3OsFullPathname(zFilename);
       
  1639     if( rc==SQLITE_OK ){
       
  1640       tempFile = 1;
       
  1641     }
       
  1642   }
       
  1643 
       
  1644   /* Allocate the Pager structure. As part of the same allocation, allocate
       
  1645   ** space for the full paths of the file, directory and journal 
       
  1646   ** (Pager.zFilename, Pager.zDirectory and Pager.zJournal).
       
  1647   */
       
  1648   if( zFullPathname ){
       
  1649     nameLen = strlen(zFullPathname);
       
  1650     pPager = sqliteMalloc( sizeof(*pPager) + nameLen*3 + 30 );
       
  1651   }
       
  1652 
       
  1653   /* If an error occured in either of the blocks above, free the memory 
       
  1654   ** pointed to by zFullPathname, free the Pager structure and close the 
       
  1655   ** file. Since the pager is not allocated there is no need to set 
       
  1656   ** any Pager.errMask variables.
       
  1657   */
       
  1658   if( !pPager || !zFullPathname || rc!=SQLITE_OK ){
       
  1659     sqlite3OsClose(&fd);
       
  1660     sqliteFree(zFullPathname);
       
  1661     sqliteFree(pPager);
       
  1662     return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
       
  1663   }
       
  1664 
       
  1665   TRACE3("OPEN %d %s\n", FILEHANDLEID(fd), zFullPathname);
       
  1666   pPager->zFilename = (char*)&pPager[1];
       
  1667   pPager->zDirectory = &pPager->zFilename[nameLen+1];
       
  1668   pPager->zJournal = &pPager->zDirectory[nameLen+1];
       
  1669   strcpy(pPager->zFilename, zFullPathname);
       
  1670   strcpy(pPager->zDirectory, zFullPathname);
       
  1671 
       
  1672   for(i=nameLen; i>0 && pPager->zDirectory[i-1]!='/'; i--){}
       
  1673   if( i>0 ) pPager->zDirectory[i-1] = 0;
       
  1674   strcpy(pPager->zJournal, zFullPathname);
       
  1675   sqliteFree(zFullPathname);
       
  1676   strcpy(&pPager->zJournal[nameLen], "-journal");
       
  1677   pPager->fd = fd;
       
  1678   /* pPager->journalOpen = 0; */
       
  1679   pPager->useJournal = useJournal && !memDb;
       
  1680   pPager->noReadlock = noReadlock && readOnly;
       
  1681   /* pPager->stmtOpen = 0; */
       
  1682   /* pPager->stmtInUse = 0; */
       
  1683   /* pPager->nRef = 0; */
       
  1684   pPager->dbSize = memDb-1;
       
  1685   pPager->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
       
  1686   /* pPager->stmtSize = 0; */
       
  1687   /* pPager->stmtJSize = 0; */
       
  1688   /* pPager->nPage = 0; */
       
  1689   /* pPager->nMaxPage = 0; */
       
  1690   pPager->mxPage = 100;
       
  1691   assert( PAGER_UNLOCK==0 );
       
  1692   /* pPager->state = PAGER_UNLOCK; */
       
  1693   /* pPager->errMask = 0; */
       
  1694   pPager->tempFile = tempFile;
       
  1695   pPager->memDb = memDb;
       
  1696   pPager->readOnly = readOnly;
       
  1697   /* pPager->needSync = 0; */
       
  1698   pPager->noSync = pPager->tempFile || !useJournal;
       
  1699   pPager->fullSync = (pPager->noSync?0:1);
       
  1700   /* pPager->pFirst = 0; */
       
  1701   /* pPager->pFirstSynced = 0; */
       
  1702   /* pPager->pLast = 0; */
       
  1703   pPager->nExtra = FORCE_ALIGNMENT(nExtra);
       
  1704   pPager->sectorSize = PAGER_SECTOR_SIZE;
       
  1705   /* pPager->pBusyHandler = 0; */
       
  1706   /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
       
  1707   *ppPager = pPager;
       
  1708 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  1709   pPager->pNext = pTsd->pPager;
       
  1710   pTsd->pPager = pPager;
       
  1711 #endif
       
  1712   return SQLITE_OK;
       
  1713 }
       
  1714 
       
/*
** Set the busy handler function.
**
** pBusyHandler is stored in pPager->pBusyHandler, replacing whatever
** was stored there before.  pager_wait_on_lock() hands this pointer to
** sqlite3InvokeBusyHandler() when a lock attempt returns SQLITE_BUSY.
*/
void sqlite3pager_set_busyhandler(Pager *pPager, BusyHandler *pBusyHandler){
  pPager->pBusyHandler = pBusyHandler;
}
       
  1721 
       
/*
** Set the destructor for this pager.  If not NULL, the destructor is called
** when the reference count on each page reaches zero.  The destructor can
** be used to clean up information in the extra segment appended to each page.
**
** The destructor is not called as a result of sqlite3pager_close().  
** Destructors are only called by sqlite3pager_unref().
**
** xDesc is a function taking (void*, int); it is stored in
** pPager->xDestructor, replacing any previous value.
*/
void sqlite3pager_set_destructor(Pager *pPager, void (*xDesc)(void*,int)){
  pPager->xDestructor = xDesc;
}
       
  1733 
       
/*
** Set the reinitializer for this pager.  If not NULL, the reinitializer
** is called when the content of a page in cache is restored to its original
** value as a result of a rollback.  The callback gives higher-level code
** an opportunity to restore the EXTRA section to agree with the restored
** page data.
**
** xReinit is a function taking (void*, int); it is stored in
** pPager->xReiniter, replacing any previous value.
*/
void sqlite3pager_set_reiniter(Pager *pPager, void (*xReinit)(void*,int)){
  pPager->xReiniter = xReinit;
}
       
  1744 
       
  1745 /*
       
  1746 ** Set the page size.  Return the new size.  If the suggest new page
       
  1747 ** size is inappropriate, then an alternative page size is selected
       
  1748 ** and returned.
       
  1749 */
       
  1750 int sqlite3pager_set_pagesize(Pager *pPager, int pageSize){
       
  1751   assert( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE );
       
  1752   if( !pPager->memDb ){
       
  1753     pPager->pageSize = pageSize;
       
  1754   }
       
  1755   return pPager->pageSize;
       
  1756 }
       
  1757 
       
  1758 /*
       
  1759 ** The following set of routines are used to disable the simulated
       
  1760 ** I/O error mechanism.  These routines are used to avoid simulated
       
  1761 ** errors in places where we do not care about errors.
       
  1762 **
       
  1763 ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
       
  1764 ** and generate no code.
       
  1765 */
       
  1766 #ifdef SQLITE_TEST
       
  1767 extern int sqlite3_io_error_pending;
       
  1768 extern int sqlite3_io_error_hit;
       
  1769 static int saved_cnt;
       
  1770 void clear_simulated_io_error(){
       
  1771   sqlite3_io_error_hit = 0;
       
  1772 }
       
  1773 void disable_simulated_io_errors(void){
       
  1774   saved_cnt = sqlite3_io_error_pending;
       
  1775   sqlite3_io_error_pending = -1;
       
  1776 }
       
  1777 void enable_simulated_io_errors(void){
       
  1778   sqlite3_io_error_pending = saved_cnt;
       
  1779 }
       
  1780 #else
       
  1781 # define clear_simulated_io_error()
       
  1782 # define disable_simulated_io_errors()
       
  1783 # define enable_simulated_io_errors()
       
  1784 #endif
       
  1785 
       
  1786 /*
       
  1787 ** Read the first N bytes from the beginning of the file into memory
       
  1788 ** that pDest points to. 
       
  1789 **
       
  1790 ** No error checking is done. The rational for this is that this function 
       
  1791 ** may be called even if the file does not exist or contain a header. In 
       
  1792 ** these cases sqlite3OsRead() will return an error, to which the correct 
       
  1793 ** response is to zero the memory at pDest and continue.  A real IO error 
       
  1794 ** will presumably recur and be picked up later (Todo: Think about this).
       
  1795 */
       
  1796 void sqlite3pager_read_fileheader(Pager *pPager, int N, unsigned char *pDest){
       
  1797   memset(pDest, 0, N);
       
  1798   if( MEMDB==0 ){
       
  1799     disable_simulated_io_errors();
       
  1800     sqlite3OsSeek(pPager->fd, 0);
       
  1801     sqlite3OsRead(pPager->fd, pDest, N);
       
  1802     enable_simulated_io_errors();
       
  1803   }
       
  1804 }
       
  1805 
       
  1806 /*
       
  1807 ** Return the total number of pages in the disk file associated with
       
  1808 ** pPager. 
       
  1809 **
       
  1810 ** If the PENDING_BYTE lies on the page directly after the end of the
       
  1811 ** file, then consider this page part of the file too. For example, if
       
  1812 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
       
  1813 ** file is 4096 bytes, 5 is returned instead of 4.
       
  1814 */
       
  1815 int sqlite3pager_pagecount(Pager *pPager){
       
  1816   i64 n;
       
  1817   assert( pPager!=0 );
       
  1818   if( pPager->dbSize>=0 ){
       
  1819     n = pPager->dbSize;
       
  1820   } else {
       
  1821     if( sqlite3OsFileSize(pPager->fd, &n)!=SQLITE_OK ){
       
  1822       pager_error(pPager, SQLITE_IOERR);
       
  1823       return 0;
       
  1824     }
       
  1825     if( n>0 && n<pPager->pageSize ){
       
  1826       n = 1;
       
  1827     }else{
       
  1828       n /= pPager->pageSize;
       
  1829     }
       
  1830     if( pPager->state!=PAGER_UNLOCK ){
       
  1831       pPager->dbSize = n;
       
  1832     }
       
  1833   }
       
  1834   if( n==(PENDING_BYTE/pPager->pageSize) ){
       
  1835     n++;
       
  1836   }
       
  1837   return n;
       
  1838 }
       
  1839 
       
  1840 
       
  1841 #ifndef SQLITE_OMIT_MEMORYDB
       
  1842 /*
       
  1843 ** Clear a PgHistory block
       
  1844 */
       
  1845 static void clearHistory(PgHistory *pHist){
       
  1846   sqliteFree(pHist->pOrig);
       
  1847   sqliteFree(pHist->pStmt);
       
  1848   pHist->pOrig = 0;
       
  1849   pHist->pStmt = 0;
       
  1850 }
       
  1851 #else
       
  1852 #define clearHistory(x)
       
  1853 #endif
       
  1854 
       
/*
** Forward declaration of syncJournal(), which takes a Pager pointer and
** returns an int, so that it can be referenced before its definition
** has been seen.
*/
static int syncJournal(Pager*);
       
  1859 
       
  1860 /*
       
  1861 ** Unlink pPg from it's hash chain. Also set the page number to 0 to indicate
       
  1862 ** that the page is not part of any hash chain. This is required because the
       
  1863 ** sqlite3pager_movepage() routine can leave a page in the 
       
  1864 ** pNextFree/pPrevFree list that is not a part of any hash-chain.
       
  1865 */
       
  1866 static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
       
  1867   if( pPg->pgno==0 ){
       
  1868     assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
       
  1869     return;
       
  1870   }
       
  1871   if( pPg->pNextHash ){
       
  1872     pPg->pNextHash->pPrevHash = pPg->pPrevHash;
       
  1873   }
       
  1874   if( pPg->pPrevHash ){
       
  1875     assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
       
  1876     pPg->pPrevHash->pNextHash = pPg->pNextHash;
       
  1877   }else{
       
  1878     int h = pPg->pgno & (pPager->nHash-1);
       
  1879     pPager->aHash[h] = pPg->pNextHash;
       
  1880   }
       
  1881   if( MEMDB ){
       
  1882     clearHistory(PGHDR_TO_HIST(pPg, pPager));
       
  1883   }
       
  1884   pPg->pgno = 0;
       
  1885   pPg->pNextHash = pPg->pPrevHash = 0;
       
  1886 }
       
  1887 
       
  1888 /*
       
  1889 ** Unlink a page from the free list (the list of all pages where nRef==0)
       
  1890 ** and from its hash collision chain.
       
  1891 */
       
  1892 static void unlinkPage(PgHdr *pPg){
       
  1893   Pager *pPager = pPg->pPager;
       
  1894 
       
  1895   /* Keep the pFirstSynced pointer pointing at the first synchronized page */
       
  1896   if( pPg==pPager->pFirstSynced ){
       
  1897     PgHdr *p = pPg->pNextFree;
       
  1898     while( p && p->needSync ){ p = p->pNextFree; }
       
  1899     pPager->pFirstSynced = p;
       
  1900   }
       
  1901 
       
  1902   /* Unlink from the freelist */
       
  1903   if( pPg->pPrevFree ){
       
  1904     pPg->pPrevFree->pNextFree = pPg->pNextFree;
       
  1905   }else{
       
  1906     assert( pPager->pFirst==pPg );
       
  1907     pPager->pFirst = pPg->pNextFree;
       
  1908   }
       
  1909   if( pPg->pNextFree ){
       
  1910     pPg->pNextFree->pPrevFree = pPg->pPrevFree;
       
  1911   }else{
       
  1912     assert( pPager->pLast==pPg );
       
  1913     pPager->pLast = pPg->pPrevFree;
       
  1914   }
       
  1915   pPg->pNextFree = pPg->pPrevFree = 0;
       
  1916 
       
  1917   /* Unlink from the pgno hash table */
       
  1918   unlinkHashChain(pPager, pPg);
       
  1919 }
       
  1920 
       
  1921 #ifndef SQLITE_OMIT_MEMORYDB
       
  1922 /*
       
  1923 ** This routine is used to truncate an in-memory database.  Delete
       
  1924 ** all pages whose pgno is larger than pPager->dbSize and is unreferenced.
       
  1925 ** Referenced pages larger than pPager->dbSize are zeroed.
       
  1926 */
       
  1927 static void memoryTruncate(Pager *pPager){
       
  1928   PgHdr *pPg;
       
  1929   PgHdr **ppPg;
       
  1930   int dbSize = pPager->dbSize;
       
  1931 
       
  1932   ppPg = &pPager->pAll;
       
  1933   while( (pPg = *ppPg)!=0 ){
       
  1934     if( pPg->pgno<=dbSize ){
       
  1935       ppPg = &pPg->pNextAll;
       
  1936     }else if( pPg->nRef>0 ){
       
  1937       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
       
  1938       ppPg = &pPg->pNextAll;
       
  1939     }else{
       
  1940       *ppPg = pPg->pNextAll;
       
  1941       unlinkPage(pPg);
       
  1942       makeClean(pPg);
       
  1943       sqliteFree(pPg);
       
  1944       pPager->nPage--;
       
  1945     }
       
  1946   }
       
  1947 }
       
  1948 #else
       
  1949 #define memoryTruncate(p)
       
  1950 #endif
       
  1951 
       
  1952 /*
       
  1953 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
       
  1954 ** is currently not available.  Repeat until the busy callback returns
       
  1955 ** false or until the lock succeeds.
       
  1956 **
       
  1957 ** Return SQLITE_OK on success and an error code if we cannot obtain
       
  1958 ** the lock.
       
  1959 */
       
  1960 static int pager_wait_on_lock(Pager *pPager, int locktype){
       
  1961   int rc;
       
  1962   assert( PAGER_SHARED==SHARED_LOCK );
       
  1963   assert( PAGER_RESERVED==RESERVED_LOCK );
       
  1964   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
       
  1965   if( pPager->state>=locktype ){
       
  1966     rc = SQLITE_OK;
       
  1967   }else{
       
  1968     do {
       
  1969       rc = sqlite3OsLock(pPager->fd, locktype);
       
  1970     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
       
  1971     if( rc==SQLITE_OK ){
       
  1972       pPager->state = locktype;
       
  1973     }
       
  1974   }
       
  1975   return rc;
       
  1976 }
       
  1977 
       
  1978 /*
       
  1979 ** Truncate the file to the number of pages specified.
       
  1980 */
       
  1981 int sqlite3pager_truncate(Pager *pPager, Pgno nPage){
       
  1982   int rc;
       
  1983   sqlite3pager_pagecount(pPager);
       
  1984   if( pPager->errCode ){
       
  1985     rc = pPager->errCode;
       
  1986     return rc;
       
  1987   }
       
  1988   if( nPage>=(unsigned)pPager->dbSize ){
       
  1989     return SQLITE_OK;
       
  1990   }
       
  1991   if( MEMDB ){
       
  1992     pPager->dbSize = nPage;
       
  1993     memoryTruncate(pPager);
       
  1994     return SQLITE_OK;
       
  1995   }
       
  1996   rc = syncJournal(pPager);
       
  1997   if( rc!=SQLITE_OK ){
       
  1998     return rc;
       
  1999   }
       
  2000 
       
  2001   /* Get an exclusive lock on the database before truncating. */
       
  2002   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2003   if( rc!=SQLITE_OK ){
       
  2004     return rc;
       
  2005   }
       
  2006 
       
  2007   rc = pager_truncate(pPager, nPage);
       
  2008   if( rc==SQLITE_OK ){
       
  2009     pPager->dbSize = nPage;
       
  2010   }
       
  2011   return rc;
       
  2012 }
       
  2013 
       
  2014 /*
       
  2015 ** Shutdown the page cache.  Free all memory and close all files.
       
  2016 **
       
  2017 ** If a transaction was in progress when this routine is called, that
       
  2018 ** transaction is rolled back.  All outstanding pages are invalidated
       
  2019 ** and their memory is freed.  Any attempt to use a page associated
       
  2020 ** with this page cache after this function returns will likely
       
  2021 ** result in a coredump.
       
  2022 **
       
  2023 ** This function always succeeds. If a transaction is active an attempt
       
  2024 ** is made to roll it back. If an error occurs during the rollback 
       
  2025 ** a hot journal may be left in the filesystem but no error is returned
       
  2026 ** to the caller.
       
  2027 */
       
  2028 int sqlite3pager_close(Pager *pPager){
       
  2029   PgHdr *pPg, *pNext;
       
  2030 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  2031   /* A malloc() cannot fail in sqlite3ThreadData() as one or more calls to 
       
  2032   ** malloc() must have already been made by this thread before it gets
       
  2033   ** to this point. This means the ThreadData must have been allocated already
       
  2034   ** so that ThreadData.nAlloc can be set.
       
  2035   */
       
  2036   ThreadData *pTsd = sqlite3ThreadData();
       
  2037   assert( pPager );
       
  2038   assert( pTsd && pTsd->nAlloc );
       
  2039 #endif
       
  2040 
       
  2041   switch( pPager->state ){
       
  2042     case PAGER_RESERVED:
       
  2043     case PAGER_SYNCED: 
       
  2044     case PAGER_EXCLUSIVE: {
       
  2045       /* We ignore any IO errors that occur during the rollback
       
  2046       ** operation. So disable IO error simulation so that testing
       
  2047       ** works more easily.
       
  2048       */
       
  2049       disable_simulated_io_errors();
       
  2050       sqlite3pager_rollback(pPager);
       
  2051       enable_simulated_io_errors();
       
  2052       if( !MEMDB ){
       
  2053         sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
  2054       }
       
  2055       assert( pPager->errCode || pPager->journalOpen==0 );
       
  2056       break;
       
  2057     }
       
  2058     case PAGER_SHARED: {
       
  2059       if( !MEMDB ){
       
  2060         sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
  2061       }
       
  2062       break;
       
  2063     }
       
  2064     default: {
       
  2065       /* Do nothing */
       
  2066       break;
       
  2067     }
       
  2068   }
       
  2069   for(pPg=pPager->pAll; pPg; pPg=pNext){
       
  2070 #ifndef NDEBUG
       
  2071     if( MEMDB ){
       
  2072       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  2073       assert( !pPg->alwaysRollback );
       
  2074       assert( !pHist->pOrig );
       
  2075       assert( !pHist->pStmt );
       
  2076     }
       
  2077 #endif
       
  2078     pNext = pPg->pNextAll;
       
  2079     sqliteFree(pPg);
       
  2080   }
       
  2081   TRACE2("CLOSE %d\n", PAGERID(pPager));
       
  2082   assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
       
  2083   if( pPager->journalOpen ){
       
  2084     sqlite3OsClose(&pPager->jfd);
       
  2085   }
       
  2086   sqliteFree(pPager->aInJournal);
       
  2087   if( pPager->stmtOpen ){
       
  2088     sqlite3OsClose(&pPager->stfd);
       
  2089   }
       
  2090   sqlite3OsClose(&pPager->fd);
       
  2091   /* Temp files are automatically deleted by the OS
       
  2092   ** if( pPager->tempFile ){
       
  2093   **   sqlite3OsDelete(pPager->zFilename);
       
  2094   ** }
       
  2095   */
       
  2096 
       
  2097 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  2098   /* Remove the pager from the linked list of pagers starting at 
       
  2099   ** ThreadData.pPager if memory-management is enabled.
       
  2100   */
       
  2101   if( pPager==pTsd->pPager ){
       
  2102     pTsd->pPager = pPager->pNext;
       
  2103   }else{
       
  2104     Pager *pTmp;
       
  2105     for(pTmp = pTsd->pPager; pTmp->pNext!=pPager; pTmp=pTmp->pNext){}
       
  2106     pTmp->pNext = pPager->pNext;
       
  2107   }
       
  2108 #endif
       
  2109   sqliteFree(pPager->aHash);
       
  2110   sqliteFree(pPager);
       
  2111   return SQLITE_OK;
       
  2112 }
       
  2113 
       
  2114 /*
       
  2115 ** Return the page number for the given page data.
       
  2116 */
       
  2117 Pgno sqlite3pager_pagenumber(void *pData){
       
  2118   PgHdr *p = DATA_TO_PGHDR(pData);
       
  2119   return p->pgno;
       
  2120 }
       
  2121 
       
  2122 /*
       
  2123 ** The page_ref() function increments the reference count for a page.
       
  2124 ** If the page is currently on the freelist (the reference count is zero) then
       
  2125 ** remove it from the freelist.
       
  2126 **
       
  2127 ** For non-test systems, page_ref() is a macro that calls _page_ref()
       
  2128 ** online of the reference count is zero.  For test systems, page_ref()
       
  2129 ** is a real function so that we can set breakpoints and trace it.
       
  2130 */
       
  2131 static void _page_ref(PgHdr *pPg){
       
  2132   if( pPg->nRef==0 ){
       
  2133     /* The page is currently on the freelist.  Remove it. */
       
  2134     if( pPg==pPg->pPager->pFirstSynced ){
       
  2135       PgHdr *p = pPg->pNextFree;
       
  2136       while( p && p->needSync ){ p = p->pNextFree; }
       
  2137       pPg->pPager->pFirstSynced = p;
       
  2138     }
       
  2139     if( pPg->pPrevFree ){
       
  2140       pPg->pPrevFree->pNextFree = pPg->pNextFree;
       
  2141     }else{
       
  2142       pPg->pPager->pFirst = pPg->pNextFree;
       
  2143     }
       
  2144     if( pPg->pNextFree ){
       
  2145       pPg->pNextFree->pPrevFree = pPg->pPrevFree;
       
  2146     }else{
       
  2147       pPg->pPager->pLast = pPg->pPrevFree;
       
  2148     }
       
  2149     pPg->pPager->nRef++;
       
  2150   }
       
  2151   pPg->nRef++;
       
  2152   REFINFO(pPg);
       
  2153 }
       
  2154 #ifdef SQLITE_DEBUG
       
  2155   static void page_ref(PgHdr *pPg){
       
  2156     if( pPg->nRef==0 ){
       
  2157       _page_ref(pPg);
       
  2158     }else{
       
  2159       pPg->nRef++;
       
  2160       REFINFO(pPg);
       
  2161     }
       
  2162   }
       
  2163 #else
       
  2164 # define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
       
  2165 #endif
       
  2166 
       
  2167 /*
       
  2168 ** Increment the reference count for a page.  The input pointer is
       
  2169 ** a reference to the page data.
       
  2170 */
       
  2171 int sqlite3pager_ref(void *pData){
       
  2172   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  2173   page_ref(pPg);
       
  2174   return SQLITE_OK;
       
  2175 }
       
  2176 
       
  2177 /*
       
  2178 ** Sync the journal.  In other words, make sure all the pages that have
       
  2179 ** been written to the journal have actually reached the surface of the
       
  2180 ** disk.  It is not safe to modify the original database file until after
       
  2181 ** the journal has been synced.  If the original database is modified before
       
  2182 ** the journal is synced and a power failure occurs, the unsynced journal
       
  2183 ** data would be lost and we would be unable to completely rollback the
       
  2184 ** database changes.  Database corruption would occur.
       
  2185 ** 
       
  2186 ** This routine also updates the nRec field in the header of the journal.
       
  2187 ** (See comments on the pager_playback() routine for additional information.)
       
  2188 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
       
  2189 ** is synced, then the nRec field is updated, then a second sync occurs.
       
  2190 **
       
  2191 ** For temporary databases, we do not care if we are able to rollback
       
  2192 ** after a power failure, so sync occurs.
       
  2193 **
       
  2194 ** This routine clears the needSync field of every page current held in
       
  2195 ** memory.
       
  2196 */
       
  2197 static int syncJournal(Pager *pPager){
       
  2198   PgHdr *pPg;
       
  2199   int rc = SQLITE_OK;
       
  2200 
       
  2201   /* Sync the journal before modifying the main database
       
  2202   ** (assuming there is a journal and it needs to be synced.)
       
  2203   */
       
  2204   if( pPager->needSync ){
       
  2205     if( !pPager->tempFile ){
       
  2206       assert( pPager->journalOpen );
       
  2207       /* assert( !pPager->noSync ); // noSync might be set if synchronous
       
  2208       ** was turned off after the transaction was started.  Ticket #615 */
       
  2209 #ifndef NDEBUG
       
  2210       {
       
  2211         /* Make sure the pPager->nRec counter we are keeping agrees
       
  2212         ** with the nRec computed from the size of the journal file.
       
  2213         */
       
  2214         i64 jSz;
       
  2215         rc = sqlite3OsFileSize(pPager->jfd, &jSz);
       
  2216         if( rc!=0 ) return rc;
       
  2217         assert( pPager->journalOff==jSz );
       
  2218       }
       
  2219 #endif
       
  2220       {
       
  2221         /* Write the nRec value into the journal file header. If in
       
  2222         ** full-synchronous mode, sync the journal first. This ensures that
       
  2223         ** all data has really hit the disk before nRec is updated to mark
       
  2224         ** it as a candidate for rollback. 
       
  2225         */
       
  2226         if( pPager->fullSync ){
       
  2227           TRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2228           rc = sqlite3OsSync(pPager->jfd, 0);
       
  2229           if( rc!=0 ) return rc;
       
  2230         }
       
  2231         rc = sqlite3OsSeek(pPager->jfd,
       
  2232                            pPager->journalHdr + sizeof(aJournalMagic));
       
  2233         if( rc ) return rc;
       
  2234         rc = write32bits(pPager->jfd, pPager->nRec);
       
  2235         if( rc ) return rc;
       
  2236 
       
  2237         rc = sqlite3OsSeek(pPager->jfd, pPager->journalOff);
       
  2238         if( rc ) return rc;
       
  2239       }
       
  2240       TRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2241       rc = sqlite3OsSync(pPager->jfd, pPager->full_fsync);
       
  2242       if( rc!=0 ) return rc;
       
  2243       pPager->journalStarted = 1;
       
  2244     }
       
  2245     pPager->needSync = 0;
       
  2246 
       
  2247     /* Erase the needSync flag from every page.
       
  2248     */
       
  2249     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  2250       pPg->needSync = 0;
       
  2251     }
       
  2252     pPager->pFirstSynced = pPager->pFirst;
       
  2253   }
       
  2254 
       
  2255 #ifndef NDEBUG
       
  2256   /* If the Pager.needSync flag is clear then the PgHdr.needSync
       
  2257   ** flag must also be clear for all pages.  Verify that this
       
  2258   ** invariant is true.
       
  2259   */
       
  2260   else{
       
  2261     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  2262       assert( pPg->needSync==0 );
       
  2263     }
       
  2264     assert( pPager->pFirstSynced==pPager->pFirst );
       
  2265   }
       
  2266 #endif
       
  2267 
       
  2268   return rc;
       
  2269 }
       
  2270 
       
  2271 /*
       
  2272 ** Merge two lists of pages connected by pDirty and in pgno order.
       
  2273 ** Do not both fixing the pPrevDirty pointers.
       
  2274 */
       
  2275 static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
       
  2276   PgHdr result, *pTail;
       
  2277   pTail = &result;
       
  2278   while( pA && pB ){
       
  2279     if( pA->pgno<pB->pgno ){
       
  2280       pTail->pDirty = pA;
       
  2281       pTail = pA;
       
  2282       pA = pA->pDirty;
       
  2283     }else{
       
  2284       pTail->pDirty = pB;
       
  2285       pTail = pB;
       
  2286       pB = pB->pDirty;
       
  2287     }
       
  2288   }
       
  2289   if( pA ){
       
  2290     pTail->pDirty = pA;
       
  2291   }else if( pB ){
       
  2292     pTail->pDirty = pB;
       
  2293   }else{
       
  2294     pTail->pDirty = 0;
       
  2295   }
       
  2296   return result.pDirty;
       
  2297 }
       
  2298 
       
  2299 /*
       
  2300 ** Sort the list of pages in accending order by pgno.  Pages are
       
  2301 ** connected by pDirty pointers.  The pPrevDirty pointers are
       
  2302 ** corrupted by this sort.
       
  2303 */
       
  2304 #define N_SORT_BUCKET 25
       
  2305 static PgHdr *sort_pagelist(PgHdr *pIn){
       
  2306   PgHdr *a[N_SORT_BUCKET], *p;
       
  2307   int i;
       
  2308   memset(a, 0, sizeof(a));
       
  2309   while( pIn ){
       
  2310     p = pIn;
       
  2311     pIn = p->pDirty;
       
  2312     p->pDirty = 0;
       
  2313     for(i=0; i<N_SORT_BUCKET-1; i++){
       
  2314       if( a[i]==0 ){
       
  2315         a[i] = p;
       
  2316         break;
       
  2317       }else{
       
  2318         p = merge_pagelist(a[i], p);
       
  2319         a[i] = 0;
       
  2320       }
       
  2321     }
       
  2322     if( i==N_SORT_BUCKET-1 ){
       
  2323       a[i] = merge_pagelist(a[i], p);
       
  2324     }
       
  2325   }
       
  2326   p = a[0];
       
  2327   for(i=1; i<N_SORT_BUCKET; i++){
       
  2328     p = merge_pagelist(p, a[i]);
       
  2329   }
       
  2330   return p;
       
  2331 }
       
  2332 
       
  2333 /*
       
  2334 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
       
  2335 ** every one of those pages out to the database file and mark them all
       
  2336 ** as clean.
       
  2337 */
       
  2338 static int pager_write_pagelist(PgHdr *pList){
       
  2339   Pager *pPager;
       
  2340   int rc;
       
  2341 
       
  2342   if( pList==0 ) return SQLITE_OK;
       
  2343   pPager = pList->pPager;
       
  2344 
       
  2345   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
       
  2346   ** database file. If there is already an EXCLUSIVE lock, the following
       
  2347   ** calls to sqlite3OsLock() are no-ops.
       
  2348   **
       
  2349   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
       
  2350   ** through an intermediate state PENDING.   A PENDING lock prevents new
       
  2351   ** readers from attaching to the database but is unsufficient for us to
       
  2352   ** write.  The idea of a PENDING lock is to prevent new readers from
       
  2353   ** coming in while we wait for existing readers to clear.
       
  2354   **
       
  2355   ** While the pager is in the RESERVED state, the original database file
       
  2356   ** is unchanged and we can rollback without having to playback the
       
  2357   ** journal into the original database file.  Once we transition to
       
  2358   ** EXCLUSIVE, it means the database file has been changed and any rollback
       
  2359   ** will require a journal playback.
       
  2360   */
       
  2361   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2362   if( rc!=SQLITE_OK ){
       
  2363     return rc;
       
  2364   }
       
  2365 
       
  2366   pList = sort_pagelist(pList);
       
  2367   while( pList ){
       
  2368     assert( pList->dirty );
       
  2369     rc = sqlite3OsSeek(pPager->fd, (pList->pgno-1)*(i64)pPager->pageSize);
       
  2370     if( rc ) return rc;
       
  2371     /* If there are dirty pages in the page cache with page numbers greater
       
  2372     ** than Pager.dbSize, this means sqlite3pager_truncate() was called to
       
  2373     ** make the file smaller (presumably by auto-vacuum code). Do not write
       
  2374     ** any such pages to the file.
       
  2375     */
       
  2376     if( pList->pgno<=pPager->dbSize ){
       
  2377       char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
       
  2378       TRACE3("STORE %d page %d\n", PAGERID(pPager), pList->pgno);
       
  2379       rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize);
       
  2380       TEST_INCR(pPager->nWrite);
       
  2381     }
       
  2382 #ifndef NDEBUG
       
  2383     else{
       
  2384       TRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
       
  2385     }
       
  2386 #endif
       
  2387     if( rc ) return rc;
       
  2388     pList->dirty = 0;
       
  2389 #ifdef SQLITE_CHECK_PAGES
       
  2390     pList->pageHash = pager_pagehash(pList);
       
  2391 #endif
       
  2392     pList = pList->pDirty;
       
  2393   }
       
  2394   return SQLITE_OK;
       
  2395 }
       
  2396 
       
  2397 /*
       
  2398 ** Collect every dirty page into a dirty list and
       
  2399 ** return a pointer to the head of that list.  All pages are
       
  2400 ** collected even if they are still in use.
       
  2401 */
       
  2402 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
       
  2403   return pPager->pDirty;
       
  2404 }
       
  2405 
       
  2406 /*
       
  2407 ** Return TRUE if there is a hot journal on the given pager.
       
  2408 ** A hot journal is one that needs to be played back.
       
  2409 **
       
  2410 ** If the current size of the database file is 0 but a journal file
       
  2411 ** exists, that is probably an old journal left over from a prior
       
  2412 ** database with the same name.  Just delete the journal.
       
  2413 */
       
  2414 static int hasHotJournal(Pager *pPager){
       
  2415   if( !pPager->useJournal ) return 0;
       
  2416   if( !sqlite3OsFileExists(pPager->zJournal) ) return 0;
       
  2417   if( sqlite3OsCheckReservedLock(pPager->fd) ) return 0;
       
  2418   if( sqlite3pager_pagecount(pPager)==0 ){
       
  2419     sqlite3OsDelete(pPager->zJournal);
       
  2420     return 0;
       
  2421   }else{
       
  2422     return 1;
       
  2423   }
       
  2424 }
       
  2425 
       
  2426 /*
       
  2427 ** Try to find a page in the cache that can be recycled. 
       
  2428 **
       
  2429 ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It 
       
  2430 ** does not set the pPager->errCode variable.
       
  2431 */
       
  2432 static int pager_recycle(Pager *pPager, int syncOk, PgHdr **ppPg){
       
  2433   PgHdr *pPg;
       
  2434   *ppPg = 0;
       
  2435 
       
  2436   /* Find a page to recycle.  Try to locate a page that does not
       
  2437   ** require us to do an fsync() on the journal.
       
  2438   */
       
  2439   pPg = pPager->pFirstSynced;
       
  2440 
       
  2441   /* If we could not find a page that does not require an fsync()
       
  2442   ** on the journal file then fsync the journal file.  This is a
       
  2443   ** very slow operation, so we work hard to avoid it.  But sometimes
       
  2444   ** it can't be helped.
       
  2445   */
       
  2446   if( pPg==0 && pPager->pFirst && syncOk && !MEMDB){
       
  2447     int rc = syncJournal(pPager);
       
  2448     if( rc!=0 ){
       
  2449       return rc;
       
  2450     }
       
  2451     if( pPager->fullSync ){
       
  2452       /* If in full-sync mode, write a new journal header into the
       
  2453       ** journal file. This is done to avoid ever modifying a journal
       
  2454       ** header that is involved in the rollback of pages that have
       
  2455       ** already been written to the database (in case the header is
       
  2456       ** trashed when the nRec field is updated).
       
  2457       */
       
  2458       pPager->nRec = 0;
       
  2459       assert( pPager->journalOff > 0 );
       
  2460       rc = writeJournalHdr(pPager);
       
  2461       if( rc!=0 ){
       
  2462         return rc;
       
  2463       }
       
  2464     }
       
  2465     pPg = pPager->pFirst;
       
  2466   }
       
  2467   if( pPg==0 ){
       
  2468     return SQLITE_OK;
       
  2469   }
       
  2470 
       
  2471   assert( pPg->nRef==0 );
       
  2472 
       
  2473   /* Write the page to the database file if it is dirty.
       
  2474   */
       
  2475   if( pPg->dirty ){
       
  2476     int rc;
       
  2477     assert( pPg->needSync==0 );
       
  2478     makeClean(pPg);
       
  2479     pPg->dirty = 1;
       
  2480     pPg->pDirty = 0;
       
  2481     rc = pager_write_pagelist( pPg );
       
  2482     if( rc!=SQLITE_OK ){
       
  2483       return rc;
       
  2484     }
       
  2485   }
       
  2486   assert( pPg->dirty==0 );
       
  2487 
       
  2488   /* If the page we are recycling is marked as alwaysRollback, then
       
  2489   ** set the global alwaysRollback flag, thus disabling the
       
  2490   ** sqlite_dont_rollback() optimization for the rest of this transaction.
       
  2491   ** It is necessary to do this because the page marked alwaysRollback
       
  2492   ** might be reloaded at a later time but at that point we won't remember
       
  2493   ** that is was marked alwaysRollback.  This means that all pages must
       
  2494   ** be marked as alwaysRollback from here on out.
       
  2495   */
       
  2496   if( pPg->alwaysRollback ){
       
  2497     pPager->alwaysRollback = 1;
       
  2498   }
       
  2499 
       
  2500   /* Unlink the old page from the free list and the hash table
       
  2501   */
       
  2502   unlinkPage(pPg);
       
  2503   TEST_INCR(pPager->nOvfl);
       
  2504 
       
  2505   *ppPg = pPg;
       
  2506   return SQLITE_OK;
       
  2507 }
       
  2508 
       
  2509 /*
       
  2510 ** This function is called to free superfluous dynamically allocated memory
       
  2511 ** held by the pager system. Memory in use by any SQLite pager allocated
       
  2512 ** by the current thread may be sqliteFree()ed.
       
  2513 **
       
  2514 ** nReq is the number of bytes of memory required. Once this much has
       
  2515 ** been released, the function returns. A negative value for nReq means
       
  2516 ** free as much memory as possible. The return value is the total number 
       
  2517 ** of bytes of memory released.
       
  2518 */
       
  2519 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  2520 int sqlite3pager_release_memory(int nReq){
       
  2521   const ThreadData *pTsdro = sqlite3ThreadDataReadOnly();
       
  2522   Pager *p;
       
  2523   int nReleased = 0;
       
  2524   int i;
       
  2525 
       
  2526   /* If the the global mutex is held, this subroutine becomes a
       
  2527   ** o-op; zero bytes of memory are freed.  This is because
       
  2528   ** some of the code invoked by this function may also
       
  2529   ** try to obtain the mutex, resulting in a deadlock.
       
  2530   */
       
  2531   if( sqlite3OsInMutex(0) ){
       
  2532     return 0;
       
  2533   }
       
  2534 
       
  2535   /* Outermost loop runs for at most two iterations. First iteration we
       
  2536   ** try to find memory that can be released without calling fsync(). Second
       
  2537   ** iteration (which only runs if the first failed to free nReq bytes of
       
  2538   ** memory) is permitted to call fsync(). This is of course much more 
       
  2539   ** expensive.
       
  2540   */
       
  2541   for(i=0; i<=1; i++){
       
  2542 
       
  2543     /* Loop through all the SQLite pagers opened by the current thread. */
       
  2544     for(p=pTsdro->pPager; p && (nReq<0 || nReleased<nReq); p=p->pNext){
       
  2545       PgHdr *pPg;
       
  2546       int rc;
       
  2547 
       
  2548       /* For each pager, try to free as many pages as possible (without 
       
  2549       ** calling fsync() if this is the first iteration of the outermost 
       
  2550       ** loop).
       
  2551       */
       
  2552       while( SQLITE_OK==(rc = pager_recycle(p, i, &pPg)) && pPg) {
       
  2553         /* We've found a page to free. At this point the page has been 
       
  2554         ** removed from the page hash-table, free-list and synced-list 
       
  2555         ** (pFirstSynced). It is still in the all pages (pAll) list. 
       
  2556         ** Remove it from this list before freeing.
       
  2557         **
       
  2558         ** Todo: Check the Pager.pStmt list to make sure this is Ok. It 
       
  2559         ** probably is though.
       
  2560         */
       
  2561         PgHdr *pTmp;
       
  2562         assert( pPg );
       
  2563         page_remove_from_stmt_list(pPg);
       
  2564         if( pPg==p->pAll ){
       
  2565            p->pAll = pPg->pNextAll;
       
  2566         }else{
       
  2567           for( pTmp=p->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){}
       
  2568           pTmp->pNextAll = pPg->pNextAll;
       
  2569         }
       
  2570         nReleased += sqliteAllocSize(pPg);
       
  2571         sqliteFree(pPg);
       
  2572       }
       
  2573 
       
  2574       if( rc!=SQLITE_OK ){
       
  2575         /* An error occured whilst writing to the database file or 
       
  2576         ** journal in pager_recycle(). The error is not returned to the 
       
  2577         ** caller of this function. Instead, set the Pager.errCode variable.
       
  2578         ** The error will be returned to the user (or users, in the case 
       
  2579         ** of a shared pager cache) of the pager for which the error occured.
       
  2580         */
       
  2581         assert( rc==SQLITE_IOERR || rc==SQLITE_FULL );
       
  2582         assert( p->state>=PAGER_RESERVED );
       
  2583         pager_error(p, rc);
       
  2584       }
       
  2585     }
       
  2586   }
       
  2587 
       
  2588   return nReleased;
       
  2589 }
       
  2590 #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
       
  2591 
       
  2592 /*
       
  2593 ** Acquire a page.
       
  2594 **
       
  2595 ** A read lock on the disk file is obtained when the first page is acquired. 
       
  2596 ** This read lock is dropped when the last page is released.
       
  2597 **
       
  2598 ** A _get works for any page number greater than 0.  If the database
       
  2599 ** file is smaller than the requested page, then no actual disk
       
  2600 ** read occurs and the memory image of the page is initialized to
       
  2601 ** all zeros.  The extra data appended to a page is always initialized
       
  2602 ** to zeros the first time a page is loaded into memory.
       
  2603 **
       
  2604 ** The acquisition might fail for several reasons.  In all cases,
       
  2605 ** an appropriate error code is returned and *ppPage is set to NULL.
       
  2606 **
       
  2607 ** See also sqlite3pager_lookup().  Both this routine and _lookup() attempt
       
  2608 ** to find a page in the in-memory cache first.  If the page is not already
       
  2609 ** in memory, this routine goes to disk to read it in whereas _lookup()
       
  2610 ** just returns 0.  This routine acquires a read-lock the first time it
       
  2611 ** has to go to disk, and could also playback an old journal if necessary.
       
  2612 ** Since _lookup() never goes to disk, it never has to deal with locks
       
  2613 ** or journal files.
       
  2614 */
       
  2615 int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
       
  2616   PgHdr *pPg;
       
  2617   int rc;
       
  2618 
       
  2619   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
       
  2620   ** number greater than this, or zero, is requested.
       
  2621   */
       
  2622   if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
       
  2623     return SQLITE_CORRUPT_BKPT;
       
  2624   }
       
  2625 
       
  2626   /* Make sure we have not hit any critical errors.
       
  2627   */ 
       
  2628   assert( pPager!=0 );
       
  2629   *ppPage = 0;
       
  2630   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  2631     return pPager->errCode;
       
  2632   }
       
  2633 
       
  2634   /* If this is the first page accessed, then get a SHARED lock
       
  2635   ** on the database file.
       
  2636   */
       
  2637   if( pPager->nRef==0 && !MEMDB ){
       
  2638     if( !pPager->noReadlock ){
       
  2639       rc = pager_wait_on_lock(pPager, SHARED_LOCK);
       
  2640       if( rc!=SQLITE_OK ){
       
  2641         return pager_error(pPager, rc);
       
  2642       }
       
  2643     }
       
  2644 
       
  2645     /* If a journal file exists, and there is no RESERVED lock on the
       
  2646     ** database file, then it either needs to be played back or deleted.
       
  2647     */
       
  2648     if( hasHotJournal(pPager) ){
       
  2649        /* Get an EXCLUSIVE lock on the database file. At this point it is
       
  2650        ** important that a RESERVED lock is not obtained on the way to the
       
  2651        ** EXCLUSIVE lock. If it were, another process might open the
       
  2652        ** database file, detect the RESERVED lock, and conclude that the
       
  2653        ** database is safe to read while this process is still rolling it 
       
  2654        ** back.
       
  2655        ** 
       
  2656        ** Because the intermediate RESERVED lock is not requested, the
       
  2657        ** second process will get to this point in the code and fail to
       
  2658        ** obtain it's own EXCLUSIVE lock on the database file.
       
  2659        */
       
  2660        rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
       
  2661        if( rc!=SQLITE_OK ){
       
  2662          sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
  2663          pPager->state = PAGER_UNLOCK;
       
  2664          return pager_error(pPager, rc);
       
  2665        }
       
  2666        pPager->state = PAGER_EXCLUSIVE;
       
  2667 
       
  2668        /* Open the journal for reading only.  Return SQLITE_BUSY if
       
  2669        ** we are unable to open the journal file. 
       
  2670        **
       
  2671        ** The journal file does not need to be locked itself.  The
       
  2672        ** journal file is never open unless the main database file holds
       
  2673        ** a write lock, so there is never any chance of two or more
       
  2674        ** processes opening the journal at the same time.
       
  2675        */
       
  2676        rc = sqlite3OsOpenReadOnly(pPager->zJournal, &pPager->jfd);
       
  2677        if( rc!=SQLITE_OK ){
       
  2678          sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
  2679          pPager->state = PAGER_UNLOCK;
       
  2680          return SQLITE_BUSY;
       
  2681        }
       
  2682        pPager->journalOpen = 1;
       
  2683        pPager->journalStarted = 0;
       
  2684        pPager->journalOff = 0;
       
  2685        pPager->setMaster = 0;
       
  2686        pPager->journalHdr = 0;
       
  2687 
       
  2688        /* Playback and delete the journal.  Drop the database write
       
  2689        ** lock and reacquire the read lock.
       
  2690        */
       
  2691        rc = pager_playback(pPager);
       
  2692        if( rc!=SQLITE_OK ){
       
  2693          return pager_error(pPager, rc);
       
  2694        }
       
  2695     }
       
  2696     pPg = 0;
       
  2697   }else{
       
  2698     /* Search for page in cache */
       
  2699     pPg = pager_lookup(pPager, pgno);
       
  2700     if( MEMDB && pPager->state==PAGER_UNLOCK ){
       
  2701       pPager->state = PAGER_SHARED;
       
  2702     }
       
  2703   }
       
  2704   if( pPg==0 ){
       
  2705     /* The requested page is not in the page cache. */
       
  2706     int h;
       
  2707     TEST_INCR(pPager->nMiss);
       
  2708     if( pPager->nPage<pPager->mxPage || pPager->pFirst==0 || MEMDB ){
       
  2709       /* Create a new page */
       
  2710       if( pPager->nPage>=pPager->nHash ){
       
  2711         pager_resize_hash_table(pPager,
       
  2712            pPager->nHash<256 ? 256 : pPager->nHash*2);
       
  2713         if( pPager->nHash==0 ){
       
  2714           return SQLITE_NOMEM;
       
  2715         }
       
  2716       }
       
  2717       pPg = sqliteMallocRaw( sizeof(*pPg) + pPager->pageSize
       
  2718                               + sizeof(u32) + pPager->nExtra
       
  2719                               + MEMDB*sizeof(PgHistory) );
       
  2720       if( pPg==0 ){
       
  2721         return SQLITE_NOMEM;
       
  2722       }
       
  2723       memset(pPg, 0, sizeof(*pPg));
       
  2724       if( MEMDB ){
       
  2725         memset(PGHDR_TO_HIST(pPg, pPager), 0, sizeof(PgHistory));
       
  2726       }
       
  2727       pPg->pPager = pPager;
       
  2728       pPg->pNextAll = pPager->pAll;
       
  2729       pPager->pAll = pPg;
       
  2730       pPager->nPage++;
       
  2731       if( pPager->nPage>pPager->nMaxPage ){
       
  2732         assert( pPager->nMaxPage==(pPager->nPage-1) );
       
  2733         pPager->nMaxPage++;
       
  2734       }
       
  2735     }else{
       
  2736       rc = pager_recycle(pPager, 1, &pPg);
       
  2737       if( rc!=SQLITE_OK ){
       
  2738         return rc;
       
  2739       }
       
  2740       assert(pPg) ;
       
  2741     }
       
  2742     pPg->pgno = pgno;
       
  2743     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
       
  2744       sqlite3CheckMemory(pPager->aInJournal, pgno/8);
       
  2745       assert( pPager->journalOpen );
       
  2746       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
       
  2747       pPg->needSync = 0;
       
  2748     }else{
       
  2749       pPg->inJournal = 0;
       
  2750       pPg->needSync = 0;
       
  2751     }
       
  2752     if( pPager->aInStmt && (int)pgno<=pPager->stmtSize
       
  2753              && (pPager->aInStmt[pgno/8] & (1<<(pgno&7)))!=0 ){
       
  2754       page_add_to_stmt_list(pPg);
       
  2755     }else{
       
  2756       page_remove_from_stmt_list(pPg);
       
  2757     }
       
  2758     makeClean(pPg);
       
  2759     pPg->nRef = 1;
       
  2760     REFINFO(pPg);
       
  2761 
       
  2762     pPager->nRef++;
       
  2763     if( pPager->nExtra>0 ){
       
  2764       memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
       
  2765     }
       
  2766     if( pPager->errCode ){
       
  2767       sqlite3pager_unref(PGHDR_TO_DATA(pPg));
       
  2768       rc = pPager->errCode;
       
  2769       return rc;
       
  2770     }
       
  2771 
       
  2772     /* Populate the page with data, either by reading from the database
       
  2773     ** file, or by setting the entire page to zero.
       
  2774     */
       
  2775     if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB ){
       
  2776       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
       
  2777     }else{
       
  2778       assert( MEMDB==0 );
       
  2779       rc = sqlite3OsSeek(pPager->fd, (pgno-1)*(i64)pPager->pageSize);
       
  2780       if( rc==SQLITE_OK ){
       
  2781         rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg),
       
  2782                               pPager->pageSize);
       
  2783       }
       
  2784       TRACE3("FETCH %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  2785       CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
       
  2786       if( rc!=SQLITE_OK ){
       
  2787         i64 fileSize;
       
  2788         int rc2 = sqlite3OsFileSize(pPager->fd, &fileSize);
       
  2789         if( rc2!=SQLITE_OK || fileSize>=pgno*pPager->pageSize ){
       
  2790 	  /* An IO error occured in one of the the sqlite3OsSeek() or
       
  2791           ** sqlite3OsRead() calls above. */
       
  2792           pPg->pgno = 0;
       
  2793           sqlite3pager_unref(PGHDR_TO_DATA(pPg));
       
  2794           return rc;
       
  2795         }else{
       
  2796           clear_simulated_io_error();
       
  2797           memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
       
  2798         }
       
  2799       }else{
       
  2800         TEST_INCR(pPager->nRead);
       
  2801       }
       
  2802     }
       
  2803 
       
  2804     /* Link the page into the page hash table */
       
  2805     h = pgno & (pPager->nHash-1);
       
  2806     assert( pgno!=0 );
       
  2807     pPg->pNextHash = pPager->aHash[h];
       
  2808     pPager->aHash[h] = pPg;
       
  2809     if( pPg->pNextHash ){
       
  2810       assert( pPg->pNextHash->pPrevHash==0 );
       
  2811       pPg->pNextHash->pPrevHash = pPg;
       
  2812     }
       
  2813 
       
  2814 #ifdef SQLITE_CHECK_PAGES
       
  2815     pPg->pageHash = pager_pagehash(pPg);
       
  2816 #endif
       
  2817   }else{
       
  2818     /* The requested page is in the page cache. */
       
  2819     TEST_INCR(pPager->nHit);
       
  2820     page_ref(pPg);
       
  2821   }
       
  2822   *ppPage = PGHDR_TO_DATA(pPg);
       
  2823   return SQLITE_OK;
       
  2824 }
       
  2825 
       
  2826 /*
       
  2827 ** Acquire a page if it is already in the in-memory cache.  Do
       
  2828 ** not read the page from disk.  Return a pointer to the page,
       
  2829 ** or 0 if the page is not in cache.
       
  2830 **
       
  2831 ** See also sqlite3pager_get().  The difference between this routine
       
  2832 ** and sqlite3pager_get() is that _get() will go to the disk and read
       
  2833 ** in the page if the page is not already in cache.  This routine
       
  2834 ** returns NULL if the page is not in cache or if a disk I/O error 
       
  2835 ** has ever happened.
       
  2836 */
       
  2837 void *sqlite3pager_lookup(Pager *pPager, Pgno pgno){
       
  2838   PgHdr *pPg;
       
  2839 
       
  2840   assert( pPager!=0 );
       
  2841   assert( pgno!=0 );
       
  2842   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  2843     return 0;
       
  2844   }
       
  2845   pPg = pager_lookup(pPager, pgno);
       
  2846   if( pPg==0 ) return 0;
       
  2847   page_ref(pPg);
       
  2848   return PGHDR_TO_DATA(pPg);
       
  2849 }
       
  2850 
       
  2851 /*
       
  2852 ** Release a page.
       
  2853 **
       
  2854 ** If the number of references to the page drop to zero, then the
       
  2855 ** page is added to the LRU list.  When all references to all pages
       
  2856 ** are released, a rollback occurs and the lock on the database is
       
  2857 ** removed.
       
  2858 */
       
  2859 int sqlite3pager_unref(void *pData){
       
  2860   PgHdr *pPg;
       
  2861 
       
  2862   /* Decrement the reference count for this page
       
  2863   */
       
  2864   pPg = DATA_TO_PGHDR(pData);
       
  2865   assert( pPg->nRef>0 );
       
  2866   pPg->nRef--;
       
  2867   REFINFO(pPg);
       
  2868 
       
  2869   CHECK_PAGE(pPg);
       
  2870 
       
  2871   /* When the number of references to a page reach 0, call the
       
  2872   ** destructor and add the page to the freelist.
       
  2873   */
       
  2874   if( pPg->nRef==0 ){
       
  2875     Pager *pPager;
       
  2876     pPager = pPg->pPager;
       
  2877     pPg->pNextFree = 0;
       
  2878     pPg->pPrevFree = pPager->pLast;
       
  2879     pPager->pLast = pPg;
       
  2880     if( pPg->pPrevFree ){
       
  2881       pPg->pPrevFree->pNextFree = pPg;
       
  2882     }else{
       
  2883       pPager->pFirst = pPg;
       
  2884     }
       
  2885     if( pPg->needSync==0 && pPager->pFirstSynced==0 ){
       
  2886       pPager->pFirstSynced = pPg;
       
  2887     }
       
  2888     if( pPager->xDestructor ){
       
  2889       pPager->xDestructor(pData, pPager->pageSize);
       
  2890     }
       
  2891   
       
  2892     /* When all pages reach the freelist, drop the read lock from
       
  2893     ** the database file.
       
  2894     */
       
  2895     pPager->nRef--;
       
  2896     assert( pPager->nRef>=0 );
       
  2897     if( pPager->nRef==0 && !MEMDB ){
       
  2898       pager_reset(pPager);
       
  2899     }
       
  2900   }
       
  2901   return SQLITE_OK;
       
  2902 }
       
  2903 
       
  2904 /*
       
  2905 ** Create a journal file for pPager.  There should already be a RESERVED
       
  2906 ** or EXCLUSIVE lock on the database file when this routine is called.
       
  2907 **
       
  2908 ** Return SQLITE_OK if everything.  Return an error code and release the
       
  2909 ** write lock if anything goes wrong.
       
  2910 */
       
  2911 static int pager_open_journal(Pager *pPager){
       
  2912   int rc;
       
  2913   assert( !MEMDB );
       
  2914   assert( pPager->state>=PAGER_RESERVED );
       
  2915   assert( pPager->journalOpen==0 );
       
  2916   assert( pPager->useJournal );
       
  2917   assert( pPager->aInJournal==0 );
       
  2918   sqlite3pager_pagecount(pPager);
       
  2919   pPager->aInJournal = sqliteMalloc( pPager->dbSize/8 + 1 );
       
  2920   if( pPager->aInJournal==0 ){
       
  2921     rc = SQLITE_NOMEM;
       
  2922     goto failed_to_open_journal;
       
  2923   }
       
  2924   rc = sqlite3OsOpenExclusive(pPager->zJournal, &pPager->jfd,
       
  2925                                  pPager->tempFile);
       
  2926   pPager->journalOff = 0;
       
  2927   pPager->setMaster = 0;
       
  2928   pPager->journalHdr = 0;
       
  2929   if( rc!=SQLITE_OK ){
       
  2930     goto failed_to_open_journal;
       
  2931   }
       
  2932   sqlite3OsSetFullSync(pPager->jfd, pPager->full_fsync);
       
  2933   sqlite3OsSetFullSync(pPager->fd, pPager->full_fsync);
       
  2934   sqlite3OsOpenDirectory(pPager->jfd, pPager->zDirectory);
       
  2935   pPager->journalOpen = 1;
       
  2936   pPager->journalStarted = 0;
       
  2937   pPager->needSync = 0;
       
  2938   pPager->alwaysRollback = 0;
       
  2939   pPager->nRec = 0;
       
  2940   if( pPager->errCode ){
       
  2941     rc = pPager->errCode;
       
  2942     goto failed_to_open_journal;
       
  2943   }
       
  2944   pPager->origDbSize = pPager->dbSize;
       
  2945 
       
  2946   rc = writeJournalHdr(pPager);
       
  2947 
       
  2948   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
       
  2949     rc = sqlite3pager_stmt_begin(pPager);
       
  2950   }
       
  2951   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
       
  2952     rc = pager_unwritelock(pPager);
       
  2953     if( rc==SQLITE_OK ){
       
  2954       rc = SQLITE_FULL;
       
  2955     }
       
  2956   }
       
  2957   return rc;
       
  2958 
       
  2959 failed_to_open_journal:
       
  2960   sqliteFree(pPager->aInJournal);
       
  2961   pPager->aInJournal = 0;
       
  2962   if( rc==SQLITE_NOMEM ){
       
  2963     /* If this was a malloc() failure, then we will not be closing the pager
       
  2964     ** file. So delete any journal file we may have just created. Otherwise,
       
  2965     ** the system will get confused, we have a read-lock on the file and a
       
  2966     ** mysterious journal has appeared in the filesystem.
       
  2967     */
       
  2968     sqlite3OsDelete(pPager->zJournal);
       
  2969   }else{
       
  2970     sqlite3OsUnlock(pPager->fd, NO_LOCK);
       
  2971     pPager->state = PAGER_UNLOCK;
       
  2972   }
       
  2973   return rc;
       
  2974 }
       
  2975 
       
  2976 /*
       
  2977 ** Acquire a write-lock on the database.  The lock is removed when
       
  2978 ** the any of the following happen:
       
  2979 **
       
  2980 **   *  sqlite3pager_commit() is called.
       
  2981 **   *  sqlite3pager_rollback() is called.
       
  2982 **   *  sqlite3pager_close() is called.
       
  2983 **   *  sqlite3pager_unref() is called to on every outstanding page.
       
  2984 **
       
  2985 ** The first parameter to this routine is a pointer to any open page of the
       
  2986 ** database file.  Nothing changes about the page - it is used merely to
       
  2987 ** acquire a pointer to the Pager structure and as proof that there is
       
  2988 ** already a read-lock on the database.
       
  2989 **
       
  2990 ** The second parameter indicates how much space in bytes to reserve for a
       
  2991 ** master journal file-name at the start of the journal when it is created.
       
  2992 **
       
  2993 ** A journal file is opened if this is not a temporary file.  For temporary
       
  2994 ** files, the opening of the journal file is deferred until there is an
       
  2995 ** actual need to write to the journal.
       
  2996 **
       
  2997 ** If the database is already reserved for writing, this routine is a no-op.
       
  2998 **
       
  2999 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
       
  3000 ** immediately instead of waiting until we try to flush the cache.  The
       
  3001 ** exFlag is ignored if a transaction is already active.
       
  3002 */
       
  3003 int sqlite3pager_begin(void *pData, int exFlag){
       
  3004   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  3005   Pager *pPager = pPg->pPager;
       
  3006   int rc = SQLITE_OK;
       
  3007   assert( pPg->nRef>0 );
       
  3008   assert( pPager->state!=PAGER_UNLOCK );
       
  3009   if( pPager->state==PAGER_SHARED ){
       
  3010     assert( pPager->aInJournal==0 );
       
  3011     if( MEMDB ){
       
  3012       pPager->state = PAGER_EXCLUSIVE;
       
  3013       pPager->origDbSize = pPager->dbSize;
       
  3014     }else{
       
  3015       rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
       
  3016       if( rc==SQLITE_OK ){
       
  3017         pPager->state = PAGER_RESERVED;
       
  3018         if( exFlag ){
       
  3019           rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  3020         }
       
  3021       }
       
  3022       if( rc!=SQLITE_OK ){
       
  3023         return rc;
       
  3024       }
       
  3025       pPager->dirtyCache = 0;
       
  3026       TRACE2("TRANSACTION %d\n", PAGERID(pPager));
       
  3027       if( pPager->useJournal && !pPager->tempFile ){
       
  3028         rc = pager_open_journal(pPager);
       
  3029       }
       
  3030     }
       
  3031   }
       
  3032   return rc;
       
  3033 }
       
  3034 
       
  3035 /*
       
  3036 ** Make a page dirty.  Set its dirty flag and add it to the dirty
       
  3037 ** page list.
       
  3038 */
       
  3039 static void makeDirty(PgHdr *pPg){
       
  3040   if( pPg->dirty==0 ){
       
  3041     Pager *pPager = pPg->pPager;
       
  3042     pPg->dirty = 1;
       
  3043     pPg->pDirty = pPager->pDirty;
       
  3044     if( pPager->pDirty ){
       
  3045       pPager->pDirty->pPrevDirty = pPg;
       
  3046     }
       
  3047     pPg->pPrevDirty = 0;
       
  3048     pPager->pDirty = pPg;
       
  3049   }
       
  3050 }
       
  3051 
       
  3052 /*
       
  3053 ** Make a page clean.  Clear its dirty bit and remove it from the
       
  3054 ** dirty page list.
       
  3055 */
       
  3056 static void makeClean(PgHdr *pPg){
       
  3057   if( pPg->dirty ){
       
  3058     pPg->dirty = 0;
       
  3059     if( pPg->pDirty ){
       
  3060       pPg->pDirty->pPrevDirty = pPg->pPrevDirty;
       
  3061     }
       
  3062     if( pPg->pPrevDirty ){
       
  3063       pPg->pPrevDirty->pDirty = pPg->pDirty;
       
  3064     }else{
       
  3065       pPg->pPager->pDirty = pPg->pDirty;
       
  3066     }
       
  3067   }
       
  3068 }
       
  3069 
       
  3070 
       
  3071 /*
       
  3072 ** Mark a data page as writeable.  The page is written into the journal 
       
  3073 ** if it is not there already.  This routine must be called before making
       
  3074 ** changes to a page.
       
  3075 **
       
  3076 ** The first time this routine is called, the pager creates a new
       
  3077 ** journal and acquires a RESERVED lock on the database.  If the RESERVED
       
  3078 ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
       
  3079 ** calling routine must check for that return value and be careful not to
       
  3080 ** change any page data until this routine returns SQLITE_OK.
       
  3081 **
       
  3082 ** If the journal file could not be written because the disk is full,
       
  3083 ** then this routine returns SQLITE_FULL and does an immediate rollback.
       
  3084 ** All subsequent write attempts also return SQLITE_FULL until there
       
  3085 ** is a call to sqlite3pager_commit() or sqlite3pager_rollback() to
       
  3086 ** reset.
       
  3087 */
       
  3088 int sqlite3pager_write(void *pData){
       
  3089   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  3090   Pager *pPager = pPg->pPager;
       
  3091   int rc = SQLITE_OK;
       
  3092 
       
  3093   /* Check for errors
       
  3094   */
       
  3095   if( pPager->errCode ){ 
       
  3096     return pPager->errCode;
       
  3097   }
       
  3098   if( pPager->readOnly ){
       
  3099     return SQLITE_PERM;
       
  3100   }
       
  3101 
       
  3102   assert( !pPager->setMaster );
       
  3103 
       
  3104   CHECK_PAGE(pPg);
       
  3105 
       
  3106   /* Mark the page as dirty.  If the page has already been written
       
  3107   ** to the journal then we can return right away.
       
  3108   */
       
  3109   makeDirty(pPg);
       
  3110   if( pPg->inJournal && (pPg->inStmt || pPager->stmtInUse==0) ){
       
  3111     pPager->dirtyCache = 1;
       
  3112   }else{
       
  3113 
       
  3114     /* If we get this far, it means that the page needs to be
       
  3115     ** written to the transaction journal or the ckeckpoint journal
       
  3116     ** or both.
       
  3117     **
       
  3118     ** First check to see that the transaction journal exists and
       
  3119     ** create it if it does not.
       
  3120     */
       
  3121     assert( pPager->state!=PAGER_UNLOCK );
       
  3122     rc = sqlite3pager_begin(pData, 0);
       
  3123     if( rc!=SQLITE_OK ){
       
  3124       return rc;
       
  3125     }
       
  3126     assert( pPager->state>=PAGER_RESERVED );
       
  3127     if( !pPager->journalOpen && pPager->useJournal ){
       
  3128       rc = pager_open_journal(pPager);
       
  3129       if( rc!=SQLITE_OK ) return rc;
       
  3130     }
       
  3131     assert( pPager->journalOpen || !pPager->useJournal );
       
  3132     pPager->dirtyCache = 1;
       
  3133   
       
  3134     /* The transaction journal now exists and we have a RESERVED or an
       
  3135     ** EXCLUSIVE lock on the main database file.  Write the current page to
       
  3136     ** the transaction journal if it is not there already.
       
  3137     */
       
  3138     if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
       
  3139       if( (int)pPg->pgno <= pPager->origDbSize ){
       
  3140         int szPg;
       
  3141         if( MEMDB ){
       
  3142           PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  3143           TRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3144           assert( pHist->pOrig==0 );
       
  3145           pHist->pOrig = sqliteMallocRaw( pPager->pageSize );
       
  3146           if( pHist->pOrig ){
       
  3147             memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
       
  3148           }
       
  3149         }else{
       
  3150           u32 cksum, saved;
       
  3151           char *pData2, *pEnd;
       
  3152           /* We should never write to the journal file the page that
       
  3153           ** contains the database locks.  The following assert verifies
       
  3154           ** that we do not. */
       
  3155           assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
       
  3156           pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
       
  3157           cksum = pager_cksum(pPager, (u8*)pData2);
       
  3158           pEnd = pData2 + pPager->pageSize;
       
  3159           pData2 -= 4;
       
  3160           saved = *(u32*)pEnd;
       
  3161           put32bits(pEnd, cksum);
       
  3162           szPg = pPager->pageSize+8;
       
  3163           put32bits(pData2, pPg->pgno);
       
  3164           rc = sqlite3OsWrite(pPager->jfd, pData2, szPg);
       
  3165           pPager->journalOff += szPg;
       
  3166           TRACE4("JOURNAL %d page %d needSync=%d\n",
       
  3167                   PAGERID(pPager), pPg->pgno, pPg->needSync);
       
  3168           *(u32*)pEnd = saved;
       
  3169 
       
  3170 	  /* An error has occured writing to the journal file. The 
       
  3171           ** transaction will be rolled back by the layer above.
       
  3172           */
       
  3173           if( rc!=SQLITE_OK ){
       
  3174             return rc;
       
  3175           }
       
  3176 
       
  3177           pPager->nRec++;
       
  3178           assert( pPager->aInJournal!=0 );
       
  3179           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3180           pPg->needSync = !pPager->noSync;
       
  3181           if( pPager->stmtInUse ){
       
  3182             pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3183             page_add_to_stmt_list(pPg);
       
  3184           }
       
  3185         }
       
  3186       }else{
       
  3187         pPg->needSync = !pPager->journalStarted && !pPager->noSync;
       
  3188         TRACE4("APPEND %d page %d needSync=%d\n",
       
  3189                 PAGERID(pPager), pPg->pgno, pPg->needSync);
       
  3190       }
       
  3191       if( pPg->needSync ){
       
  3192         pPager->needSync = 1;
       
  3193       }
       
  3194       pPg->inJournal = 1;
       
  3195     }
       
  3196   
       
  3197     /* If the statement journal is open and the page is not in it,
       
  3198     ** then write the current page to the statement journal.  Note that
       
  3199     ** the statement journal format differs from the standard journal format
       
  3200     ** in that it omits the checksums and the header.
       
  3201     */
       
  3202     if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
       
  3203       assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
       
  3204       if( MEMDB ){
       
  3205         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  3206         assert( pHist->pStmt==0 );
       
  3207         pHist->pStmt = sqliteMallocRaw( pPager->pageSize );
       
  3208         if( pHist->pStmt ){
       
  3209           memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
       
  3210         }
       
  3211         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3212       }else{
       
  3213         char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7)-4;
       
  3214         put32bits(pData2, pPg->pgno);
       
  3215         rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize+4);
       
  3216         TRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  3217         if( rc!=SQLITE_OK ){
       
  3218           return rc;
       
  3219         }
       
  3220         pPager->stmtNRec++;
       
  3221         assert( pPager->aInStmt!=0 );
       
  3222         pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3223       }
       
  3224       page_add_to_stmt_list(pPg);
       
  3225     }
       
  3226   }
       
  3227 
       
  3228   /* Update the database size and return.
       
  3229   */
       
  3230   if( pPager->dbSize<(int)pPg->pgno ){
       
  3231     pPager->dbSize = pPg->pgno;
       
  3232     if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
       
  3233       pPager->dbSize++;
       
  3234     }
       
  3235   }
       
  3236   return rc;
       
  3237 }
       
  3238 
       
  3239 /*
       
  3240 ** Return TRUE if the page given in the argument was previously passed
       
  3241 ** to sqlite3pager_write().  In other words, return TRUE if it is ok
       
  3242 ** to change the content of the page.
       
  3243 */
       
  3244 #ifndef NDEBUG
       
  3245 int sqlite3pager_iswriteable(void *pData){
       
  3246   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  3247   return pPg->dirty;
       
  3248 }
       
  3249 #endif
       
  3250 
       
  3251 #ifndef SQLITE_OMIT_VACUUM
       
  3252 /*
       
  3253 ** Replace the content of a single page with the information in the third
       
  3254 ** argument.
       
  3255 */
       
  3256 int sqlite3pager_overwrite(Pager *pPager, Pgno pgno, void *pData){
       
  3257   void *pPage;
       
  3258   int rc;
       
  3259 
       
  3260   rc = sqlite3pager_get(pPager, pgno, &pPage);
       
  3261   if( rc==SQLITE_OK ){
       
  3262     rc = sqlite3pager_write(pPage);
       
  3263     if( rc==SQLITE_OK ){
       
  3264       memcpy(pPage, pData, pPager->pageSize);
       
  3265     }
       
  3266     sqlite3pager_unref(pPage);
       
  3267   }
       
  3268   return rc;
       
  3269 }
       
  3270 #endif
       
  3271 
       
  3272 /*
       
  3273 ** A call to this routine tells the pager that it is not necessary to
       
  3274 ** write the information on page "pgno" back to the disk, even though
       
  3275 ** that page might be marked as dirty.
       
  3276 **
       
  3277 ** The overlying software layer calls this routine when all of the data
       
  3278 ** on the given page is unused.  The pager marks the page as clean so
       
  3279 ** that it does not get written to disk.
       
  3280 **
       
  3281 ** Tests show that this optimization, together with the
       
  3282 ** sqlite3pager_dont_rollback() below, more than double the speed
       
  3283 ** of large INSERT operations and quadruple the speed of large DELETEs.
       
  3284 **
       
  3285 ** When this routine is called, set the alwaysRollback flag to true.
       
  3286 ** Subsequent calls to sqlite3pager_dont_rollback() for the same page
       
  3287 ** will thereafter be ignored.  This is necessary to avoid a problem
       
  3288 ** where a page with data is added to the freelist during one part of
       
  3289 ** a transaction then removed from the freelist during a later part
       
  3290 ** of the same transaction and reused for some other purpose.  When it
       
  3291 ** is first added to the freelist, this routine is called.  When reused,
       
  3292 ** the dont_rollback() routine is called.  But because the page contains
       
  3293 ** critical data, we still need to be sure it gets rolled back in spite
       
  3294 ** of the dont_rollback() call.
       
  3295 */
       
  3296 void sqlite3pager_dont_write(Pager *pPager, Pgno pgno){
       
  3297   PgHdr *pPg;
       
  3298 
       
  3299   if( MEMDB ) return;
       
  3300 
       
  3301   pPg = pager_lookup(pPager, pgno);
       
  3302   assert( pPg!=0 );  /* We never call _dont_write unless the page is in mem */
       
  3303   pPg->alwaysRollback = 1;
       
  3304   if( pPg->dirty && !pPager->stmtInUse ){
       
  3305     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
       
  3306       /* If this pages is the last page in the file and the file has grown
       
  3307       ** during the current transaction, then do NOT mark the page as clean.
       
  3308       ** When the database file grows, we must make sure that the last page
       
  3309       ** gets written at least once so that the disk file will be the correct
       
  3310       ** size. If you do not write this page and the size of the file
       
  3311       ** on the disk ends up being too small, that can lead to database
       
  3312       ** corruption during the next transaction.
       
  3313       */
       
  3314     }else{
       
  3315       TRACE3("DONT_WRITE page %d of %d\n", pgno, PAGERID(pPager));
       
  3316       makeClean(pPg);
       
  3317 #ifdef SQLITE_CHECK_PAGES
       
  3318       pPg->pageHash = pager_pagehash(pPg);
       
  3319 #endif
       
  3320     }
       
  3321   }
       
  3322 }
       
  3323 
       
  3324 /*
       
  3325 ** A call to this routine tells the pager that if a rollback occurs,
       
  3326 ** it is not necessary to restore the data on the given page.  This
       
  3327 ** means that the pager does not have to record the given page in the
       
  3328 ** rollback journal.
       
  3329 */
       
  3330 void sqlite3pager_dont_rollback(void *pData){
       
  3331   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  3332   Pager *pPager = pPg->pPager;
       
  3333 
       
  3334   if( pPager->state!=PAGER_EXCLUSIVE || pPager->journalOpen==0 ) return;
       
  3335   if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
       
  3336   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
       
  3337     assert( pPager->aInJournal!=0 );
       
  3338     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3339     pPg->inJournal = 1;
       
  3340     if( pPager->stmtInUse ){
       
  3341       pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3342       page_add_to_stmt_list(pPg);
       
  3343     }
       
  3344     TRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
       
  3345   }
       
  3346   if( pPager->stmtInUse && !pPg->inStmt && (int)pPg->pgno<=pPager->stmtSize ){
       
  3347     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
       
  3348     assert( pPager->aInStmt!=0 );
       
  3349     pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  3350     page_add_to_stmt_list(pPg);
       
  3351   }
       
  3352 }
       
  3353 
       
  3354 
       
  3355 /*
       
  3356 ** Commit all changes to the database and release the write lock.
       
  3357 **
       
  3358 ** If the commit fails for any reason, a rollback attempt is made
       
  3359 ** and an error code is returned.  If the commit worked, SQLITE_OK
       
  3360 ** is returned.
       
  3361 */
       
  3362 int sqlite3pager_commit(Pager *pPager){
       
  3363   int rc;
       
  3364   PgHdr *pPg;
       
  3365 
       
  3366   if( pPager->errCode ){
       
  3367     return pPager->errCode;
       
  3368   }
       
  3369   if( pPager->state<PAGER_RESERVED ){
       
  3370     return SQLITE_ERROR;
       
  3371   }
       
  3372   TRACE2("COMMIT %d\n", PAGERID(pPager));
       
  3373   if( MEMDB ){
       
  3374     pPg = pager_get_all_dirty_pages(pPager);
       
  3375     while( pPg ){
       
  3376       clearHistory(PGHDR_TO_HIST(pPg, pPager));
       
  3377       pPg->dirty = 0;
       
  3378       pPg->inJournal = 0;
       
  3379       pPg->inStmt = 0;
       
  3380       pPg->needSync = 0;
       
  3381       pPg->pPrevStmt = pPg->pNextStmt = 0;
       
  3382       pPg = pPg->pDirty;
       
  3383     }
       
  3384     pPager->pDirty = 0;
       
  3385 #ifndef NDEBUG
       
  3386     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  3387       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  3388       assert( !pPg->alwaysRollback );
       
  3389       assert( !pHist->pOrig );
       
  3390       assert( !pHist->pStmt );
       
  3391     }
       
  3392 #endif
       
  3393     pPager->pStmt = 0;
       
  3394     pPager->state = PAGER_SHARED;
       
  3395     return SQLITE_OK;
       
  3396   }
       
  3397   if( pPager->dirtyCache==0 ){
       
  3398     /* Exit early (without doing the time-consuming sqlite3OsSync() calls)
       
  3399     ** if there have been no changes to the database file. */
       
  3400     assert( pPager->needSync==0 );
       
  3401     rc = pager_unwritelock(pPager);
       
  3402     pPager->dbSize = -1;
       
  3403     return rc;
       
  3404   }
       
  3405   assert( pPager->journalOpen );
       
  3406   rc = sqlite3pager_sync(pPager, 0, 0);
       
  3407   if( rc==SQLITE_OK ){
       
  3408     rc = pager_unwritelock(pPager);
       
  3409     pPager->dbSize = -1;
       
  3410   }
       
  3411   return rc;
       
  3412 }
       
  3413 
       
  3414 /*
       
  3415 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
       
  3416 ** All in-memory cache pages revert to their original data contents.
       
  3417 ** The journal is deleted.
       
  3418 **
       
  3419 ** This routine cannot fail unless some other process is not following
       
  3420 ** the correct locking protocol (SQLITE_PROTOCOL) or unless some other
       
  3421 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
       
  3422 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
       
  3423 ** codes are returned for all these occasions.  Otherwise,
       
  3424 ** SQLITE_OK is returned.
       
  3425 */
       
  3426 int sqlite3pager_rollback(Pager *pPager){
       
  3427   int rc;
       
  3428   TRACE2("ROLLBACK %d\n", PAGERID(pPager));
       
  3429   if( MEMDB ){
       
  3430     PgHdr *p;
       
  3431     for(p=pPager->pAll; p; p=p->pNextAll){
       
  3432       PgHistory *pHist;
       
  3433       assert( !p->alwaysRollback );
       
  3434       if( !p->dirty ){
       
  3435         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
       
  3436         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
       
  3437         continue;
       
  3438       }
       
  3439 
       
  3440       pHist = PGHDR_TO_HIST(p, pPager);
       
  3441       if( pHist->pOrig ){
       
  3442         memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
       
  3443         TRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
       
  3444       }else{
       
  3445         TRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
       
  3446       }
       
  3447       clearHistory(pHist);
       
  3448       p->dirty = 0;
       
  3449       p->inJournal = 0;
       
  3450       p->inStmt = 0;
       
  3451       p->pPrevStmt = p->pNextStmt = 0;
       
  3452       if( pPager->xReiniter ){
       
  3453         pPager->xReiniter(PGHDR_TO_DATA(p), pPager->pageSize);
       
  3454       }
       
  3455     }
       
  3456     pPager->pDirty = 0;
       
  3457     pPager->pStmt = 0;
       
  3458     pPager->dbSize = pPager->origDbSize;
       
  3459     memoryTruncate(pPager);
       
  3460     pPager->stmtInUse = 0;
       
  3461     pPager->state = PAGER_SHARED;
       
  3462     return SQLITE_OK;
       
  3463   }
       
  3464 
       
  3465   if( !pPager->dirtyCache || !pPager->journalOpen ){
       
  3466     rc = pager_unwritelock(pPager);
       
  3467     pPager->dbSize = -1;
       
  3468     return rc;
       
  3469   }
       
  3470 
       
  3471   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  3472     if( pPager->state>=PAGER_EXCLUSIVE ){
       
  3473       pager_playback(pPager);
       
  3474     }
       
  3475     return pPager->errCode;
       
  3476   }
       
  3477   if( pPager->state==PAGER_RESERVED ){
       
  3478     int rc2;
       
  3479     rc = pager_reload_cache(pPager);
       
  3480     rc2 = pager_unwritelock(pPager);
       
  3481     if( rc==SQLITE_OK ){
       
  3482       rc = rc2;
       
  3483     }
       
  3484   }else{
       
  3485     rc = pager_playback(pPager);
       
  3486   }
       
  3487   pPager->dbSize = -1;
       
  3488 
       
  3489   /* If an error occurs during a ROLLBACK, we can no longer trust the pager
       
  3490   ** cache. So call pager_error() on the way out to make any error 
       
  3491   ** persistent.
       
  3492   */
       
  3493   return pager_error(pPager, rc);
       
  3494 }
       
  3495 
       
  3496 /*
       
  3497 ** Return TRUE if the database file is opened read-only.  Return FALSE
       
  3498 ** if the database is (in theory) writable.
       
  3499 */
       
  3500 int sqlite3pager_isreadonly(Pager *pPager){
       
  3501   return pPager->readOnly;
       
  3502 }
       
  3503 
       
  3504 /*
       
  3505 ** Return the number of references to the pager.
       
  3506 */
       
  3507 int sqlite3pager_refcount(Pager *pPager){
       
  3508   return pPager->nRef;
       
  3509 }
       
  3510 
       
  3511 #ifdef SQLITE_TEST
       
  3512 /*
       
  3513 ** This routine is used for testing and analysis only.
       
  3514 */
       
  3515 int *sqlite3pager_stats(Pager *pPager){
       
  3516   static int a[11];
       
  3517   a[0] = pPager->nRef;
       
  3518   a[1] = pPager->nPage;
       
  3519   a[2] = pPager->mxPage;
       
  3520   a[3] = pPager->dbSize;
       
  3521   a[4] = pPager->state;
       
  3522   a[5] = pPager->errCode;
       
  3523   a[6] = pPager->nHit;
       
  3524   a[7] = pPager->nMiss;
       
  3525   a[8] = pPager->nOvfl;
       
  3526   a[9] = pPager->nRead;
       
  3527   a[10] = pPager->nWrite;
       
  3528   return a;
       
  3529 }
       
  3530 #endif
       
  3531 
       
  3532 /*
       
  3533 ** Set the statement rollback point.
       
  3534 **
       
  3535 ** This routine should be called with the transaction journal already
       
  3536 ** open.  A new statement journal is created that can be used to rollback
       
  3537 ** changes of a single SQL command within a larger transaction.
       
  3538 */
       
  3539 int sqlite3pager_stmt_begin(Pager *pPager){
       
  3540   int rc;
       
  3541   char zTemp[SQLITE_TEMPNAME_SIZE];
       
  3542   assert( !pPager->stmtInUse );
       
  3543   assert( pPager->dbSize>=0 );
       
  3544   TRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
       
  3545   if( MEMDB ){
       
  3546     pPager->stmtInUse = 1;
       
  3547     pPager->stmtSize = pPager->dbSize;
       
  3548     return SQLITE_OK;
       
  3549   }
       
  3550   if( !pPager->journalOpen ){
       
  3551     pPager->stmtAutoopen = 1;
       
  3552     return SQLITE_OK;
       
  3553   }
       
  3554   assert( pPager->journalOpen );
       
  3555   pPager->aInStmt = sqliteMalloc( pPager->dbSize/8 + 1 );
       
  3556   if( pPager->aInStmt==0 ){
       
  3557     /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
       
  3558     return SQLITE_NOMEM;
       
  3559   }
       
  3560 #ifndef NDEBUG
       
  3561   rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize);
       
  3562   if( rc ) goto stmt_begin_failed;
       
  3563   assert( pPager->stmtJSize == pPager->journalOff );
       
  3564 #endif
       
  3565   pPager->stmtJSize = pPager->journalOff;
       
  3566   pPager->stmtSize = pPager->dbSize;
       
  3567   pPager->stmtHdrOff = 0;
       
  3568   pPager->stmtCksum = pPager->cksumInit;
       
  3569   if( !pPager->stmtOpen ){
       
  3570     rc = sqlite3pager_opentemp(zTemp, &pPager->stfd);
       
  3571     if( rc ) goto stmt_begin_failed;
       
  3572     pPager->stmtOpen = 1;
       
  3573     pPager->stmtNRec = 0;
       
  3574   }
       
  3575   pPager->stmtInUse = 1;
       
  3576   return SQLITE_OK;
       
  3577  
       
  3578 stmt_begin_failed:
       
  3579   if( pPager->aInStmt ){
       
  3580     sqliteFree(pPager->aInStmt);
       
  3581     pPager->aInStmt = 0;
       
  3582   }
       
  3583   return rc;
       
  3584 }
       
  3585 
       
  3586 /*
       
  3587 ** Commit a statement.
       
  3588 */
       
  3589 int sqlite3pager_stmt_commit(Pager *pPager){
       
  3590   if( pPager->stmtInUse ){
       
  3591     PgHdr *pPg, *pNext;
       
  3592     TRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
       
  3593     if( !MEMDB ){
       
  3594       sqlite3OsSeek(pPager->stfd, 0);
       
  3595       /* sqlite3OsTruncate(pPager->stfd, 0); */
       
  3596       sqliteFree( pPager->aInStmt );
       
  3597       pPager->aInStmt = 0;
       
  3598     }
       
  3599     for(pPg=pPager->pStmt; pPg; pPg=pNext){
       
  3600       pNext = pPg->pNextStmt;
       
  3601       assert( pPg->inStmt );
       
  3602       pPg->inStmt = 0;
       
  3603       pPg->pPrevStmt = pPg->pNextStmt = 0;
       
  3604       if( MEMDB ){
       
  3605         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  3606         sqliteFree(pHist->pStmt);
       
  3607         pHist->pStmt = 0;
       
  3608       }
       
  3609     }
       
  3610     pPager->stmtNRec = 0;
       
  3611     pPager->stmtInUse = 0;
       
  3612     pPager->pStmt = 0;
       
  3613   }
       
  3614   pPager->stmtAutoopen = 0;
       
  3615   return SQLITE_OK;
       
  3616 }
       
  3617 
       
  3618 /*
       
  3619 ** Rollback a statement.
       
  3620 */
       
  3621 int sqlite3pager_stmt_rollback(Pager *pPager){
       
  3622   int rc;
       
  3623   if( pPager->stmtInUse ){
       
  3624     TRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
       
  3625     if( MEMDB ){
       
  3626       PgHdr *pPg;
       
  3627       for(pPg=pPager->pStmt; pPg; pPg=pPg->pNextStmt){
       
  3628         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  3629         if( pHist->pStmt ){
       
  3630           memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
       
  3631           sqliteFree(pHist->pStmt);
       
  3632           pHist->pStmt = 0;
       
  3633         }
       
  3634       }
       
  3635       pPager->dbSize = pPager->stmtSize;
       
  3636       memoryTruncate(pPager);
       
  3637       rc = SQLITE_OK;
       
  3638     }else{
       
  3639       rc = pager_stmt_playback(pPager);
       
  3640     }
       
  3641     sqlite3pager_stmt_commit(pPager);
       
  3642   }else{
       
  3643     rc = SQLITE_OK;
       
  3644   }
       
  3645   pPager->stmtAutoopen = 0;
       
  3646   return rc;
       
  3647 }
       
  3648 
       
  3649 /*
       
  3650 ** Return the full pathname of the database file.
       
  3651 */
       
  3652 const char *sqlite3pager_filename(Pager *pPager){
       
  3653   return pPager->zFilename;
       
  3654 }
       
  3655 
       
  3656 /*
       
  3657 ** Return the directory of the database file.
       
  3658 */
       
  3659 const char *sqlite3pager_dirname(Pager *pPager){
       
  3660   return pPager->zDirectory;
       
  3661 }
       
  3662 
       
  3663 /*
       
  3664 ** Return the full pathname of the journal file.
       
  3665 */
       
  3666 const char *sqlite3pager_journalname(Pager *pPager){
       
  3667   return pPager->zJournal;
       
  3668 }
       
  3669 
       
  3670 /*
       
  3671 ** Return true if fsync() calls are disabled for this pager.  Return FALSE
       
  3672 ** if fsync()s are executed normally.
       
  3673 */
       
  3674 int sqlite3pager_nosync(Pager *pPager){
       
  3675   return pPager->noSync;
       
  3676 }
       
  3677 
       
  3678 /*
       
  3679 ** Set the codec for this pager
       
  3680 */
       
  3681 void sqlite3pager_set_codec(
       
  3682   Pager *pPager,
       
  3683   void *(*xCodec)(void*,void*,Pgno,int),
       
  3684   void *pCodecArg
       
  3685 ){
       
  3686   pPager->xCodec = xCodec;
       
  3687   pPager->pCodecArg = pCodecArg;
       
  3688 }
       
  3689 
       
  3690 /*
       
  3691 ** This routine is called to increment the database file change-counter,
       
  3692 ** stored at byte 24 of the pager file.
       
  3693 */
       
  3694 static int pager_incr_changecounter(Pager *pPager){
       
  3695   void *pPage;
       
  3696   PgHdr *pPgHdr;
       
  3697   u32 change_counter;
       
  3698   int rc;
       
  3699 
       
  3700   /* Open page 1 of the file for writing. */
       
  3701   rc = sqlite3pager_get(pPager, 1, &pPage);
       
  3702   if( rc!=SQLITE_OK ) return rc;
       
  3703   rc = sqlite3pager_write(pPage);
       
  3704   if( rc!=SQLITE_OK ) return rc;
       
  3705 
       
  3706   /* Read the current value at byte 24. */
       
  3707   pPgHdr = DATA_TO_PGHDR(pPage);
       
  3708   change_counter = retrieve32bits(pPgHdr, 24);
       
  3709 
       
  3710   /* Increment the value just read and write it back to byte 24. */
       
  3711   change_counter++;
       
  3712   put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
       
  3713 
       
  3714   /* Release the page reference. */
       
  3715   sqlite3pager_unref(pPage);
       
  3716   return SQLITE_OK;
       
  3717 }
       
  3718 
       
  3719 /*
       
  3720 ** Sync the database file for the pager pPager. zMaster points to the name
       
  3721 ** of a master journal file that should be written into the individual
       
  3722 ** journal file. zMaster may be NULL, which is interpreted as no master
       
  3723 ** journal (a single database transaction).
       
  3724 **
       
  3725 ** This routine ensures that the journal is synced, all dirty pages written
       
  3726 ** to the database file and the database file synced. The only thing that
       
  3727 ** remains to commit the transaction is to delete the journal file (or
       
  3728 ** master journal file if specified).
       
  3729 **
       
  3730 ** Note that if zMaster==NULL, this does not overwrite a previous value
       
  3731 ** passed to an sqlite3pager_sync() call.
       
  3732 **
       
  3733 ** If parameter nTrunc is non-zero, then the pager file is truncated to
       
  3734 ** nTrunc pages (this is used by auto-vacuum databases).
       
  3735 */
       
  3736 int sqlite3pager_sync(Pager *pPager, const char *zMaster, Pgno nTrunc){
       
  3737   int rc = SQLITE_OK;
       
  3738 
       
  3739   TRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
       
  3740       pPager->zFilename, zMaster, nTrunc);
       
  3741 
       
  3742   /* If this is an in-memory db, or no pages have been written to, or this
       
  3743   ** function has already been called, it is a no-op.
       
  3744   */
       
  3745   if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
       
  3746     PgHdr *pPg;
       
  3747     assert( pPager->journalOpen );
       
  3748 
       
  3749     /* If a master journal file name has already been written to the
       
  3750     ** journal file, then no sync is required. This happens when it is
       
  3751     ** written, then the process fails to upgrade from a RESERVED to an
       
  3752     ** EXCLUSIVE lock. The next time the process tries to commit the
       
  3753     ** transaction the m-j name will have already been written.
       
  3754     */
       
  3755     if( !pPager->setMaster ){
       
  3756       rc = pager_incr_changecounter(pPager);
       
  3757       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3758 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3759       if( nTrunc!=0 ){
       
  3760         /* If this transaction has made the database smaller, then all pages
       
  3761         ** being discarded by the truncation must be written to the journal
       
  3762         ** file.
       
  3763         */
       
  3764         Pgno i;
       
  3765         void *pPage;
       
  3766         int iSkip = PAGER_MJ_PGNO(pPager);
       
  3767         for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
       
  3768           if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
       
  3769             rc = sqlite3pager_get(pPager, i, &pPage);
       
  3770             if( rc!=SQLITE_OK ) goto sync_exit;
       
  3771             rc = sqlite3pager_write(pPage);
       
  3772             sqlite3pager_unref(pPage);
       
  3773             if( rc!=SQLITE_OK ) goto sync_exit;
       
  3774           }
       
  3775         } 
       
  3776       }
       
  3777 #endif
       
  3778       rc = writeMasterJournal(pPager, zMaster);
       
  3779       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3780       rc = syncJournal(pPager);
       
  3781       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3782     }
       
  3783 
       
  3784 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3785     if( nTrunc!=0 ){
       
  3786       rc = sqlite3pager_truncate(pPager, nTrunc);
       
  3787       if( rc!=SQLITE_OK ) goto sync_exit;
       
  3788     }
       
  3789 #endif
       
  3790 
       
  3791     /* Write all dirty pages to the database file */
       
  3792     pPg = pager_get_all_dirty_pages(pPager);
       
  3793     rc = pager_write_pagelist(pPg);
       
  3794     if( rc!=SQLITE_OK ) goto sync_exit;
       
  3795 
       
  3796     /* Sync the database file. */
       
  3797     if( !pPager->noSync ){
       
  3798       rc = sqlite3OsSync(pPager->fd, 0);
       
  3799     }
       
  3800 
       
  3801     pPager->state = PAGER_SYNCED;
       
  3802   }else if( MEMDB && nTrunc!=0 ){
       
  3803     rc = sqlite3pager_truncate(pPager, nTrunc);
       
  3804   }
       
  3805 
       
  3806 sync_exit:
       
  3807   return rc;
       
  3808 }
       
  3809 
       
  3810 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3811 /*
       
  3812 ** Move the page identified by pData to location pgno in the file. 
       
  3813 **
       
  3814 ** There must be no references to the current page pgno. If current page
       
  3815 ** pgno is not already in the rollback journal, it is not written there by
       
  3816 ** by this routine. The same applies to the page pData refers to on entry to
       
  3817 ** this routine.
       
  3818 **
       
  3819 ** References to the page refered to by pData remain valid. Updating any
       
  3820 ** meta-data associated with page pData (i.e. data stored in the nExtra bytes
       
  3821 ** allocated along with the page) is the responsibility of the caller.
       
  3822 **
       
  3823 ** A transaction must be active when this routine is called. It used to be
       
  3824 ** required that a statement transaction was not active, but this restriction
       
  3825 ** has been removed (CREATE INDEX needs to move a page when a statement
       
  3826 ** transaction is active).
       
  3827 */
       
  3828 int sqlite3pager_movepage(Pager *pPager, void *pData, Pgno pgno){
       
  3829   PgHdr *pPg = DATA_TO_PGHDR(pData);
       
  3830   PgHdr *pPgOld; 
       
  3831   int h;
       
  3832   Pgno needSyncPgno = 0;
       
  3833 
       
  3834   assert( pPg->nRef>0 );
       
  3835 
       
  3836   TRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
       
  3837       PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
       
  3838 
       
  3839   if( pPg->needSync ){
       
  3840     needSyncPgno = pPg->pgno;
       
  3841     assert( pPg->inJournal );
       
  3842     assert( pPg->dirty );
       
  3843     assert( pPager->needSync );
       
  3844   }
       
  3845 
       
  3846   /* Unlink pPg from it's hash-chain */
       
  3847   unlinkHashChain(pPager, pPg);
       
  3848 
       
  3849   /* If the cache contains a page with page-number pgno, remove it
       
  3850   ** from it's hash chain. Also, if the PgHdr.needSync was set for 
       
  3851   ** page pgno before the 'move' operation, it needs to be retained 
       
  3852   ** for the page moved there.
       
  3853   */
       
  3854   pPgOld = pager_lookup(pPager, pgno);
       
  3855   if( pPgOld ){
       
  3856     assert( pPgOld->nRef==0 );
       
  3857     unlinkHashChain(pPager, pPgOld);
       
  3858     makeClean(pPgOld);
       
  3859     if( pPgOld->needSync ){
       
  3860       assert( pPgOld->inJournal );
       
  3861       pPg->inJournal = 1;
       
  3862       pPg->needSync = 1;
       
  3863       assert( pPager->needSync );
       
  3864     }
       
  3865   }
       
  3866 
       
  3867   /* Change the page number for pPg and insert it into the new hash-chain. */
       
  3868   assert( pgno!=0 );
       
  3869   pPg->pgno = pgno;
       
  3870   h = pgno & (pPager->nHash-1);
       
  3871   if( pPager->aHash[h] ){
       
  3872     assert( pPager->aHash[h]->pPrevHash==0 );
       
  3873     pPager->aHash[h]->pPrevHash = pPg;
       
  3874   }
       
  3875   pPg->pNextHash = pPager->aHash[h];
       
  3876   pPager->aHash[h] = pPg;
       
  3877   pPg->pPrevHash = 0;
       
  3878 
       
  3879   makeDirty(pPg);
       
  3880   pPager->dirtyCache = 1;
       
  3881 
       
  3882   if( needSyncPgno ){
       
  3883     /* If needSyncPgno is non-zero, then the journal file needs to be 
       
  3884     ** sync()ed before any data is written to database file page needSyncPgno.
       
  3885     ** Currently, no such page exists in the page-cache and the 
       
  3886     ** Pager.aInJournal bit has been set. This needs to be remedied by loading
       
  3887     ** the page into the pager-cache and setting the PgHdr.needSync flag.
       
  3888     **
       
  3889     ** The sqlite3pager_get() call may cause the journal to sync. So make
       
  3890     ** sure the Pager.needSync flag is set too.
       
  3891     */
       
  3892     int rc;
       
  3893     void *pNeedSync;
       
  3894     assert( pPager->needSync );
       
  3895     rc = sqlite3pager_get(pPager, needSyncPgno, &pNeedSync);
       
  3896     if( rc!=SQLITE_OK ) return rc;
       
  3897     pPager->needSync = 1;
       
  3898     DATA_TO_PGHDR(pNeedSync)->needSync = 1;
       
  3899     DATA_TO_PGHDR(pNeedSync)->inJournal = 1;
       
  3900     makeDirty(DATA_TO_PGHDR(pNeedSync));
       
  3901     sqlite3pager_unref(pNeedSync);
       
  3902   }
       
  3903 
       
  3904   return SQLITE_OK;
       
  3905 }
       
  3906 #endif
       
  3907 
       
  3908 #if defined(SQLITE_DEBUG) || defined(SQLITE_TEST)
       
  3909 /*
       
  3910 ** Return the current state of the file lock for the given pager.
       
  3911 ** The return value is one of NO_LOCK, SHARED_LOCK, RESERVED_LOCK,
       
  3912 ** PENDING_LOCK, or EXCLUSIVE_LOCK.
       
  3913 */
       
  3914 int sqlite3pager_lockstate(Pager *pPager){
       
  3915   return sqlite3OsLockState(pPager->fd);
       
  3916 }
       
  3917 #endif
       
  3918 
       
  3919 #ifdef SQLITE_DEBUG
       
  3920 /*
       
  3921 ** Print a listing of all referenced pages and their ref count.
       
  3922 */
       
  3923 void sqlite3pager_refdump(Pager *pPager){
       
  3924   PgHdr *pPg;
       
  3925   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  3926     if( pPg->nRef<=0 ) continue;
       
  3927     sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n", 
       
  3928        pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef);
       
  3929   }
       
  3930 }
       
  3931 #endif
       
  3932 
       
  3933 #endif /* SQLITE_OMIT_DISKIO */