engine/sqlite/src/pager.cpp
changeset 2 29cda98b007e
equal deleted inserted replaced
1:5f8e5adbbed9 2:29cda98b007e
       
     1 /*
       
     2 ** 2001 September 15
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** This is the implementation of the page cache subsystem or "pager".
       
    13 ** 
       
    14 ** The pager is used to access a database disk file.  It implements
       
    15 ** atomic commit and rollback through the use of a journal file that
       
    16 ** is separate from the database file.  The pager also implements file
       
    17 ** locking to prevent two processes from writing the same database
       
    18 ** file simultaneously, or one process from reading the database while
       
    19 ** another is writing.
       
    20 **
       
    21 ** @(#) $Id: pager.cpp 1282 2008-11-13 09:31:33Z LarsPson $
       
    22 */
       
    23 #ifndef SQLITE_OMIT_DISKIO
       
    24 #include "sqliteInt.h"
       
    25 #include <assert.h>
       
    26 #include <string.h>
       
    27 
       
    28 /*
       
    29 ** Macros for troubleshooting.  Normally turned off
       
    30 */
       
    31 #if 0
       
       /* Tracing enabled: every PAGERTRACEn() call is forwarded to printf(). */
    32 #define sqlite3DebugPrintf printf
       
    33 #define PAGERTRACE1(X)       sqlite3DebugPrintf(X)
       
    34 #define PAGERTRACE2(X,Y)     sqlite3DebugPrintf(X,Y)
       
    35 #define PAGERTRACE3(X,Y,Z)   sqlite3DebugPrintf(X,Y,Z)
       
    36 #define PAGERTRACE4(X,Y,Z,W) sqlite3DebugPrintf(X,Y,Z,W)
       
    37 #define PAGERTRACE5(X,Y,Z,W,V) sqlite3DebugPrintf(X,Y,Z,W,V)
       
    38 #else
       
       /* Tracing disabled (the default, because of the "#if 0" above): every
       ** PAGERTRACEn() call expands to nothing and its arguments are dropped. */
    39 #define PAGERTRACE1(X)
       
    40 #define PAGERTRACE2(X,Y)
       
    41 #define PAGERTRACE3(X,Y,Z)
       
    42 #define PAGERTRACE4(X,Y,Z,W)
       
    43 #define PAGERTRACE5(X,Y,Z,W,V)
       
    44 #endif
       
    45 
       
    46 /*
       
    47 ** The following two macros are used within the PAGERTRACEX() macros above
       
    48 ** to print out file-descriptors. 
       
    49 **
       
    50 ** PAGERID() takes a pointer to a Pager struct as its argument. The
       
    51 ** associated file-descriptor is returned. FILEHANDLEID() takes an sqlite3_file
       
    52 ** struct as its argument.
       
    53 */
       
    54 #define PAGERID(p) ((int)(p->fd))
       
    55 #define FILEHANDLEID(fd) ((int)fd)
       
    56 
       
    57 /*
       
    58 ** The page cache as a whole is always in one of the following
       
    59 ** states:
       
    60 **
       
    61 **   PAGER_UNLOCK        The page cache is not currently reading or 
       
    62 **                       writing the database file.  There is no
       
    63 **                       data held in memory.  This is the initial
       
    64 **                       state.
       
    65 **
       
    66 **   PAGER_SHARED        The page cache is reading the database.
       
    67 **                       Writing is not permitted.  There can be
       
    68 **                       multiple readers accessing the same database
       
    69 **                       file at the same time.
       
    70 **
       
    71 **   PAGER_RESERVED      This process has reserved the database for writing
       
    72 **                       but has not yet made any changes.  Only one process
       
    73 **                       at a time can reserve the database.  The original
       
    74 **                       database file has not been modified so other
       
    75 **                       processes may still be reading the on-disk
       
    76 **                       database file.
       
    77 **
       
    78 **   PAGER_EXCLUSIVE     The page cache is writing the database.
       
    79 **                       Access is exclusive.  No other processes or
       
    80 **                       threads can be reading or writing while one
       
    81 **                       process is writing.
       
    82 **
       
    83 **   PAGER_SYNCED        The pager moves to this state from PAGER_EXCLUSIVE
       
    84 **                       after all dirty pages have been written to the
       
    85 **                       database file and the file has been synced to
       
    86 **                       disk. All that remains to do is to remove or
       
    87 **                       truncate the journal file and the transaction 
       
    88 **                       will be committed.
       
    89 **
       
    90 ** The page cache comes up in PAGER_UNLOCK.  The first time a
       
    91 ** sqlite3PagerGet() occurs, the state transitions to PAGER_SHARED.
       
    92 ** After all pages have been released using sqlite_page_unref(),
       
    93 ** the state transitions back to PAGER_UNLOCK.  The first time
       
    94 ** that sqlite3PagerWrite() is called, the state transitions to
       
    95 ** PAGER_RESERVED.  (Note that sqlite3PagerWrite() can only be
       
    96 ** called on an outstanding page which means that the pager must
       
    97 ** be in PAGER_SHARED before it transitions to PAGER_RESERVED.)
       
    98 ** PAGER_RESERVED means that there is an open rollback journal.
       
    99 ** The transition to PAGER_EXCLUSIVE occurs before any changes
       
   100 ** are made to the database file, though writes to the rollback
       
   101 ** journal occur with just PAGER_RESERVED.  After an sqlite3PagerRollback()
       
   102 ** or sqlite3PagerCommitPhaseTwo(), the state can go back to PAGER_SHARED,
       
   103 ** or it can stay at PAGER_EXCLUSIVE if we are in exclusive access mode.
       
   104 */
       
   105 #define PAGER_UNLOCK      0   /* initial state: not reading or writing */
       
   106 #define PAGER_SHARED      1   /* same as SHARED_LOCK */
       
   107 #define PAGER_RESERVED    2   /* same as RESERVED_LOCK */
       
   108 #define PAGER_EXCLUSIVE   4   /* same as EXCLUSIVE_LOCK */
       
   109 #define PAGER_SYNCED      5   /* db file written and synced; journal not yet removed */
       
   110 
       
   111 /*
       
   112 ** If the SQLITE_BUSY_RESERVED_LOCK macro is set to true at compile-time,
       
   113 ** then failed attempts to get a reserved lock will invoke the busy callback.
       
   114 ** This is off by default.  To see why, consider the following scenario:
       
   115 ** 
       
   116 ** Suppose thread A already has a shared lock and wants a reserved lock.
       
   117 ** Thread B already has a reserved lock and wants an exclusive lock.  If
       
   118 ** both threads are using their busy callbacks, it might be a long time
       
   119 ** before one of the threads gives up and allows the other to proceed.
       
   120 ** But if the thread trying to get the reserved lock gives up quickly
       
   121 ** (if it never invokes its busy callback) then the contention will be
       
   122 ** resolved quickly.
       
   123 */
       
   124 #ifndef SQLITE_BUSY_RESERVED_LOCK
       
   125 # define SQLITE_BUSY_RESERVED_LOCK 0   /* default when not set at compile-time: off */
       
   126 #endif
       
   127 
       
   128 /*
       
   129 ** This macro rounds values up so that if the value is an address it
       
   130 ** is guaranteed to be an address that is aligned to an 8-byte boundary.
       
   131 */
       
   132 #define FORCE_ALIGNMENT(X)   (((X)+7)&~7)   /* adds 7, then clears the low three bits */
       
   133 
       
   134 typedef struct PgHdr PgHdr;   /* forward declaration of the page header type */
       
   135 
       
   136 /*
       
   137 ** Each pager stores all currently unreferenced pages in a list sorted
       
   138 ** in least-recently-used (LRU) order (i.e. the first item on the list has 
       
   139 ** not been referenced in a long time, the last item has been recently
       
   140 ** used). An instance of this structure is included as part of each
       
   141 ** pager structure for this purpose (variable Pager.lru).
       
   142 **
       
   143 ** Additionally, if memory-management is enabled, all unreferenced pages 
       
   144 ** are stored in a global LRU list (global variable sqlite3LruPageList).
       
   145 **
       
   146 ** In both cases, the PagerLruList.pFirstSynced variable points to
       
   147 ** the first page in the corresponding list that does not require an
       
   148 ** fsync() operation before its memory can be reclaimed. If no such
       
   149 ** page exists, PagerLruList.pFirstSynced is set to NULL.
       
   150 */
       
   151 typedef struct PagerLruList PagerLruList;
       
   152 struct PagerLruList {
       
   153   PgHdr *pFirst;         /* First page in LRU list (the least recently used) */
       
   154   PgHdr *pLast;          /* Last page in LRU list (the most recently used) */
       
   155   PgHdr *pFirstSynced;   /* First page in list with PgHdr.needSync==0, or NULL */
       
   156 };
       
   157 
       
   158 /*
       
   159 ** The following structure contains the next and previous pointers used
       
   160 ** to link a PgHdr structure into a PagerLruList linked list. 
       
   161 */
       
   162 typedef struct PagerLruLink PagerLruLink;
       
   163 struct PagerLruLink {
       
   164   PgHdr *pNext;          /* Following page in the list; 0 for the last entry */
       
   165   PgHdr *pPrev;          /* Preceding page in the list */
       
   166 };
       
   167 
       
   168 /*
       
   169 ** Each in-memory image of a page begins with the following header.
       
   170 ** This header is only visible to this pager module.  The client
       
   171 ** code that calls pager sees only the data that follows the header.
       
   172 **
       
   173 ** Client code should call sqlite3PagerWrite() on a page prior to making
       
   174 ** any modifications to that page.  The first time sqlite3PagerWrite()
       
   175 ** is called, the original page contents are written into the rollback
       
   176 ** journal and PgHdr.inJournal and PgHdr.needSync are set.  Later, once
       
   177 ** the journal page has made it onto the disk surface, PgHdr.needSync
       
   178 ** is cleared.  The modified page cannot be written back into the original
       
   179 ** database file until the journal pages have been synced to disk and the
       
   180 ** PgHdr.needSync has been cleared.
       
   181 **
       
   182 ** The PgHdr.dirty flag is set when sqlite3PagerWrite() is called and
       
   183 ** is cleared again when the page content is written back to the original
       
   184 ** database file.
       
   185 **
       
   186 ** Details of important structure elements:
       
   187 **
       
   188 ** needSync
       
   189 **
       
   190 **     If this is true, this means that it is not safe to write the page
       
   191 **     content to the database because the original content needed
       
   192 **     for rollback has not been synced to the main rollback journal.
       
   193 **     The original content may have been written to the rollback journal
       
   194 **     but it has not yet been synced.  So we cannot write to the database
       
   195 **     file because power failure might cause the page in the journal file
       
   196 **     to never reach the disk.  It is as if the write to the journal file
       
   197 **     does not occur until the journal file is synced.
       
   198 **     
       
   199 **     This flag is false if the page content exactly matches what
       
   200 **     currently exists in the database file.  The needSync flag is also
       
   201 **     false if the original content has been written to the main rollback
       
   202 **     journal and synced.  If the page represents a new page that has
       
   203 **     been added onto the end of the database during the current
       
   204 **     transaction, the needSync flag is true until the original database
       
   205 **     size in the journal header has been synced to disk.
       
   206 **
       
   207 ** inJournal
       
   208 **
       
   209 **     This is true if the original page has been written into the main
       
   210 **     rollback journal.  This is always false for new pages added to
       
   211 **     the end of the database file during the current transaction.
       
   212 **     And this flag says nothing about whether or not the journal
       
   213 **     has been synced to disk.  For pages that are in the original
       
   214 **     database file, the following expression should always be true:
       
   215 **
       
   216 **       inJournal = (pPager->aInJournal[(pgno-1)/8] & (1<<((pgno-1)%8)))!=0
       
   217 **
       
   218 **     The pPager->aInJournal[] array is only valid for the original
       
   219 **     pages of the database, not new pages that are added to the end
       
   220 **     of the database, so obviously the above expression cannot be
       
   221 **     valid for new pages.  For new pages inJournal is always 0.
       
   222 **
       
   223 ** dirty
       
   224 **
       
   225 **     When true, this means that the content of the page has been
       
   226 **     modified and needs to be written back to the database file.
       
   227 **     If false, it means that either the content of the page is
       
   228 **     unchanged or else the content is unimportant and we do not
       
   229 **     care whether or not it is preserved.
       
   230 **
       
   231 ** alwaysRollback
       
   232 **
       
   233 **     This means that the sqlite3PagerDontRollback() API should be
       
   234 **     ignored for this page.  The DontRollback() API attempts to say
       
   235 **     that the content of the page on disk is unimportant (it is an
       
   236 **     unused page on the freelist) so that it is unnecessary to 
       
   237 **     rollback changes to this page because the content of the page
       
   238 **     can change without changing the meaning of the database.  This
       
   239 **     flag overrides any DontRollback() attempt.  This flag is set
       
   240 **     when a page that originally contained valid data is added to
       
   241 **     the freelist.  Later in the same transaction, this page might
       
   242 **     be pulled from the freelist and reused for something different
       
   243 **     and at that point the DontRollback() API will be called because
       
   244 **     pages taken from the freelist do not need to be protected by
       
   245 **     the rollback journal.  But this flag says that the page was
       
   246 **     not originally part of the freelist so that it still needs to
       
   247 **     be rolled back in spite of any subsequent DontRollback() calls.
       
   248 **
       
   249 ** needRead 
       
   250 **
       
   251 **     This flag means (when true) that the content of the page has
       
   252 **     not yet been loaded from disk.  The in-memory content is just
       
   253 **     garbage.  (Actually, we zero the content, but you should not
       
   254 **     make any assumptions about the content nevertheless.)  If the
       
   255 **     content is needed in the future, it should be read from the
       
   256 **     original database file.
       
   257 */
       
   258 struct PgHdr {
       
   259   Pager *pPager;                 /* The pager to which this page belongs */
       
   260   Pgno pgno;                     /* The page number for this page */
       
   261   PgHdr *pNextHash, *pPrevHash;  /* Hash collision chain for PgHdr.pgno */
       
   262   PagerLruLink free;             /* Links for the pager-specific LRU list */
       
   263   PgHdr *pNextAll;               /* A list of all pages */
       
   264   u8 inJournal;                  /* TRUE if has been written to journal */
       
   265   u8 dirty;                      /* TRUE if we need to write back changes */
       
   266   u8 needSync;                   /* Sync journal before writing this page */
       
   267   u8 alwaysRollback;             /* Disable DontRollback() for this page */
       
   268   u8 needRead;                   /* Read content if PagerWrite() is called */
       
   269   short int nRef;                /* Number of users of this page */
       
   270   PgHdr *pDirty, *pPrevDirty;    /* Next and previous links in the dirty-page list */
       
   271 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   272   PagerLruLink gfree;            /* Links for the global list of nRef==0 pages */
       
   273 #endif
       
   274 #ifdef SQLITE_CHECK_PAGES
       
   275   u32 pageHash;                  /* NOTE(review): presumably a hash of the page content - confirm */
       
   276 #endif
       
   277   void *pData;                   /* Page data (what PGHDR_TO_DATA() returns) */
       
   278   /* Pager.nExtra bytes of local data appended to this header */
       
   279 };
       
   280 
       
   281 /*
       
   282 ** For an in-memory only database, some extra information is recorded about
       
   283 ** each page so that changes can be rolled back.  (Journal files are not
       
   284 ** used for in-memory databases.)  The following information is added to
       
   285 ** the end of every EXTRA block for in-memory databases.
       
   286 **
       
   287 ** This information could have been added directly to the PgHdr structure.
       
   288 ** But then it would take up an extra 8 bytes of storage on every PgHdr
       
   289 ** even for disk-based databases.  Splitting it out saves 8 bytes.  This
       
   290 ** is only a savings of 0.8% but those percentages add up.
       
   291 */
       
   292 typedef struct PgHistory PgHistory;
       
       /* Per-page rollback state for in-memory databases.  PGHDR_TO_HIST()
       ** locates it Pager.nExtra bytes past the end of the PgHdr. */
   293 struct PgHistory {
       
   294   u8 *pOrig;     /* Original page text.  Restore to this on a full rollback */
       
   295   u8 *pStmt;     /* Text as it was at the beginning of the current statement */
       
   296   PgHdr *pNextStmt, *pPrevStmt;  /* List of pages in the statement journal */
       
   297   u8 inStmt;                     /* TRUE if in the statement subjournal */
       
   298 };
       
   299 
       
   300 /*
       
   301 ** A macro used for invoking the codec if there is one
       
   302 */
       
   303 #ifdef SQLITE_HAS_CODEC
       
   304 # define CODEC1(P,D,N,X) if( P->xCodec!=0 ){ P->xCodec(P->pCodecArg,D,N,X); }
       
   305 # define CODEC2(P,D,N,X) ((char*)(P->xCodec!=0?P->xCodec(P->pCodecArg,D,N,X):D))
       
   306 #else
       
   307 # define CODEC1(P,D,N,X) /* NO-OP */
       
   308 # define CODEC2(P,D,N,X) ((char*)D)
       
   309 #endif
       
   310 
       
   311 /*
       
   312 ** Convert a pointer to a PgHdr into a pointer to its data
       
   313 ** and back again.
       
   314 */
       
       /* PGHDR_TO_DATA(P): the pData pointer stored in page header P. */
   315 #define PGHDR_TO_DATA(P)    ((P)->pData)
       
       /* PGHDR_TO_EXTRA(G,P): address of the memory immediately following
       ** page header G, as a void*.  The second argument is not used. */
   316 #define PGHDR_TO_EXTRA(G,P) ((void*)&((G)[1]))
       
       /* PGHDR_TO_HIST(P,PGR): address (PGR)->nExtra bytes past the end of
       ** page header P, typed as a PgHistory*. */
   317 #define PGHDR_TO_HIST(P,PGR)  \
       
   318             ((PgHistory*)&((char*)(&(P)[1]))[(PGR)->nExtra])
       
   319 
       
   320 /*
       
   321 ** An open page cache is an instance of the following structure.
       
   322 **
       
   323 ** Pager.errCode may be set to SQLITE_IOERR, SQLITE_CORRUPT,
       
   324 ** or SQLITE_FULL. Once one of the first two errors occurs, it persists
       
   325 ** and is returned as the result of every major pager API call.  The
       
   326 ** SQLITE_FULL return code is slightly different. It persists only until the
       
   327 ** next successful rollback is performed on the pager cache. Also,
       
   328 ** SQLITE_FULL does not affect the sqlite3PagerGet() and sqlite3PagerLookup()
       
   329 ** APIs, they may still be used successfully.
       
   330 */
       
   331 struct Pager {
       
   332   sqlite3_vfs *pVfs;          /* OS functions to use for IO */
       
   333   u8 journalOpen;             /* True if the journal file descriptor is valid */
       
   334   u8 journalStarted;          /* True if header of journal is synced */
       
   335   u8 useJournal;              /* Use a rollback journal on this file */
       
   336   u8 noReadlock;              /* Do not bother to obtain readlocks */
       
   337   u8 stmtOpen;                /* True if the statement subjournal is open */
       
   338   u8 stmtInUse;               /* True if we are in a statement subtransaction */
       
   339   u8 stmtAutoopen;            /* Open stmt journal when main journal is opened */
       
   340   u8 noSync;                  /* Do not sync the journal if true */
       
   341   u8 fullSync;                /* Do extra syncs of the journal for robustness */
       
   342   u8 sync_flags;              /* One of SYNC_NORMAL or SYNC_FULL */
       
   343   u8 state;                   /* PAGER_UNLOCK, _SHARED, _RESERVED, etc. */
       
   344   u8 tempFile;                /* zFilename is a temporary file */
       
   345   u8 readOnly;                /* True for a read-only database */
       
   346   u8 needSync;                /* True if an fsync() is needed on the journal */
       
   347   u8 dirtyCache;              /* True if cached pages have changed */
       
   348   u8 alwaysRollback;          /* Disable DontRollback() for all pages */
       
   349   u8 memDb;                   /* True to inhibit all file I/O */
       
   350   u8 setMaster;               /* True if a master-journal name has been written to the journal */
       
   351   u8 doNotSync;               /* Boolean. While true, do not spill the cache */
       
   352   u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
       
   353   u8 changeCountDone;         /* Set after incrementing the change-counter */
       
   354   u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
       
   355   int errCode;                /* One of several kinds of errors */
       
   356   int dbSize;                 /* Number of pages in the file */
       
   357   int origDbSize;             /* dbSize before the current change */
       
   358   int stmtSize;               /* Size of database (in pages) at stmt_begin() */
       
   359   int nRec;                   /* Number of pages written to the journal */
       
   360   u32 cksumInit;              /* Quasi-random value added to every checksum */
       
   361   int stmtNRec;               /* Number of records in stmt subjournal */
       
   362   int nExtra;                 /* Add this many bytes to each in-memory page */
       
   363   int pageSize;               /* Number of bytes in a page */
       
   364   int nPage;                  /* Total number of in-memory pages */
       
   365   int nRef;                   /* Number of in-memory pages with PgHdr.nRef>0 */
       
   366   int mxPage;                 /* Maximum number of pages to hold in cache */
       
   367   Pgno mxPgno;                /* Maximum allowed size of the database */
       
   368   u8 *aInJournal;             /* One bit for each page in the database file */
       
   369   u8 *aInStmt;                /* One bit for each page in the database */
       
   370   char *zFilename;            /* Name of the database file */
       
   371   char *zJournal;             /* Name of the journal file */
       
   372   char *zDirectory;           /* Directory holding database and journal files */
       
   373   char *zStmtJrnl;            /* Name of the statement journal file */
       
   374   sqlite3_file *fd, *jfd;     /* File descriptors for database and journal */
       
   375   sqlite3_file *stfd;         /* File descriptor for the statement subjournal */
       
   376   BusyHandler *pBusyHandler;  /* Pointer to sqlite.busyHandler */
       
   377   PagerLruList lru;           /* LRU list of free pages */
       
   378   PgHdr *pAll;                /* List of all pages */
       
   379   PgHdr *pStmt;               /* List of pages in the statement subjournal */
       
   380   PgHdr *pDirty;              /* List of all dirty pages */
       
   381   i64 journalOff;             /* Current byte offset in the journal file */
       
   382   i64 journalHdr;             /* Byte offset to previous journal header */
       
   383   i64 stmtHdrOff;             /* First journal header written this statement */
       
   384   i64 stmtCksum;              /* cksumInit when statement was started */
       
   385   i64 stmtJSize;              /* Size of journal at stmt_begin() */
       
   386   int sectorSize;             /* Assumed sector size during rollback; also JOURNAL_HDR_SZ() */
       
   387 #ifdef SQLITE_TEST
       
   388   int nHit, nMiss;            /* Cache hits and misses */
       
   389   int nRead, nWrite;          /* Database pages read/written */
       
   390 #endif
       
   391   void (*xDestructor)(DbPage*,int); /* Call this routine when freeing pages */
       
   392   void (*xReiniter)(DbPage*,int);   /* Call this routine when reloading pages */
       
   393 #ifdef SQLITE_HAS_CODEC
       
   394   void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
       
   395   void *pCodecArg;            /* First argument to xCodec() */
       
   396 #endif
       
   397   int nHash;                  /* Size of the pager hash table */
       
   398   PgHdr **aHash;              /* Hash table to map page number to PgHdr */
       
   399 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   400   Pager *pNext;               /* Doubly linked list of pagers on which */
       
   401   Pager *pPrev;               /* sqlite3_release_memory() will work */
       
       /* NOTE(review): pagerEnter() waits on the MEM2 mutex while iInUseMM is
       ** non-zero and asserts it is zero before returning; presumably it is set
       ** by sqlite3_release_memory() - confirm against the code that writes it. */
   402   int iInUseMM;               /* Non-zero makes pagerEnter() wait (see note above) */
       
   403   int iInUseDB;               /* Incremented by pagerEnter(), decremented by pagerLeave() */
       
   404 #endif
       
   405   char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
       
   406   char dbFileVers[16];        /* Changes whenever database file changes */
       
   407 };
       
   408 
       
   409 /*
       
   410 ** The following global variables hold counters used for
       
   411 ** testing purposes only.  These variables do not exist in
       
   412 ** a non-testing build.  These variables are not thread-safe.
       
   413 */
       
   414 #ifdef SQLITE_TEST
       
   415 int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
       
   416 int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
       
   417 int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
       
   418 int sqlite3_pager_pgfree_count = 0;    /* Number of cache pages freed */
       
       /* PAGER_INCR(v) post-increments v.  The argument is pasted in without
       ** parentheses, so pass a plain variable name. */
   419 # define PAGER_INCR(v)  v++
       
   420 #else
       
       /* Non-testing build: PAGER_INCR() expands to nothing. */
   421 # define PAGER_INCR(v)
       
   422 #endif
       
   423 
       
   424 /*
       
   425 ** The following variable points to the head of a double-linked list
       
   426 ** of all pagers that are eligible for page stealing by the
       
   427 ** sqlite3_release_memory() interface.  Access to this list is
       
   428 ** protected by the SQLITE_MUTEX_STATIC_MEM2 mutex.
       
   429 */
       
   430 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   431 static Pager *sqlite3PagerList = 0;   /* Head of the list of pagers eligible for page stealing */
       
   432 static PagerLruList sqlite3LruPageList = {0, 0, 0};   /* Global LRU list of unreferenced pages; starts empty */
       
   433 #endif
       
   434 
       
   435 
       
   436 /*
       
   437 ** Journal files begin with the following magic string.  The data
       
   438 ** was obtained from /dev/random.  It is used only as a sanity check.
       
   439 **
       
   440 ** Since version 2.8.0, the journal format contains additional sanity
       
   441 ** checking information.  If the power fails while the journal is being
       
   442 ** written, semi-random garbage data might appear in the journal
       
   443 ** file after power is restored.  If an attempt is then made
       
   444 ** to roll the journal back, the database could be corrupted.  The additional
       
   445 ** sanity checking data is an attempt to discover the garbage in the
       
   446 ** journal and ignore it.
       
   447 **
       
   448 ** The sanity checking information for the new journal format consists
       
   449 ** of a 32-bit checksum on each page of data.  The checksum covers both
       
   450 ** the page number and the pPager->pageSize bytes of data for the page.
       
   451 ** This cksum is initialized to a 32-bit random value that appears in the
       
   452 ** journal file right after the header.  The random initializer is important,
       
   453 ** because garbage data that appears at the end of a journal is likely
       
   454 ** data that was once in other files that have now been deleted.  If the
       
   455 ** garbage data came from an obsolete journal file, the checksums might
       
   456 ** be correct.  But by initializing the checksum to random value which
       
   457 ** is different for every journal, we minimize that risk.
       
   458 */
       
   459 static const unsigned char aJournalMagic[] = {
       
   460   0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,   /* the 8 magic bytes */
       
   461 };
       
   462 
       
   463 /*
       
   464 ** The size of the header and of each page in the journal is determined
       
   465 ** by the following macros.
       
   466 */
       
   467 #define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)
       
   468 
       
   469 /*
       
   470 ** The journal header size for this pager. In the future, this could be
       
   471 ** set to some value read from the disk controller. The important
       
   472 ** characteristic is that it is the same size as a disk sector.
       
   473 */
       
   474 #define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
       
   475 
       
   476 /*
       
   477 ** The macro MEMDB is true if we are dealing with an in-memory database.
       
   478 ** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
       
   479 ** the value of MEMDB will be a constant and the compiler will optimize
       
   480 ** out code that would never execute.
       
   481 */
       
   482 #ifdef SQLITE_OMIT_MEMORYDB
       
   483 # define MEMDB 0
       
   484 #else
       
       /* Note: this form only compiles where a variable named pPager is in scope. */
   485 # define MEMDB pPager->memDb
       
   486 #endif
       
   487 
       
   488 /*
       
   489 ** Page number PAGER_MJ_PGNO is never used in an SQLite database (it is
       
   490 ** reserved for working around a windows/posix incompatibility). It is
       
   491 ** used in the journal to signify that the remainder of the journal file 
       
   492 ** is devoted to storing a master journal name - there are no more pages to
       
   493 ** roll back. See comments for function writeMasterJournal() for details.
       
   494 */
       
   495 /* #define PAGER_MJ_PGNO(x) (PENDING_BYTE/((x)->pageSize)) */
       
       /* Assuming page numbers start at 1, this is the number of the page that
       ** contains byte offset PENDING_BYTE of the file - TODO confirm. */
   496 #define PAGER_MJ_PGNO(x) ((PENDING_BYTE/((x)->pageSize))+1)
       
   497 
       
   498 /*
       
   499 ** The maximum legal page number is (2^31 - 1).
       
   500 */
       
   501 #define PAGER_MAX_PGNO 2147483647   /* 2^31 - 1 */
       
   502 
       
   503 /*
       
   504 ** The pagerEnter() and pagerLeave() routines acquire and release
       
   505 ** a mutex on each pager.  The mutex is recursive.
       
   506 **
       
   507 ** This is a special-purpose mutex.  It only provides mutual exclusion
       
   508 ** between the Btree and the Memory Management sqlite3_release_memory()
       
   509 ** function.  It does not prevent, for example, two Btrees from accessing
       
   510 ** the same pager at the same time.  Other general-purpose mutexes in
       
   511 ** the btree layer handle that chore.
       
   512 */
       
   513 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
       /* Record that the caller is entering pager p.  Each call bumps
       ** p->iInUseDB; calls nest and must be balanced by pagerLeave().
       ** On the outermost entry, if p->iInUseMM is set, the function backs
       ** off and waits on the SQLITE_MUTEX_STATIC_MEM2 mutex before claiming
       ** the pager.  The statement order below is significant. */
   514   static void pagerEnter(Pager *p){
       
   515     p->iInUseDB++;
       
   516     if( p->iInUseMM && p->iInUseDB==1 ){   /* outermost entry while iInUseMM is set */
       
   517       sqlite3_mutex *mutex;
       
   518       mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
       
   519       p->iInUseDB = 0;              /* withdraw the claim while waiting */
       
   520       sqlite3_mutex_enter(mutex);   /* wait for the mutex */
       
   521       p->iInUseDB = 1;              /* re-assert the claim while holding the mutex */
       
   522       sqlite3_mutex_leave(mutex);
       
   523     }
       
   524     assert( p->iInUseMM==0 );       /* iInUseMM must be clear on return */
       
   525   }
       
       /* Undo one pagerEnter() call on pager p. */
   526   static void pagerLeave(Pager *p){
       
   527     p->iInUseDB--;
       
   528     assert( p->iInUseDB>=0 );       /* catches more leaves than enters */
       
   529   }
       
   530 #else
       
       /* Without SQLITE_ENABLE_MEMORY_MANAGEMENT both calls expand to nothing. */
   531 # define pagerEnter(X)
       
   532 # define pagerLeave(X)
       
   533 #endif
       
   534 
       
   535 /*
       
   536 ** Enable reference count tracking (for debugging) here:
       
   537 */
       
   538 #ifdef SQLITE_DEBUG
       
   539   int pager3_refinfo_enable = 0;
       
   540   static void pager_refinfo(PgHdr *p){
       
   541     static int cnt = 0;
       
   542     if( !pager3_refinfo_enable ) return;
       
   543     sqlite3DebugPrintf(
       
   544        "REFCNT: %4d addr=%p nRef=%-3d total=%d\n",
       
   545        p->pgno, PGHDR_TO_DATA(p), p->nRef, p->pPager->nRef
       
   546     );
       
   547     cnt++;   /* Something to set a breakpoint on */
       
   548   }
       
   549 # define REFINFO(X)  pager_refinfo(X)
       
   550 #else
       
   551 # define REFINFO(X)
       
   552 #endif
       
   553 
       
   554 /*
       
   555 ** Add page pPg to the end of the linked list managed by structure
       
   556 ** pList (pPg becomes the last entry in the list - the most recently 
       
   557 ** used). Argument pLink should point to either pPg->free or pPg->gfree,
       
   558 ** depending on whether pPg is being added to the pager-specific or
       
   559 ** global LRU list.
       
   560 */
       
   561 static void listAdd(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
       
   562   pLink->pNext = 0;
       
   563   pLink->pPrev = pList->pLast;
       
   564 
       
   565 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   566   assert(pLink==&pPg->free || pLink==&pPg->gfree);
       
   567   assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
       
   568 #endif
       
   569 
       
   570   if( pList->pLast ){
       
   571     int iOff = (char *)pLink - (char *)pPg;
       
   572     PagerLruLink *pLastLink = (PagerLruLink *)(&((u8 *)pList->pLast)[iOff]);
       
   573     pLastLink->pNext = pPg;
       
   574   }else{
       
   575     assert(!pList->pFirst);
       
   576     pList->pFirst = pPg;
       
   577   }
       
   578 
       
   579   pList->pLast = pPg;
       
   580   if( !pList->pFirstSynced && pPg->needSync==0 ){
       
   581     pList->pFirstSynced = pPg;
       
   582   }
       
   583 }
       
   584 
       
   585 /*
       
   586 ** Remove pPg from the list managed by the structure pointed to by pList.
       
   587 **
       
   588 ** Argument pLink should point to either pPg->free or pPg->gfree, depending 
       
   589 ** on whether pPg is being added to the pager-specific or global LRU list.
       
   590 */
       
   591 static void listRemove(PagerLruList *pList, PagerLruLink *pLink, PgHdr *pPg){
       
   592   int iOff = (char *)pLink - (char *)pPg;
       
   593 
       
   594 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   595   assert(pLink==&pPg->free || pLink==&pPg->gfree);
       
   596   assert(pLink==&pPg->gfree || pList!=&sqlite3LruPageList);
       
   597 #endif
       
   598 
       
   599   if( pPg==pList->pFirst ){
       
   600     pList->pFirst = pLink->pNext;
       
   601   }
       
   602   if( pPg==pList->pLast ){
       
   603     pList->pLast = pLink->pPrev;
       
   604   }
       
   605   if( pLink->pPrev ){
       
   606     PagerLruLink *pPrevLink = (PagerLruLink *)(&((u8 *)pLink->pPrev)[iOff]);
       
   607     pPrevLink->pNext = pLink->pNext;
       
   608   }
       
   609   if( pLink->pNext ){
       
   610     PagerLruLink *pNextLink = (PagerLruLink *)(&((u8 *)pLink->pNext)[iOff]);
       
   611     pNextLink->pPrev = pLink->pPrev;
       
   612   }
       
   613   if( pPg==pList->pFirstSynced ){
       
   614     PgHdr *p = pLink->pNext;
       
   615     while( p && p->needSync ){
       
   616       PagerLruLink *pL = (PagerLruLink *)(&((u8 *)p)[iOff]);
       
   617       p = pL->pNext;
       
   618     }
       
   619     pList->pFirstSynced = p;
       
   620   }
       
   621 
       
   622   pLink->pNext = pLink->pPrev = 0;
       
   623 }
       
   624 
       
   625 /* 
       
   626 ** Add page pPg to the list of free pages for the pager. If 
       
   627 ** memory-management is enabled, also add the page to the global 
       
   628 ** list of free pages.
       
   629 */
       
   630 static void lruListAdd(PgHdr *pPg){
       
   631   listAdd(&pPg->pPager->lru, &pPg->free, pPg);
       
   632 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   633   if( !pPg->pPager->memDb ){
       
   634     sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   635     listAdd(&sqlite3LruPageList, &pPg->gfree, pPg);
       
   636     sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   637   }
       
   638 #endif
       
   639 }
       
   640 
       
   641 /* 
       
   642 ** Remove page pPg from the list of free pages for the associated pager.
       
   643 ** If memory-management is enabled, also remove pPg from the global list
       
   644 ** of free pages.
       
   645 */
       
   646 static void lruListRemove(PgHdr *pPg){
       
   647   listRemove(&pPg->pPager->lru, &pPg->free, pPg);
       
   648 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   649   if( !pPg->pPager->memDb ){
       
   650     sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   651     listRemove(&sqlite3LruPageList, &pPg->gfree, pPg);
       
   652     sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   653   }
       
   654 #endif
       
   655 }
       
   656 
       
   657 /* 
       
   658 ** This function is called just after the needSync flag has been cleared
       
   659 ** from all pages managed by pPager (usually because the journal file
       
   660 ** has just been synced). It updates the pPager->lru.pFirstSynced variable
       
   661 ** and, if memory-management is enabled, the sqlite3LruPageList.pFirstSynced
       
   662 ** variable also.
       
   663 */
       
   664 static void lruListSetFirstSynced(Pager *pPager){
       
   665   pPager->lru.pFirstSynced = pPager->lru.pFirst;
       
   666 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
   667   if( !pPager->memDb ){
       
   668     PgHdr *p;
       
   669     sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   670     for(p=sqlite3LruPageList.pFirst; p && p->needSync; p=p->gfree.pNext);
       
   671     assert(p==pPager->lru.pFirstSynced || p==sqlite3LruPageList.pFirstSynced);
       
   672     sqlite3LruPageList.pFirstSynced = p;
       
   673     sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
   674   }
       
   675 #endif
       
   676 }
       
   677 
       
   678 /*
       
   679 ** Return true if page *pPg has already been written to the statement
       
   680 ** journal (or statement snapshot has been created, if *pPg is part
       
   681 ** of an in-memory database).
       
   682 */
       
   683 static int pageInStatement(PgHdr *pPg){
       
   684   Pager *pPager = pPg->pPager;
       
   685   if( MEMDB ){
       
   686     return PGHDR_TO_HIST(pPg, pPager)->inStmt;
       
   687   }else{
       
   688     Pgno pgno = pPg->pgno;
       
   689     u8 *a = pPager->aInStmt;
       
   690     return (a && (int)pgno<=pPager->stmtSize && (a[pgno/8] & (1<<(pgno&7))));
       
   691   }
       
   692 }
       
   693 
       
   694 /*
       
   695 ** Change the size of the pager hash table to N.  N must be a power
       
   696 ** of two.
       
   697 */
       
   698 static void pager_resize_hash_table(Pager *pPager, int N){
       
   699   PgHdr **aHash, *pPg;
       
   700   assert( N>0 && (N&(N-1))==0 );
       
   701   pagerLeave(pPager);
       
   702   sqlite3MallocBenignFailure((int)pPager->aHash);
       
   703   aHash = (PgHdr**)sqlite3MallocZero( sizeof(aHash[0])*N );
       
   704   pagerEnter(pPager);
       
   705   if( aHash==0 ){
       
   706     /* Failure to rehash is not an error.  It is only a performance hit. */
       
   707     return;
       
   708   }
       
   709   sqlite3_free(pPager->aHash);
       
   710   pPager->nHash = N;
       
   711   pPager->aHash = aHash;
       
   712   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
   713     int h;
       
   714     if( pPg->pgno==0 ){
       
   715       assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
       
   716       continue;
       
   717     }
       
   718     h = pPg->pgno & (N-1);
       
   719     pPg->pNextHash = aHash[h];
       
   720     if( aHash[h] ){
       
   721       aHash[h]->pPrevHash = pPg;
       
   722     }
       
   723     aHash[h] = pPg;
       
   724     pPg->pPrevHash = 0;
       
   725   }
       
   726 }
       
   727 
       
   728 /*
       
   729 ** Read a 32-bit integer from the given file descriptor.  Store the integer
       
   730 ** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
       
   731 ** error code is something goes wrong.
       
   732 **
       
   733 ** All values are stored on disk as big-endian.
       
   734 */
       
   735 static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
       
   736   unsigned char ac[4];
       
   737   int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
       
   738   if( rc==SQLITE_OK ){
       
   739     *pRes = sqlite3Get4byte(ac);
       
   740   }
       
   741   return rc;
       
   742 }
       
   743 
       
   744 /*
       
   745 ** Write a 32-bit integer into a string buffer in big-endian byte order.
       
   746 */
       
   747 #define put32bits(A,B)  sqlite3Put4byte((u8*)A,B)
       
   748 
       
   749 /*
       
   750 ** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
       
   751 ** on success or an error code is something goes wrong.
       
   752 */
       
   753 static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
       
   754   char ac[4];
       
   755   put32bits(ac, val);
       
   756   return sqlite3OsWrite(fd, ac, 4, offset);
       
   757 }
       
   758 
       
   759 /*
       
   760 ** If file pFd is open, call sqlite3OsUnlock() on it.
       
   761 */
       
   762 static int osUnlock(sqlite3_file *pFd, int eLock){
       
   763   if( !pFd->isOpen ){
       
   764     return SQLITE_OK;
       
   765   }
       
   766   return sqlite3OsUnlock(pFd, eLock);
       
   767 }
       
   768 
       
   769 /*
       
   770 ** This function determines whether or not the atomic-write optimization
       
   771 ** can be used with this pager. The optimization can be used if:
       
   772 **
       
   773 **  (a) the value returned by OsDeviceCharacteristics() indicates that
       
   774 **      a database page may be written atomically, and
       
   775 **  (b) the value returned by OsSectorSize() is less than or equal
       
   776 **      to the page size.
       
   777 **
       
   778 ** If the optimization cannot be used, 0 is returned. If it can be used,
       
   779 ** then the value returned is the size of the journal file when it
       
   780 ** contains rollback data for exactly one page.
       
   781 */
       
   782 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
   783 static int jrnlBufferSize(Pager *pPager){
       
   784   int dc;           /* Device characteristics */
       
   785   int nSector;      /* Sector size */
       
   786   int nPage;        /* Page size */
       
   787   sqlite3_file *fd = pPager->fd;
       
   788 
       
   789   if( fd->pMethods ){
       
   790     dc = sqlite3OsDeviceCharacteristics(fd);
       
   791     nSector = sqlite3OsSectorSize(fd);
       
   792     nPage = pPager->pageSize;
       
   793   }
       
   794 
       
   795   assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
       
   796   assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
       
   797 
       
   798   if( !fd->pMethods || (dc&(SQLITE_IOCAP_ATOMIC|(nPage>>8))&&nSector<=nPage) ){
       
   799     return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
       
   800   }
       
   801   return 0;
       
   802 }
       
   803 #endif
       
   804 
       
   805 /*
       
   806 ** This function should be called when an error occurs within the pager
       
   807 ** code. The first argument is a pointer to the pager structure, the
       
   808 ** second the error-code about to be returned by a pager API function. 
       
   809 ** The value returned is a copy of the second argument to this function. 
       
   810 **
       
   811 ** If the second argument is SQLITE_IOERR, SQLITE_CORRUPT, or SQLITE_FULL
       
   812 ** the error becomes persistent. Until the persisten error is cleared,
       
   813 ** subsequent API calls on this Pager will immediately return the same 
       
   814 ** error code.
       
   815 **
       
   816 ** A persistent error indicates that the contents of the pager-cache 
       
   817 ** cannot be trusted. This state can be cleared by completely discarding 
       
   818 ** the contents of the pager-cache. If a transaction was active when
       
   819 ** the persistent error occured, then the rollback journal may need
       
   820 ** to be replayed.
       
   821 */
       
   822 static void pager_unlock(Pager *pPager);
       
   823 static int pager_error(Pager *pPager, int rc){
       
   824   int rc2 = rc & 0xff;
       
   825   assert(
       
   826        pPager->errCode==SQLITE_FULL ||
       
   827        pPager->errCode==SQLITE_OK ||
       
   828        (pPager->errCode & 0xff)==SQLITE_IOERR
       
   829   );
       
   830   if(
       
   831     rc2==SQLITE_FULL ||
       
   832     rc2==SQLITE_IOERR ||
       
   833     rc2==SQLITE_CORRUPT
       
   834   ){
       
   835     pPager->errCode = rc;
       
   836     if( pPager->state==PAGER_UNLOCK && pPager->nRef==0 ){
       
   837       /* If the pager is already unlocked, call pager_unlock() now to
       
   838       ** clear the error state and ensure that the pager-cache is 
       
   839       ** completely empty.
       
   840       */
       
   841       pager_unlock(pPager);
       
   842     }
       
   843   }
       
   844   return rc;
       
   845 }
       
   846 
       
   847 /*
       
   848 ** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
       
   849 ** on the cache using a hash function.  This is used for testing
       
   850 ** and debugging only.
       
   851 */
       
   852 #ifdef SQLITE_CHECK_PAGES
       
   853 /*
       
   854 ** Return a 32-bit hash of the page data for pPage.
       
   855 */
       
   856 static u32 pager_datahash(int nByte, unsigned char *pData){
       
   857   u32 hash = 0;
       
   858   int i;
       
   859   for(i=0; i<nByte; i++){
       
   860     hash = (hash*1039) + pData[i];
       
   861   }
       
   862   return hash;
       
   863 }
       
   864 static u32 pager_pagehash(PgHdr *pPage){
       
   865   return pager_datahash(pPage->pPager->pageSize, 
       
   866                         (unsigned char *)PGHDR_TO_DATA(pPage));
       
   867 }
       
   868 
       
   869 /*
       
   870 ** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
       
   871 ** is defined, and NDEBUG is not defined, an assert() statement checks
       
   872 ** that the page is either dirty or still matches the calculated page-hash.
       
   873 */
       
   874 #define CHECK_PAGE(x) checkPage(x)
       
   875 static void checkPage(PgHdr *pPg){
       
   876   Pager *pPager = pPg->pPager;
       
   877   assert( !pPg->pageHash || pPager->errCode || MEMDB || pPg->dirty || 
       
   878       pPg->pageHash==pager_pagehash(pPg) );
       
   879 }
       
   880 
       
   881 #else
       
   882 #define pager_datahash(X,Y)  0
       
   883 #define pager_pagehash(X)  0
       
   884 #define CHECK_PAGE(x)
       
   885 #endif
       
   886 
       
   887 /*
       
   888 ** When this is called the journal file for pager pPager must be open.
       
   889 ** The master journal file name is read from the end of the file and 
       
   890 ** written into memory supplied by the caller. 
       
   891 **
       
   892 ** zMaster must point to a buffer of at least nMaster bytes allocated by
       
   893 ** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
       
   894 ** enough space to write the master journal name). If the master journal
       
   895 ** name in the journal is longer than nMaster bytes (including a
       
   896 ** nul-terminator), then this is handled as if no master journal name
       
   897 ** were present in the journal.
       
   898 **
       
   899 ** If no master journal file name is present zMaster[0] is set to 0 and
       
   900 ** SQLITE_OK returned.
       
   901 */
       
   902 static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, int nMaster){
       
   903   int rc;
       
   904   u32 len;
       
   905   i64 szJ;
       
   906   u32 cksum;
       
   907   int i;
       
   908   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
   909 
       
   910   zMaster[0] = '\0';
       
   911 
       
   912   rc = sqlite3OsFileSize(pJrnl, &szJ);
       
   913   if( rc!=SQLITE_OK || szJ<16 ) return rc;
       
   914 
       
   915   rc = read32bits(pJrnl, szJ-16, &len);
       
   916   if( rc!=SQLITE_OK ) return rc;
       
   917 
       
   918   if( len>=nMaster ){
       
   919     return SQLITE_OK;
       
   920   }
       
   921 
       
   922   rc = read32bits(pJrnl, szJ-12, &cksum);
       
   923   if( rc!=SQLITE_OK ) return rc;
       
   924 
       
   925   rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8);
       
   926   if( rc!=SQLITE_OK || memcmp(aMagic, aJournalMagic, 8) ) return rc;
       
   927 
       
   928   rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len);
       
   929   if( rc!=SQLITE_OK ){
       
   930     return rc;
       
   931   }
       
   932   zMaster[len] = '\0';
       
   933 
       
   934   /* See if the checksum matches the master journal name */
       
   935   for(i=0; i<len; i++){
       
   936     cksum -= zMaster[i];
       
   937    }
       
   938   if( cksum ){
       
   939     /* If the checksum doesn't add up, then one or more of the disk sectors
       
   940     ** containing the master journal filename is corrupted. This means
       
   941     ** definitely roll back, so just return SQLITE_OK and report a (nul)
       
   942     ** master-journal filename.
       
   943     */
       
   944     zMaster[0] = '\0';
       
   945   }
       
   946    
       
   947   return SQLITE_OK;
       
   948 }
       
   949 
       
   950 /*
       
   951 ** Seek the journal file descriptor to the next sector boundary where a
       
   952 ** journal header may be read or written. Pager.journalOff is updated with
       
   953 ** the new seek offset.
       
   954 **
       
   955 ** i.e for a sector size of 512:
       
   956 **
       
   957 ** Input Offset              Output Offset
       
   958 ** ---------------------------------------
       
   959 ** 0                         0
       
   960 ** 512                       512
       
   961 ** 100                       512
       
   962 ** 2000                      2048
       
   963 ** 
       
   964 */
       
   965 static void seekJournalHdr(Pager *pPager){
       
   966   i64 offset = 0;
       
   967   i64 c = pPager->journalOff;
       
   968   if( c ){
       
   969     offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
       
   970   }
       
   971   assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
       
   972   assert( offset>=c );
       
   973   assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
       
   974   pPager->journalOff = offset;
       
   975 }
       
   976 
       
   977 /*
       
   978 ** The journal file must be open when this routine is called. A journal
       
   979 ** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
       
   980 ** current location.
       
   981 **
       
   982 ** The format for the journal header is as follows:
       
   983 ** - 8 bytes: Magic identifying journal format.
       
   984 ** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
       
   985 ** - 4 bytes: Random number used for page hash.
       
   986 ** - 4 bytes: Initial database page count.
       
   987 ** - 4 bytes: Sector size used by the process that wrote this journal.
       
   988 ** 
       
   989 ** Followed by (JOURNAL_HDR_SZ - 24) bytes of unused space.
       
   990 */
       
   991 static int writeJournalHdr(Pager *pPager){
       
   992   char zHeader[sizeof(aJournalMagic)+16];
       
   993   int rc;
       
   994 
       
   995   if( pPager->stmtHdrOff==0 ){
       
   996     pPager->stmtHdrOff = pPager->journalOff;
       
   997   }
       
   998 
       
   999   seekJournalHdr(pPager);
       
  1000   pPager->journalHdr = pPager->journalOff;
       
  1001 
       
  1002   memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
       
  1003 
       
  1004   /* 
       
  1005   ** Write the nRec Field - the number of page records that follow this
       
  1006   ** journal header. Normally, zero is written to this value at this time.
       
  1007   ** After the records are added to the journal (and the journal synced, 
       
  1008   ** if in full-sync mode), the zero is overwritten with the true number
       
  1009   ** of records (see syncJournal()).
       
  1010   **
       
  1011   ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
       
  1012   ** reading the journal this value tells SQLite to assume that the
       
  1013   ** rest of the journal file contains valid page records. This assumption
       
  1014   ** is dangerous, as if a failure occured whilst writing to the journal
       
  1015   ** file it may contain some garbage data. There are two scenarios
       
  1016   ** where this risk can be ignored:
       
  1017   **
       
  1018   **   * When the pager is in no-sync mode. Corruption can follow a
       
  1019   **     power failure in this case anyway.
       
  1020   **
       
  1021   **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
       
  1022   **     that garbage data is never appended to the journal file.
       
  1023   */
       
  1024   assert(pPager->fd->pMethods||pPager->noSync);
       
  1025   if( (pPager->noSync) 
       
  1026    || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
       
  1027   ){
       
  1028     put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
       
  1029   }else{
       
  1030     put32bits(&zHeader[sizeof(aJournalMagic)], 0);
       
  1031   }
       
  1032 
       
  1033   /* The random check-hash initialiser */ 
       
  1034   sqlite3Randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
       
  1035   put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
       
  1036   /* The initial database size */
       
  1037   put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbSize);
       
  1038   /* The assumed sector size for this process */
       
  1039   put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
       
  1040   IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, sizeof(zHeader)))
       
  1041   rc = sqlite3OsWrite(pPager->jfd, zHeader, sizeof(zHeader),pPager->journalOff);
       
  1042   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
       
  1043 
       
  1044   /* The journal header has been written successfully. Seek the journal
       
  1045   ** file descriptor to the end of the journal header sector.
       
  1046   */
       
  1047   if( rc==SQLITE_OK ){
       
  1048     IOTRACE(("JTAIL %p %lld\n", pPager, pPager->journalOff-1))
       
  1049     rc = sqlite3OsWrite(pPager->jfd, "\000", 1, pPager->journalOff-1);
       
  1050   }
       
  1051   return rc;
       
  1052 }
       
  1053 
       
  1054 /*
       
  1055 ** The journal file must be open when this is called. A journal header file
       
  1056 ** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
       
  1057 ** file. See comments above function writeJournalHdr() for a description of
       
  1058 ** the journal header format.
       
  1059 **
       
  1060 ** If the header is read successfully, *nRec is set to the number of
       
  1061 ** page records following this header and *dbSize is set to the size of the
       
  1062 ** database before the transaction began, in pages. Also, pPager->cksumInit
       
  1063 ** is set to the value read from the journal header. SQLITE_OK is returned
       
  1064 ** in this case.
       
  1065 **
       
  1066 ** If the journal header file appears to be corrupted, SQLITE_DONE is
       
  1067 ** returned and *nRec and *dbSize are not set.  If JOURNAL_HDR_SZ bytes
       
  1068 ** cannot be read from the journal file an error code is returned.
       
  1069 */
       
  1070 static int readJournalHdr(
       
  1071   Pager *pPager, 
       
  1072   i64 journalSize,
       
  1073   u32 *pNRec, 
       
  1074   u32 *pDbSize
       
  1075 ){
       
  1076   int rc;
       
  1077   unsigned char aMagic[8]; /* A buffer to hold the magic header */
       
  1078   i64 jrnlOff;
       
  1079 
       
  1080   seekJournalHdr(pPager);
       
  1081   if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
       
  1082     return SQLITE_DONE;
       
  1083   }
       
  1084   jrnlOff = pPager->journalOff;
       
  1085 
       
  1086   rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), jrnlOff);
       
  1087   if( rc ) return rc;
       
  1088   jrnlOff += sizeof(aMagic);
       
  1089 
       
  1090   if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
       
  1091     return SQLITE_DONE;
       
  1092   }
       
  1093 
       
  1094   rc = read32bits(pPager->jfd, jrnlOff, pNRec);
       
  1095   if( rc ) return rc;
       
  1096 
       
  1097   rc = read32bits(pPager->jfd, jrnlOff+4, &pPager->cksumInit);
       
  1098   if( rc ) return rc;
       
  1099 
       
  1100   rc = read32bits(pPager->jfd, jrnlOff+8, pDbSize);
       
  1101   if( rc ) return rc;
       
  1102 
       
  1103   /* Update the assumed sector-size to match the value used by 
       
  1104   ** the process that created this journal. If this journal was
       
  1105   ** created by a process other than this one, then this routine
       
  1106   ** is being called from within pager_playback(). The local value
       
  1107   ** of Pager.sectorSize is restored at the end of that routine.
       
  1108   */
       
  1109   rc = read32bits(pPager->jfd, jrnlOff+12, (u32 *)&pPager->sectorSize);
       
  1110   if( rc ) return rc;
       
  1111 
       
  1112   pPager->journalOff += JOURNAL_HDR_SZ(pPager);
       
  1113   return SQLITE_OK;
       
  1114 }
       
  1115 
       
  1116 
       
  1117 /*
       
  1118 ** Write the supplied master journal name into the journal file for pager
       
  1119 ** pPager at the current location. The master journal name must be the last
       
  1120 ** thing written to a journal file. If the pager is in full-sync mode, the
       
  1121 ** journal file descriptor is advanced to the next sector boundary before
       
  1122 ** anything is written. The format is:
       
  1123 **
       
  1124 ** + 4 bytes: PAGER_MJ_PGNO.
       
  1125 ** + N bytes: length of master journal name.
       
  1126 ** + 4 bytes: N
       
  1127 ** + 4 bytes: Master journal name checksum.
       
  1128 ** + 8 bytes: aJournalMagic[].
       
  1129 **
       
  1130 ** The master journal page checksum is the sum of the bytes in the master
       
  1131 ** journal name.
       
  1132 **
       
  1133 ** If zMaster is a NULL pointer (occurs for a single database transaction), 
       
  1134 ** this call is a no-op.
       
  1135 */
       
  1136 static int writeMasterJournal(Pager *pPager, const char *zMaster){
       
  1137   int rc;
       
  1138   int len; 
       
  1139   int i; 
       
  1140   i64 jrnlOff;
       
  1141   u32 cksum = 0;
       
  1142   char zBuf[sizeof(aJournalMagic)+2*4];
       
  1143 
       
  1144   if( !zMaster || pPager->setMaster) return SQLITE_OK;
       
  1145   pPager->setMaster = 1;
       
  1146 
       
  1147   len = strlen(zMaster);
       
  1148   for(i=0; i<len; i++){
       
  1149     cksum += zMaster[i];
       
  1150   }
       
  1151 
       
  1152   /* If in full-sync mode, advance to the next disk sector before writing
       
  1153   ** the master journal name. This is in case the previous page written to
       
  1154   ** the journal has already been synced.
       
  1155   */
       
  1156   if( pPager->fullSync ){
       
  1157     seekJournalHdr(pPager);
       
  1158   }
       
  1159   jrnlOff = pPager->journalOff;
       
  1160   pPager->journalOff += (len+20);
       
  1161 
       
  1162   rc = write32bits(pPager->jfd, jrnlOff, PAGER_MJ_PGNO(pPager));
       
  1163   if( rc!=SQLITE_OK ) return rc;
       
  1164   jrnlOff += 4;
       
  1165 
       
  1166   rc = sqlite3OsWrite(pPager->jfd, zMaster, len, jrnlOff);
       
  1167   if( rc!=SQLITE_OK ) return rc;
       
  1168   jrnlOff += len;
       
  1169 
       
  1170   put32bits(zBuf, len);
       
  1171   put32bits(&zBuf[4], cksum);
       
  1172   memcpy(&zBuf[8], aJournalMagic, sizeof(aJournalMagic));
       
  1173   rc = sqlite3OsWrite(pPager->jfd, zBuf, 8+sizeof(aJournalMagic), jrnlOff);
       
  1174   pPager->needSync = !pPager->noSync;
       
  1175   return rc;
       
  1176 }
       
  1177 
       
  1178 /*
       
  1179 ** Add or remove a page from the list of all pages that are in the
       
  1180 ** statement journal.
       
  1181 **
       
  1182 ** The Pager keeps a separate list of pages that are currently in
       
  1183 ** the statement journal.  This helps the sqlite3PagerStmtCommit()
       
  1184 ** routine run MUCH faster for the common case where there are many
       
  1185 ** pages in memory but only a few are in the statement journal.
       
  1186 */
       
  1187 static void page_add_to_stmt_list(PgHdr *pPg){
       
  1188   Pager *pPager = pPg->pPager;
       
  1189   PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  1190   assert( MEMDB );
       
  1191   if( !pHist->inStmt ){
       
  1192     assert( pHist->pPrevStmt==0 && pHist->pNextStmt==0 );
       
  1193     if( pPager->pStmt ){
       
  1194       PGHDR_TO_HIST(pPager->pStmt, pPager)->pPrevStmt = pPg;
       
  1195     }
       
  1196     pHist->pNextStmt = pPager->pStmt;
       
  1197     pPager->pStmt = pPg;
       
  1198     pHist->inStmt = 1;
       
  1199   }
       
  1200 }
       
  1201 
       
  1202 /*
       
  1203 ** Find a page in the hash table given its page number.  Return
       
  1204 ** a pointer to the page or NULL if not found.
       
  1205 */
       
  1206 static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
       
  1207   PgHdr *p;
       
  1208   if( pPager->aHash==0 ) return 0;
       
  1209   p = pPager->aHash[pgno & (pPager->nHash-1)];
       
  1210   while( p && p->pgno!=pgno ){
       
  1211     p = p->pNextHash;
       
  1212   }
       
  1213   return p;
       
  1214 }
       
  1215 
       
  1216 /*
       
  1217 ** Clear the in-memory cache.  This routine
       
  1218 ** sets the state of the pager back to what it was when it was first
       
  1219 ** opened.  Any outstanding pages are invalidated and subsequent attempts
       
  1220 ** to access those pages will likely result in a coredump.
       
  1221 */
       
  1222 static void pager_reset(Pager *pPager){
       
  1223   PgHdr *pPg, *pNext;
       
  1224   if( pPager->errCode ) return;
       
  1225   for(pPg=pPager->pAll; pPg; pPg=pNext){
       
  1226     IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
       
  1227     PAGER_INCR(sqlite3_pager_pgfree_count);
       
  1228     pNext = pPg->pNextAll;
       
  1229     lruListRemove(pPg);
       
  1230     sqlite3_free(pPg);
       
  1231   }
       
  1232   assert(pPager->lru.pFirst==0);
       
  1233   assert(pPager->lru.pFirstSynced==0);
       
  1234   assert(pPager->lru.pLast==0);
       
  1235   pPager->pStmt = 0;
       
  1236   pPager->pAll = 0;
       
  1237   pPager->pDirty = 0;
       
  1238   pPager->nHash = 0;
       
  1239   sqlite3_free(pPager->aHash);
       
  1240   pPager->nPage = 0;
       
  1241   pPager->aHash = 0;
       
  1242   pPager->nRef = 0;
       
  1243 }
       
  1244 
       
  1245 /*
       
  1246 ** Unlock the database file. 
       
  1247 **
       
  1248 ** If the pager is currently in error state, discard the contents of 
       
  1249 ** the cache and reset the Pager structure internal state. If there is
       
  1250 ** an open journal-file, then the next time a shared-lock is obtained
       
  1251 ** on the pager file (by this or any other process), it will be
       
  1252 ** treated as a hot-journal and rolled back.
       
  1253 */
       
  1254 static void pager_unlock(Pager *pPager){
       
  1255   if( !pPager->exclusiveMode ){
       
  1256     if( !MEMDB ){
       
  1257       if( pPager->fd->isOpen ){
       
  1258         osUnlock(pPager->fd, NO_LOCK);
       
  1259       }
       
  1260       pPager->dbSize = -1;
       
  1261       IOTRACE(("UNLOCK %p\n", pPager))
       
  1262 
       
  1263       /* If Pager.errCode is set, the contents of the pager cache cannot be
       
  1264       ** trusted. Now that the pager file is unlocked, the contents of the
       
  1265       ** cache can be discarded and the error code safely cleared.
       
  1266       */
       
  1267       if( pPager->errCode ){
       
  1268         pPager->errCode = SQLITE_OK;
       
  1269         pager_reset(pPager);
       
  1270         if( pPager->stmtOpen ){
       
  1271           sqlite3OsClose(pPager->stfd);
       
  1272           sqlite3_free(pPager->aInStmt);
       
  1273           pPager->aInStmt = 0;
       
  1274         }
       
  1275         if( pPager->journalOpen ){
       
  1276           sqlite3OsClose(pPager->jfd);
       
  1277           pPager->journalOpen = 0;
       
  1278           sqlite3_free(pPager->aInJournal);
       
  1279           pPager->aInJournal = 0;
       
  1280         }
       
  1281         pPager->stmtOpen = 0;
       
  1282         pPager->stmtInUse = 0;
       
  1283         pPager->journalOff = 0;
       
  1284         pPager->journalStarted = 0;
       
  1285         pPager->stmtAutoopen = 0;
       
  1286         pPager->origDbSize = 0;
       
  1287       }
       
  1288     }
       
  1289 
       
  1290     if( !MEMDB || pPager->errCode==SQLITE_OK ){
       
  1291       pPager->state = PAGER_UNLOCK;
       
  1292       pPager->changeCountDone = 0;
       
  1293     }
       
  1294   }
       
  1295 }
       
  1296 
       
  1297 /*
       
  1298 ** Execute a rollback if a transaction is active and unlock the 
       
  1299 ** database file. If the pager has already entered the error state, 
       
  1300 ** do not attempt the rollback.
       
  1301 */
       
  1302 static void pagerUnlockAndRollback(Pager *p){
       
  1303   assert( p->state>=PAGER_RESERVED || p->journalOpen==0 );
       
  1304   if( p->errCode==SQLITE_OK && p->state>=PAGER_RESERVED ){
       
  1305     sqlite3PagerRollback(p);
       
  1306   }
       
  1307   pager_unlock(p);
       
  1308   assert( p->errCode || !p->journalOpen || (p->exclusiveMode&&!p->journalOff) );
       
  1309   assert( p->errCode || !p->stmtOpen || p->exclusiveMode );
       
  1310 }
       
  1311 
       
  1312 /*
       
  1313 ** This routine ends a transaction.  A transaction is ended by either
       
  1314 ** a COMMIT or a ROLLBACK.
       
  1315 **
       
  1316 ** When this routine is called, the pager has the journal file open and
       
  1317 ** a RESERVED or EXCLUSIVE lock on the database.  This routine will release
       
  1318 ** the database lock and acquires a SHARED lock in its place if that is
       
  1319 ** the appropriate thing to do.  Release locks usually is appropriate,
       
  1320 ** unless we are in exclusive access mode or unless this is a 
       
  1321 ** COMMIT AND BEGIN or ROLLBACK AND BEGIN operation.
       
  1322 **
       
  1323 ** The journal file is either deleted or truncated.
       
  1324 **
       
  1325 ** TODO: Consider keeping the journal file open for temporary databases.
       
  1326 ** This might give a performance improvement on windows where opening
       
  1327 ** a file is an expensive operation.
       
  1328 */
       
  1329 static int pager_end_transaction(Pager *pPager){
       
  1330   PgHdr *pPg;
       
  1331   int rc = SQLITE_OK;
       
  1332   int rc2 = SQLITE_OK;
       
  1333   assert( !MEMDB );
       
  1334   if( pPager->state<PAGER_RESERVED ){
       
  1335     return SQLITE_OK;
       
  1336   }
       
  1337   sqlite3PagerStmtCommit(pPager);
       
  1338   if( pPager->stmtOpen && !pPager->exclusiveMode ){
       
  1339     sqlite3OsClose(pPager->stfd);
       
  1340     pPager->stmtOpen = 0;
       
  1341   }
       
  1342   if( pPager->journalOpen ){
       
  1343     if( pPager->exclusiveMode 
       
  1344           && (rc = sqlite3OsTruncate(pPager->jfd, 0))==SQLITE_OK ){;
       
  1345       pPager->journalOff = 0;
       
  1346       pPager->journalStarted = 0;
       
  1347     }else{
       
  1348       sqlite3OsClose(pPager->jfd);
       
  1349       pPager->journalOpen = 0;
       
  1350       if( rc==SQLITE_OK ){
       
  1351         rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
       
  1352       }
       
  1353     }
       
  1354     sqlite3_free( pPager->aInJournal );
       
  1355     pPager->aInJournal = 0;
       
  1356     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  1357       pPg->inJournal = 0;
       
  1358       pPg->dirty = 0;
       
  1359       pPg->needSync = 0;
       
  1360       pPg->alwaysRollback = 0;
       
  1361 #ifdef SQLITE_CHECK_PAGES
       
  1362       pPg->pageHash = pager_pagehash(pPg);
       
  1363 #endif
       
  1364     }
       
  1365     pPager->pDirty = 0;
       
  1366     pPager->dirtyCache = 0;
       
  1367     pPager->nRec = 0;
       
  1368   }else{
       
  1369     assert( pPager->aInJournal==0 );
       
  1370     assert( pPager->dirtyCache==0 || pPager->useJournal==0 );
       
  1371   }
       
  1372 
       
  1373   if( !pPager->exclusiveMode ){
       
  1374     rc2 = osUnlock(pPager->fd, SHARED_LOCK);
       
  1375     pPager->state = PAGER_SHARED;
       
  1376   }else if( pPager->state==PAGER_SYNCED ){
       
  1377     pPager->state = PAGER_EXCLUSIVE;
       
  1378   }
       
  1379   pPager->origDbSize = 0;
       
  1380   pPager->setMaster = 0;
       
  1381   pPager->needSync = 0;
       
  1382   lruListSetFirstSynced(pPager);
       
  1383   pPager->dbSize = -1;
       
  1384 
       
  1385   return (rc==SQLITE_OK?rc2:rc);
       
  1386 }
       
  1387 
       
  1388 /*
       
  1389 ** Compute and return a checksum for the page of data.
       
  1390 **
       
  1391 ** This is not a real checksum.  It is really just the sum of the 
       
  1392 ** random initial value and the page number.  We experimented with
       
  1393 ** a checksum of the entire data, but that was found to be too slow.
       
  1394 **
       
  1395 ** Note that the page number is stored at the beginning of data and
       
  1396 ** the checksum is stored at the end.  This is important.  If journal
       
  1397 ** corruption occurs due to a power failure, the most likely scenario
       
  1398 ** is that one end or the other of the record will be changed.  It is
       
  1399 ** much less likely that the two ends of the journal record will be
       
  1400 ** correct and the middle be corrupt.  Thus, this "checksum" scheme,
       
  1401 ** though fast and simple, catches the mostly likely kind of corruption.
       
  1402 **
       
  1403 ** FIX ME:  Consider adding every 200th (or so) byte of the data to the
       
  1404 ** checksum.  That way if a single page spans 3 or more disk sectors and
       
  1405 ** only the middle sector is corrupt, we will still have a reasonable
       
  1406 ** chance of failing the checksum and thus detecting the problem.
       
  1407 */
       
  1408 static u32 pager_cksum(Pager *pPager, const u8 *aData){
       
  1409   u32 cksum = pPager->cksumInit;
       
  1410   int i = pPager->pageSize-200;
       
  1411   while( i>0 ){
       
  1412     cksum += aData[i];
       
  1413     i -= 200;
       
  1414   }
       
  1415   return cksum;
       
  1416 }
       
  1417 
       
  1418 /* Forward declaration */
       
  1419 static void makeClean(PgHdr*);
       
  1420 
       
  1421 /*
       
  1422 ** Read a single page from the journal file opened on file descriptor
       
  1423 ** jfd.  Playback this one page.
       
  1424 **
       
  1425 ** If useCksum==0 it means this journal does not use checksums.  Checksums
       
  1426 ** are not used in statement journals because statement journals do not
       
  1427 ** need to survive power failures.
       
  1428 */
       
  1429 static int pager_playback_one_page(
       
  1430   Pager *pPager, 
       
  1431   sqlite3_file *jfd,
       
  1432   i64 offset,
       
  1433   int useCksum
       
  1434 ){
       
  1435   int rc;
       
  1436   PgHdr *pPg;                   /* An existing page in the cache */
       
  1437   Pgno pgno;                    /* The page number of a page in journal */
       
  1438   u32 cksum;                    /* Checksum used for sanity checking */
       
  1439   u8 *aData = (u8 *)pPager->pTmpSpace;   /* Temp storage for a page */
       
  1440 
       
  1441   /* useCksum should be true for the main journal and false for
       
  1442   ** statement journals.  Verify that this is always the case
       
  1443   */
       
  1444   assert( jfd == (useCksum ? pPager->jfd : pPager->stfd) );
       
  1445   assert( aData );
       
  1446 
       
  1447   rc = read32bits(jfd, offset, &pgno);
       
  1448   if( rc!=SQLITE_OK ) return rc;
       
  1449   rc = sqlite3OsRead(jfd, aData, pPager->pageSize, offset+4);
       
  1450   if( rc!=SQLITE_OK ) return rc;
       
  1451   pPager->journalOff += pPager->pageSize + 4;
       
  1452 
       
  1453   /* Sanity checking on the page.  This is more important that I originally
       
  1454   ** thought.  If a power failure occurs while the journal is being written,
       
  1455   ** it could cause invalid data to be written into the journal.  We need to
       
  1456   ** detect this invalid data (with high probability) and ignore it.
       
  1457   */
       
  1458   if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
       
  1459     return SQLITE_DONE;
       
  1460   }
       
  1461   if( pgno>(unsigned)pPager->dbSize ){
       
  1462     return SQLITE_OK;
       
  1463   }
       
  1464   if( useCksum ){
       
  1465     rc = read32bits(jfd, offset+pPager->pageSize+4, &cksum);
       
  1466     if( rc ) return rc;
       
  1467     pPager->journalOff += 4;
       
  1468     if( pager_cksum(pPager, aData)!=cksum ){
       
  1469       return SQLITE_DONE;
       
  1470     }
       
  1471   }
       
  1472 
       
  1473   assert( pPager->state==PAGER_RESERVED || pPager->state>=PAGER_EXCLUSIVE );
       
  1474 
       
  1475   /* If the pager is in RESERVED state, then there must be a copy of this
       
  1476   ** page in the pager cache. In this case just update the pager cache,
       
  1477   ** not the database file. The page is left marked dirty in this case.
       
  1478   **
       
  1479   ** An exception to the above rule: If the database is in no-sync mode
       
  1480   ** and a page is moved during an incremental vacuum then the page may
       
  1481   ** not be in the pager cache. Later: if a malloc() or IO error occurs
       
  1482   ** during a Movepage() call, then the page may not be in the cache
       
  1483   ** either. So the condition described in the above paragraph is not
       
  1484   ** assert()able.
       
  1485   **
       
  1486   ** If in EXCLUSIVE state, then we update the pager cache if it exists
       
  1487   ** and the main file. The page is then marked not dirty.
       
  1488   **
       
  1489   ** Ticket #1171:  The statement journal might contain page content that is
       
  1490   ** different from the page content at the start of the transaction.
       
  1491   ** This occurs when a page is changed prior to the start of a statement
       
  1492   ** then changed again within the statement.  When rolling back such a
       
  1493   ** statement we must not write to the original database unless we know
       
  1494   ** for certain that original page contents are synced into the main rollback
       
  1495   ** journal.  Otherwise, a power loss might leave modified data in the
       
  1496   ** database file without an entry in the rollback journal that can
       
  1497   ** restore the database to its original form.  Two conditions must be
       
  1498   ** met before writing to the database files. (1) the database must be
       
  1499   ** locked.  (2) we know that the original page content is fully synced
       
  1500   ** in the main journal either because the page is not in cache or else
       
  1501   ** the page is marked as needSync==0.
       
  1502   */
       
  1503   pPg = pager_lookup(pPager, pgno);
       
  1504   PAGERTRACE4("PLAYBACK %d page %d hash(%08x)\n",
       
  1505                PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, aData));
       
  1506   if( pPager->state>=PAGER_EXCLUSIVE && (pPg==0 || pPg->needSync==0) ){
       
  1507     i64 offset = (pgno-1)*(i64)pPager->pageSize;
       
  1508     rc = sqlite3OsWrite(pPager->fd, aData, pPager->pageSize, offset);
       
  1509     if( pPg ){
       
  1510       makeClean(pPg);
       
  1511     }
       
  1512   }
       
  1513   if( pPg ){
       
  1514     /* No page should ever be explicitly rolled back that is in use, except
       
  1515     ** for page 1 which is held in use in order to keep the lock on the
       
  1516     ** database active. However such a page may be rolled back as a result
       
  1517     ** of an internal error resulting in an automatic call to
       
  1518     ** sqlite3PagerRollback().
       
  1519     */
       
  1520     void *pData;
       
  1521     /* assert( pPg->nRef==0 || pPg->pgno==1 ); */
       
  1522     pData = PGHDR_TO_DATA(pPg);
       
  1523     memcpy(pData, aData, pPager->pageSize);
       
  1524     if( pPager->xReiniter ){
       
  1525       pPager->xReiniter(pPg, pPager->pageSize);
       
  1526     }
       
  1527 #ifdef SQLITE_CHECK_PAGES
       
  1528     pPg->pageHash = pager_pagehash(pPg);
       
  1529 #endif
       
  1530     /* If this was page 1, then restore the value of Pager.dbFileVers.
       
  1531     ** Do this before any decoding. */
       
  1532     if( pgno==1 ){
       
  1533       memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
       
  1534     }
       
  1535 
       
  1536     /* Decode the page just read from disk */
       
  1537     CODEC1(pPager, pData, pPg->pgno, 3);
       
  1538   }
       
  1539   return rc;
       
  1540 }
       
  1541 
       
  1542 /*
       
  1543 ** Parameter zMaster is the name of a master journal file. A single journal
       
  1544 ** file that referred to the master journal file has just been rolled back.
       
  1545 ** This routine checks if it is possible to delete the master journal file,
       
  1546 ** and does so if it is.
       
  1547 **
       
  1548 ** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
       
  1549 ** available for use within this function.
       
  1550 **
       
  1551 **
       
  1552 ** The master journal file contains the names of all child journals.
       
  1553 ** To tell if a master journal can be deleted, check to each of the
       
  1554 ** children.  If all children are either missing or do not refer to
       
  1555 ** a different master journal, then this master journal can be deleted.
       
  1556 */
       
  1557 static int pager_delmaster(Pager *pPager, const char *zMaster){
       
  1558   sqlite3_vfs *pVfs = pPager->pVfs;
       
  1559   int rc;
       
  1560   int master_open = 0;
       
  1561   sqlite3_file *pMaster;
       
  1562   sqlite3_file *pJournal;
       
  1563   char *zMasterJournal = 0; /* Contents of master journal file */
       
  1564   i64 nMasterJournal;       /* Size of master journal file */
       
  1565 
       
  1566   /* Open the master journal file exclusively in case some other process
       
  1567   ** is running this routine also. Not that it makes too much difference.
       
  1568   */
       
  1569   pMaster = (sqlite3_file *)sqlite3_malloc(pVfs->szOsFile * 2);
       
  1570   pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
       
  1571   if( !pMaster ){
       
  1572     rc = SQLITE_NOMEM;
       
  1573   }else{
       
  1574     int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
       
  1575     rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
       
  1576   }
       
  1577   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1578   master_open = 1;
       
  1579 
       
  1580   rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
       
  1581   if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1582 
       
  1583   if( nMasterJournal>0 ){
       
  1584     char *zJournal;
       
  1585     char *zMasterPtr = 0;
       
  1586     int nMasterPtr = pPager->pVfs->mxPathname+1;
       
  1587 
       
  1588     /* Load the entire master journal file into space obtained from
       
  1589     ** sqlite3_malloc() and pointed to by zMasterJournal. 
       
  1590     */
       
  1591     zMasterJournal = (char *)sqlite3_malloc(nMasterJournal + nMasterPtr);
       
  1592     if( !zMasterJournal ){
       
  1593       rc = SQLITE_NOMEM;
       
  1594       goto delmaster_out;
       
  1595     }
       
  1596     zMasterPtr = &zMasterJournal[nMasterJournal];
       
  1597     rc = sqlite3OsRead(pMaster, zMasterJournal, nMasterJournal, 0);
       
  1598     if( rc!=SQLITE_OK ) goto delmaster_out;
       
  1599 
       
  1600     zJournal = zMasterJournal;
       
  1601     while( (zJournal-zMasterJournal)<nMasterJournal ){
       
  1602       if( sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS) ){
       
  1603         /* One of the journals pointed to by the master journal exists.
       
  1604         ** Open it and check if it points at the master journal. If
       
  1605         ** so, return without deleting the master journal file.
       
  1606         */
       
  1607         int c;
       
  1608         int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
       
  1609         rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
       
  1610         if( rc!=SQLITE_OK ){
       
  1611           goto delmaster_out;
       
  1612         }
       
  1613 
       
  1614         rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
       
  1615         sqlite3OsClose(pJournal);
       
  1616         if( rc!=SQLITE_OK ){
       
  1617           goto delmaster_out;
       
  1618         }
       
  1619 
       
  1620         c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
       
  1621         if( c ){
       
  1622           /* We have a match. Do not delete the master journal file. */
       
  1623           goto delmaster_out;
       
  1624         }
       
  1625       }
       
  1626       zJournal += (strlen(zJournal)+1);
       
  1627     }
       
  1628   }
       
  1629   
       
  1630   rc = sqlite3OsDelete(pVfs, zMaster, 0);
       
  1631 
       
  1632 delmaster_out:
       
  1633   if( zMasterJournal ){
       
  1634     sqlite3_free(zMasterJournal);
       
  1635   }  
       
  1636   if( master_open ){
       
  1637     sqlite3OsClose(pMaster);
       
  1638   }
       
  1639   sqlite3_free(pMaster);
       
  1640   return rc;
       
  1641 }
       
  1642 
       
  1643 
       
  1644 static void pager_truncate_cache(Pager *pPager);
       
  1645 
       
  1646 /*
       
  1647 ** Truncate the main file of the given pager to the number of pages
       
  1648 ** indicated. Also truncate the cached representation of the file.
       
  1649 **
       
  1650 ** Might might be the case that the file on disk is smaller than nPage.
       
  1651 ** This can happen, for example, if we are in the middle of a transaction
       
  1652 ** which has extended the file size and the new pages are still all held
       
  1653 ** in cache, then an INSERT or UPDATE does a statement rollback.  Some
       
  1654 ** operating system implementations can get confused if you try to
       
  1655 ** truncate a file to some size that is larger than it currently is,
       
  1656 ** so detect this case and do not do the truncation.
       
  1657 */
       
  1658 static int pager_truncate(Pager *pPager, int nPage){
       
  1659   int rc = SQLITE_OK;
       
  1660   if( pPager->state>=PAGER_EXCLUSIVE && pPager->fd->isOpen ){
       
  1661     i64 currentSize, newSize;
       
  1662     rc = sqlite3OsFileSize(pPager->fd, &currentSize);
       
  1663     newSize = pPager->pageSize*(i64)nPage;
       
  1664     if( rc==SQLITE_OK && currentSize>newSize ){
       
  1665       rc = sqlite3OsTruncate(pPager->fd, newSize);
       
  1666     }
       
  1667   }
       
  1668   if( rc==SQLITE_OK ){
       
  1669     pPager->dbSize = nPage;
       
  1670     pager_truncate_cache(pPager);
       
  1671   }
       
  1672   return rc;
       
  1673 }
       
  1674 
       
  1675 /*
       
  1676 ** Set the sectorSize for the given pager.
       
  1677 **
       
  1678 ** The sector size is the larger of the sector size reported
       
  1679 ** by sqlite3OsSectorSize() and the pageSize.
       
  1680 */
       
  1681 static void setSectorSize(Pager *pPager){
       
  1682   assert(pPager->fd->pMethods||pPager->tempFile);
       
  1683   if( !pPager->tempFile ){
       
  1684     /* Sector size doesn't matter for temporary files. Also, the file
       
  1685     ** may not have been opened yet, in whcih case the OsSectorSize()
       
  1686     ** call will segfault.
       
  1687     */
       
  1688     pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
       
  1689   }
       
  1690   if( pPager->sectorSize<pPager->pageSize ){
       
  1691     pPager->sectorSize = pPager->pageSize;
       
  1692   }
       
  1693 }
       
  1694 
       
  1695 /*
       
  1696 ** Playback the journal and thus restore the database file to
       
  1697 ** the state it was in before we started making changes.  
       
  1698 **
       
  1699 ** The journal file format is as follows: 
       
  1700 **
       
  1701 **  (1)  8 byte prefix.  A copy of aJournalMagic[].
       
  1702 **  (2)  4 byte big-endian integer which is the number of valid page records
       
  1703 **       in the journal.  If this value is 0xffffffff, then compute the
       
  1704 **       number of page records from the journal size.
       
  1705 **  (3)  4 byte big-endian integer which is the initial value for the 
       
  1706 **       sanity checksum.
       
  1707 **  (4)  4 byte integer which is the number of pages to truncate the
       
  1708 **       database to during a rollback.
       
  1709 **  (5)  4 byte integer which is the number of bytes in the master journal
       
  1710 **       name.  The value may be zero (indicate that there is no master
       
  1711 **       journal.)
       
  1712 **  (6)  N bytes of the master journal name.  The name will be nul-terminated
       
  1713 **       and might be shorter than the value read from (5).  If the first byte
       
  1714 **       of the name is \000 then there is no master journal.  The master
       
  1715 **       journal name is stored in UTF-8.
       
**  (7)  Zero or more page instances, each as follows:
       
  1717 **        +  4 byte page number.
       
  1718 **        +  pPager->pageSize bytes of data.
       
  1719 **        +  4 byte checksum
       
  1720 **
       
  1721 ** When we speak of the journal header, we mean the first 6 items above.
       
  1722 ** Each entry in the journal is an instance of the 7th item.
       
  1723 **
       
  1724 ** Call the value from the second bullet "nRec".  nRec is the number of
       
  1725 ** valid page entries in the journal.  In most cases, you can compute the
       
  1726 ** value of nRec from the size of the journal file.  But if a power
       
  1727 ** failure occurred while the journal was being written, it could be the
       
  1728 ** case that the size of the journal file had already been increased but
       
  1729 ** the extra entries had not yet made it safely to disk.  In such a case,
       
  1730 ** the value of nRec computed from the file size would be too large.  For
       
  1731 ** that reason, we always use the nRec value in the header.
       
  1732 **
       
  1733 ** If the nRec value is 0xffffffff it means that nRec should be computed
       
  1734 ** from the file size.  This value is used when the user selects the
       
  1735 ** no-sync option for the journal.  A power failure could lead to corruption
       
  1736 ** in this case.  But for things like temporary table (which will be
       
  1737 ** deleted when the power is restored) we don't care.  
       
  1738 **
       
  1739 ** If the file opened as the journal file is not a well-formed
       
  1740 ** journal file then all pages up to the first corrupted page are rolled
       
  1741 ** back (or no pages if the journal header is corrupted). The journal file
       
  1742 ** is then deleted and SQLITE_OK returned, just as if no corruption had
       
  1743 ** been encountered.
       
  1744 **
       
  1745 ** If an I/O or malloc() error occurs, the journal-file is not deleted
       
  1746 ** and an error code is returned.
       
  1747 */
       
  1748 static int pager_playback(Pager *pPager, int isHot){
       
  1749   sqlite3_vfs *pVfs = pPager->pVfs;
       
  1750   i64 szJ;                 /* Size of the journal file in bytes */
       
  1751   u32 nRec;                /* Number of Records in the journal */
       
  1752   int i;                   /* Loop counter */
       
  1753   Pgno mxPg = 0;           /* Size of the original file in pages */
       
  1754   int rc;                  /* Result code of a subroutine */
       
  1755   char *zMaster = 0;       /* Name of master journal file if any */
       
  1756 
       
  1757   /* Figure out how many records are in the journal.  Abort early if
       
  1758   ** the journal is empty.
       
  1759   */
       
  1760   assert( pPager->journalOpen );
       
  1761   rc = sqlite3OsFileSize(pPager->jfd, &szJ);
       
  1762   if( rc!=SQLITE_OK || szJ==0 ){
       
  1763     goto end_playback;
       
  1764   }
       
  1765 
       
  1766   /* Read the master journal name from the journal, if it is present.
       
  1767   ** If a master journal file name is specified, but the file is not
       
  1768   ** present on disk, then the journal is not hot and does not need to be
       
  1769   ** played back.
       
  1770   */
       
  1771   zMaster = pPager->pTmpSpace;
       
  1772   rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
       
  1773   assert( rc!=SQLITE_DONE );
       
  1774   if( rc!=SQLITE_OK 
       
  1775    || (zMaster[0] && !sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS)) 
       
  1776   ){
       
  1777     zMaster = 0;
       
  1778     if( rc==SQLITE_DONE ) rc = SQLITE_OK;
       
  1779     goto end_playback;
       
  1780   }
       
  1781   pPager->journalOff = 0;
       
  1782   zMaster = 0;
       
  1783 
       
  1784   /* This loop terminates either when the readJournalHdr() call returns
       
  1785   ** SQLITE_DONE or an IO error occurs. */
       
  1786   while( 1 ){
       
  1787 
       
  1788     /* Read the next journal header from the journal file.  If there are
       
  1789     ** not enough bytes left in the journal file for a complete header, or
       
  1790     ** it is corrupted, then a process must of failed while writing it.
       
  1791     ** This indicates nothing more needs to be rolled back.
       
  1792     */
       
  1793     rc = readJournalHdr(pPager, szJ, &nRec, &mxPg);
       
  1794     if( rc!=SQLITE_OK ){ 
       
  1795       if( rc==SQLITE_DONE ){
       
  1796         rc = SQLITE_OK;
       
  1797       }
       
  1798       goto end_playback;
       
  1799     }
       
  1800 
       
  1801     /* If nRec is 0xffffffff, then this journal was created by a process
       
  1802     ** working in no-sync mode. This means that the rest of the journal
       
  1803     ** file consists of pages, there are no more journal headers. Compute
       
  1804     ** the value of nRec based on this assumption.
       
  1805     */
       
  1806     if( nRec==0xffffffff ){
       
  1807       assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
       
  1808       nRec = (szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager);
       
  1809     }
       
  1810 
       
  1811     /* If nRec is 0 and this rollback is of a transaction created by this
       
  1812     ** process and if this is the final header in the journal, then it means
       
  1813     ** that this part of the journal was being filled but has not yet been
       
  1814     ** synced to disk.  Compute the number of pages based on the remaining
       
  1815     ** size of the file.
       
  1816     **
       
  1817     ** The third term of the test was added to fix ticket #2565.
       
  1818     */
       
  1819     if( nRec==0 && !isHot &&
       
  1820         pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
       
  1821       nRec = (szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager);
       
  1822     }
       
  1823 
       
  1824     /* If this is the first header read from the journal, truncate the
       
  1825     ** database file back to its original size.
       
  1826     */
       
  1827     if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
       
  1828       rc = pager_truncate(pPager, mxPg);
       
  1829       if( rc!=SQLITE_OK ){
       
  1830         goto end_playback;
       
  1831       }
       
  1832     }
       
  1833 
       
  1834     /* Copy original pages out of the journal and back into the database file.
       
  1835     */
       
  1836     for(i=0; i<nRec; i++){
       
  1837       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
       
  1838       if( rc!=SQLITE_OK ){
       
  1839         if( rc==SQLITE_DONE ){
       
  1840           rc = SQLITE_OK;
       
  1841           pPager->journalOff = szJ;
       
  1842           break;
       
  1843         }else{
       
  1844           goto end_playback;
       
  1845         }
       
  1846       }
       
  1847     }
       
  1848   }
       
  1849   /*NOTREACHED*/
       
  1850   assert( 0 );
       
  1851 
       
  1852 end_playback:
       
  1853   if( rc==SQLITE_OK ){
       
  1854     zMaster = pPager->pTmpSpace;
       
  1855     rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
       
  1856   }
       
  1857   if( rc==SQLITE_OK ){
       
  1858     rc = pager_end_transaction(pPager);
       
  1859   }
       
  1860   if( rc==SQLITE_OK && zMaster[0] ){
       
  1861     /* If there was a master journal and this routine will return success,
       
  1862     ** see if it is possible to delete the master journal.
       
  1863     */
       
  1864     rc = pager_delmaster(pPager, zMaster);
       
  1865   }
       
  1866 
       
  1867   /* The Pager.sectorSize variable may have been updated while rolling
       
  1868   ** back a journal created by a process with a different sector size
       
  1869   ** value. Reset it to the correct value for this process.
       
  1870   */
       
  1871   setSectorSize(pPager);
       
  1872   return rc;
       
  1873 }
       
  1874 
       
  1875 /*
       
  1876 ** Playback the statement journal.
       
  1877 **
       
  1878 ** This is similar to playing back the transaction journal but with
       
  1879 ** a few extra twists.
       
  1880 **
       
  1881 **    (1)  The number of pages in the database file at the start of
       
  1882 **         the statement is stored in pPager->stmtSize, not in the
       
  1883 **         journal file itself.
       
  1884 **
       
  1885 **    (2)  In addition to playing back the statement journal, also
       
  1886 **         playback all pages of the transaction journal beginning
       
  1887 **         at offset pPager->stmtJSize.
       
  1888 */
       
  1889 static int pager_stmt_playback(Pager *pPager){
       
  1890   i64 szJ;                 /* Size of the full journal */
       
  1891   i64 hdrOff;
       
  1892   int nRec;                /* Number of Records */
       
  1893   int i;                   /* Loop counter */
       
  1894   int rc;
       
  1895 
       
  1896   szJ = pPager->journalOff;
       
  1897 #ifndef NDEBUG 
       
  1898   {
       
  1899     i64 os_szJ;
       
  1900     rc = sqlite3OsFileSize(pPager->jfd, &os_szJ);
       
  1901     if( rc!=SQLITE_OK ) return rc;
       
  1902     assert( szJ==os_szJ );
       
  1903   }
       
  1904 #endif
       
  1905 
       
  1906   /* Set hdrOff to be the offset just after the end of the last journal
       
  1907   ** page written before the first journal-header for this statement
       
  1908   ** transaction was written, or the end of the file if no journal
       
  1909   ** header was written.
       
  1910   */
       
  1911   hdrOff = pPager->stmtHdrOff;
       
  1912   assert( pPager->fullSync || !hdrOff );
       
  1913   if( !hdrOff ){
       
  1914     hdrOff = szJ;
       
  1915   }
       
  1916   
       
  1917   /* Truncate the database back to its original size.
       
  1918   */
       
  1919   rc = pager_truncate(pPager, pPager->stmtSize);
       
  1920   assert( pPager->state>=PAGER_SHARED );
       
  1921 
       
  1922   /* Figure out how many records are in the statement journal.
       
  1923   */
       
  1924   assert( pPager->stmtInUse && pPager->journalOpen );
       
  1925   nRec = pPager->stmtNRec;
       
  1926   
       
  1927   /* Copy original pages out of the statement journal and back into the
       
  1928   ** database file.  Note that the statement journal omits checksums from
       
  1929   ** each record since power-failure recovery is not important to statement
       
  1930   ** journals.
       
  1931   */
       
  1932   for(i=0; i<nRec; i++){
       
  1933     i64 offset = i*(4+pPager->pageSize);
       
  1934     rc = pager_playback_one_page(pPager, pPager->stfd, offset, 0);
       
  1935     assert( rc!=SQLITE_DONE );
       
  1936     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1937   }
       
  1938 
       
  1939   /* Now roll some pages back from the transaction journal. Pager.stmtJSize
       
  1940   ** was the size of the journal file when this statement was started, so
       
  1941   ** everything after that needs to be rolled back, either into the
       
  1942   ** database, the memory cache, or both.
       
  1943   **
       
  1944   ** If it is not zero, then Pager.stmtHdrOff is the offset to the start
       
  1945   ** of the first journal header written during this statement transaction.
       
  1946   */
       
  1947   pPager->journalOff = pPager->stmtJSize;
       
  1948   pPager->cksumInit = pPager->stmtCksum;
       
  1949   while( pPager->journalOff < hdrOff ){
       
  1950     rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
       
  1951     assert( rc!=SQLITE_DONE );
       
  1952     if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1953   }
       
  1954 
       
  1955   while( pPager->journalOff < szJ ){
       
  1956     u32 nJRec;         /* Number of Journal Records */
       
  1957     u32 dummy;
       
  1958     rc = readJournalHdr(pPager, szJ, &nJRec, &dummy);
       
  1959     if( rc!=SQLITE_OK ){
       
  1960       assert( rc!=SQLITE_DONE );
       
  1961       goto end_stmt_playback;
       
  1962     }
       
  1963     if( nJRec==0 ){
       
  1964       nJRec = (szJ - pPager->journalOff) / (pPager->pageSize+8);
       
  1965     }
       
  1966     for(i=nJRec-1; i>=0 && pPager->journalOff < szJ; i--){
       
  1967       rc = pager_playback_one_page(pPager, pPager->jfd, pPager->journalOff, 1);
       
  1968       assert( rc!=SQLITE_DONE );
       
  1969       if( rc!=SQLITE_OK ) goto end_stmt_playback;
       
  1970     }
       
  1971   }
       
  1972 
       
  1973   pPager->journalOff = szJ;
       
  1974   
       
  1975 end_stmt_playback:
       
  1976   if( rc==SQLITE_OK) {
       
  1977     pPager->journalOff = szJ;
       
  1978     /* pager_reload_cache(pPager); */
       
  1979   }
       
  1980   return rc;
       
  1981 }
       
  1982 
       
  1983 /*
       
  1984 ** Change the maximum number of in-memory pages that are allowed.
       
  1985 */
       
  1986 void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
       
  1987   if( mxPage>10 ){
       
  1988     pPager->mxPage = mxPage;
       
  1989   }else{
       
  1990     pPager->mxPage = 10;
       
  1991   }
       
  1992 }
       
  1993 
       
  1994 /*
       
  1995 ** Adjust the robustness of the database to damage due to OS crashes
       
  1996 ** or power failures by changing the number of sync()s when writing
       
  1997 ** the rollback journal.  There are three levels:
       
  1998 **
       
  1999 **    OFF       sqlite3OsSync() is never called.  This is the default
       
  2000 **              for temporary and transient files.
       
  2001 **
       
  2002 **    NORMAL    The journal is synced once before writes begin on the
       
  2003 **              database.  This is normally adequate protection, but
       
  2004 **              it is theoretically possible, though very unlikely,
       
  2005 **              that an inopportune power failure could leave the journal
       
  2006 **              in a state which would cause damage to the database
       
  2007 **              when it is rolled back.
       
  2008 **
       
  2009 **    FULL      The journal is synced twice before writes begin on the
       
  2010 **              database (with some additional information - the nRec field
       
  2011 **              of the journal header - being written in between the two
       
  2012 **              syncs).  If we assume that writing a
       
  2013 **              single disk sector is atomic, then this mode provides
       
  2014 **              assurance that the journal will not be corrupted to the
       
  2015 **              point of causing damage to the database during rollback.
       
  2016 **
       
  2017 ** Numeric values associated with these states are OFF==1, NORMAL=2,
       
  2018 ** and FULL=3.
       
  2019 */
       
  2020 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
       
  2021 void sqlite3PagerSetSafetyLevel(Pager *pPager, int level, int full_fsync){
       
  2022   pPager->noSync =  level==1 || pPager->tempFile;
       
  2023   pPager->fullSync = level==3 && !pPager->tempFile;
       
  2024   pPager->sync_flags = (full_fsync?SQLITE_SYNC_FULL:SQLITE_SYNC_NORMAL);
       
  2025   if( pPager->noSync ) pPager->needSync = 0;
       
  2026 }
       
  2027 #endif
       
  2028 
       
  2029 /*
       
  2030 ** The following global variable is incremented whenever the library
       
  2031 ** attempts to open a temporary file.  This information is used for
       
  2032 ** testing and analysis only.  
       
  2033 */
       
  2034 #ifdef SQLITE_TEST
       
  2035 int sqlite3_opentemp_count = 0;
       
  2036 #endif
       
  2037 
       
  2038 /*
       
  2039 ** Open a temporary file. 
       
  2040 **
       
  2041 ** Write the file descriptor into *fd.  Return SQLITE_OK on success or some
       
  2042 ** other error code if we fail. The OS will automatically delete the temporary
       
  2043 ** file when it is closed.
       
  2044 */
       
  2045 static int sqlite3PagerOpentemp(
       
  2046   sqlite3_vfs *pVfs,    /* The virtual file system layer */
       
  2047   sqlite3_file *pFile,  /* Write the file descriptor here */
       
  2048   char *zFilename,      /* Name of the file.  Might be NULL */
       
  2049   int vfsFlags          /* Flags passed through to the VFS */
       
  2050 ){
       
  2051   int rc;
       
  2052   assert( zFilename!=0 );
       
  2053 
       
  2054 #ifdef SQLITE_TEST
       
  2055   sqlite3_opentemp_count++;  /* Used for testing and analysis only */
       
  2056 #endif
       
  2057 
       
  2058   vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
       
  2059             SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
       
  2060   rc = sqlite3OsOpen(pVfs, zFilename, pFile, vfsFlags, 0);
       
  2061   assert( rc!=SQLITE_OK || pFile->pMethods );
       
  2062   return rc;
       
  2063 }
       
  2064 
       
  2065 
       
  2066 //void fopenTest()
       
  2067 //{
       
  2068 //	     FILE *fp = fopen("c:\\data\\redfivelabs\\temp\\sqlite.log", "w+");
       
  2069 //  if (fp != NULL)
       
  2070 //  {
       
  2071 //	  char tmp[256];
       
  2072 //	  sprintf(tmp, "Hallo Welt");
       
  2073 //	  fwrite(tmp, strlen(tmp), 1, fp);
       
  2074 //	  fclose(fp);
       
  2075 //  }
       
  2076 //  return 191280;
       
  2077 //}
       
  2078 
       
  2079 /*
       
  2080 ** Create a new page cache and put a pointer to the page cache in *ppPager.
       
  2081 ** The file to be cached need not exist.  The file is not locked until
       
  2082 ** the first call to sqlite3PagerGet() and is only held open until the
       
  2083 ** last page is released using sqlite3PagerUnref().
       
  2084 **
       
  2085 ** If zFilename is NULL then a randomly-named temporary file is created
       
  2086 ** and used as the file to be cached.  The file will be deleted
       
  2087 ** automatically when it is closed.
       
  2088 **
       
  2089 ** If zFilename is ":memory:" then all information is held in cache.
       
  2090 ** It is never written to disk.  This can be used to implement an
       
  2091 ** in-memory database.
       
  2092 */
       
  2093 int sqlite3PagerOpen(
       
  2094   sqlite3_vfs *pVfs,       /* The virtual file system to use */
       
  2095   Pager **ppPager,         /* Return the Pager structure here */
       
  2096   const char *zFilename,   /* Name of the database file to open */
       
  2097   int nExtra,              /* Extra bytes append to each in-memory page */
       
  2098   int flags,               /* flags controlling this file */
       
  2099   int vfsFlags             /* flags passed through to sqlite3_vfs.xOpen() */
       
  2100 ){
       
  2101   u8 *pPtr;
       
  2102   Pager *pPager = 0;
       
  2103   int rc = SQLITE_OK;
       
  2104   int i;
       
  2105   int tempFile = 0;
       
  2106   int memDb = 0;
       
  2107   int readOnly = 0;
       
  2108   int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
       
  2109   int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
       
  2110   int journalFileSize = sqlite3JournalSize(pVfs);
       
  2111   int nDefaultPage = SQLITE_DEFAULT_PAGE_SIZE;
       
  2112   char *zPathname;
       
  2113   int nPathname;
       
  2114 
       
  2115 
       
  2116   /* The default return is a NULL pointer */
       
  2117   *ppPager = 0;
       
  2118 
       
  2119   /* Compute the full pathname */
       
  2120   nPathname = pVfs->mxPathname+1;
       
  2121   zPathname = (char*)sqlite3_malloc(nPathname);
       
  2122   if( zPathname==0 ){
       
  2123     return SQLITE_NOMEM;
       
  2124   }
       
  2125   if( zFilename && zFilename[0] ){
       
  2126 #ifndef SQLITE_OMIT_MEMORYDB
       
  2127     if( strcmp(zFilename,":memory:")==0 ){
       
  2128       memDb = 1;
       
  2129       zPathname[0] = 0;
       
  2130     }else
       
  2131 #endif
       
  2132     {
       
  2133 
       
  2134       rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
       
  2135     }
       
  2136   }else{
       
  2137     rc = sqlite3OsGetTempname(pVfs, nPathname, zPathname);
       
  2138   }
       
  2139   if( rc!=SQLITE_OK ){
       
  2140     sqlite3_free(zPathname);
       
  2141     return rc;
       
  2142   }
       
  2143 
       
  2144  nPathname = strlen(zPathname);
       
  2145 
       
  2146   /* Allocate memory for the pager structure */
       
  2147   pPager = (Pager*)sqlite3MallocZero(
       
  2148     sizeof(*pPager) +           /* Pager structure */
       
  2149     journalFileSize +           /* The journal file structure */ 
       
  2150     pVfs->szOsFile * 2 +        /* The db and stmt journal files */ 
       
  2151     4*nPathname + 40            /* zFilename, zDirectory, zJournal, zStmtJrnl */
       
  2152   );
       
  2153   if( !pPager ){
       
  2154     sqlite3_free(zPathname);
       
  2155     return SQLITE_NOMEM;
       
  2156   }
       
  2157   pPtr = (u8 *)&pPager[1];
       
  2158   pPager->vfsFlags = vfsFlags;
       
  2159   pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
       
  2160   pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
       
  2161   pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
       
  2162   pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
       
  2163   pPager->zDirectory = &pPager->zFilename[nPathname+1];
       
  2164   pPager->zJournal = &pPager->zDirectory[nPathname+1];
       
  2165   pPager->zStmtJrnl = &pPager->zJournal[nPathname+10];
       
  2166   pPager->pVfs = pVfs;
       
  2167   memcpy(pPager->zFilename, zPathname, nPathname+1);
       
  2168   sqlite3_free(zPathname);
       
  2169 
       
  2170 
       
  2171   /* Open the pager file.
       
  2172   */
       
  2173   if( zFilename && zFilename[0] && !memDb ){
       
  2174     if( nPathname>(pVfs->mxPathname - sizeof("-journal")) ){
       
  2175       rc = SQLITE_CANTOPEN;
       
  2176     }else{
       
  2177       int fout = 0;
       
  2178 
       
  2179 		rc = winOpen(pVfs, pPager->zFilename, pPager->fd,
       
  2180                          pPager->vfsFlags, &fout);
       
  2181       readOnly = (fout&SQLITE_OPEN_READONLY);
       
  2182 
       
  2183       /* If the file was successfully opened for read/write access,
       
  2184       ** choose a default page size in case we have to create the
       
  2185       ** database file. The default page size is the maximum of:
       
  2186       **
       
  2187       **    + SQLITE_DEFAULT_PAGE_SIZE,
       
  2188       **    + The value returned by sqlite3OsSectorSize()
       
  2189       **    + The largest page size that can be written atomically.
       
  2190       */
       
  2191       if( rc==SQLITE_OK && !readOnly ){
       
  2192         int iSectorSize = sqlite3OsSectorSize(pPager->fd);
       
  2193         if( nDefaultPage<iSectorSize ){
       
  2194           nDefaultPage = iSectorSize;
       
  2195         }
       
  2196 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  2197         {
       
  2198           int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
       
  2199           int ii;
       
  2200           assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
       
  2201           assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
       
  2202           assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
       
  2203           for(ii=nDefaultPage; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
       
  2204             if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ) nDefaultPage = ii;
       
  2205           }
       
  2206         }
       
  2207 #endif
       
  2208         if( nDefaultPage>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
       
  2209           nDefaultPage = SQLITE_MAX_DEFAULT_PAGE_SIZE;
       
  2210         }
       
  2211       }
       
  2212     }
       
  2213   }else if( !memDb ){
       
  2214     /* If a temporary file is requested, it is not opened immediately.
       
  2215     ** In this case we accept the default page size and delay actually
       
  2216     ** opening the file until the first call to OsWrite().
       
  2217     */ 
       
  2218     tempFile = 1;
       
  2219     pPager->state = PAGER_EXCLUSIVE;
       
  2220   }
       
  2221 
       
  2222   if( pPager && rc==SQLITE_OK ){
       
  2223     pPager->pTmpSpace = (char *)sqlite3_malloc(nDefaultPage);
       
  2224   }
       
  2225 
       
  2226   /* If an error occured in either of the blocks above.
       
  2227   ** Free the Pager structure and close the file.
       
  2228   ** Since the pager is not allocated there is no need to set 
       
  2229   ** any Pager.errMask variables.
       
  2230   */
       
  2231   if( !pPager || !pPager->pTmpSpace ){
       
  2232     sqlite3OsClose(pPager->fd);
       
  2233     sqlite3_free(pPager);
       
  2234     return ((rc==SQLITE_OK)?SQLITE_NOMEM:rc);
       
  2235   }
       
  2236 
       
  2237   PAGERTRACE3("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename);
       
  2238   IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
       
  2239 
       
  2240   /* Fill in Pager.zDirectory[] */
       
  2241   memcpy(pPager->zDirectory, pPager->zFilename, nPathname+1);
       
  2242   for(i=strlen(pPager->zDirectory); i>0 && pPager->zDirectory[i-1]!='/'; i--){}
       
  2243   if( i>0 ) pPager->zDirectory[i-1] = 0;
       
  2244 
       
  2245   /* Fill in Pager.zJournal[] and Pager.zStmtJrnl[] */
       
  2246   memcpy(pPager->zJournal, pPager->zFilename, nPathname);
       
  2247   memcpy(&pPager->zJournal[nPathname], "-journal", 9);
       
  2248   memcpy(pPager->zStmtJrnl, pPager->zFilename, nPathname);
       
  2249   memcpy(&pPager->zStmtJrnl[nPathname], "-stmtjrnl", 10);
       
  2250 
       
  2251   /* pPager->journalOpen = 0; */
       
  2252   pPager->useJournal = useJournal && !memDb;
       
  2253   pPager->noReadlock = noReadlock && readOnly;
       
  2254   /* pPager->stmtOpen = 0; */
       
  2255   /* pPager->stmtInUse = 0; */
       
  2256   /* pPager->nRef = 0; */
       
  2257   pPager->dbSize = memDb-1;
       
  2258   pPager->pageSize = nDefaultPage;
       
  2259   /* pPager->stmtSize = 0; */
       
  2260   /* pPager->stmtJSize = 0; */
       
  2261   /* pPager->nPage = 0; */
       
  2262   pPager->mxPage = 100;
       
  2263   pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
       
  2264   /* pPager->state = PAGER_UNLOCK; */
       
  2265   assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
       
  2266   /* pPager->errMask = 0; */
       
  2267   pPager->tempFile = tempFile;
       
  2268   assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
       
  2269           || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
       
  2270   assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
       
  2271   pPager->exclusiveMode = tempFile; 
       
  2272   pPager->memDb = memDb;
       
  2273   pPager->readOnly = readOnly;
       
  2274   /* pPager->needSync = 0; */
       
  2275   pPager->noSync = pPager->tempFile || !useJournal;
       
  2276   pPager->fullSync = (pPager->noSync?0:1);
       
  2277   pPager->sync_flags = SQLITE_SYNC_NORMAL;
       
  2278   /* pPager->pFirst = 0; */
       
  2279   /* pPager->pFirstSynced = 0; */
       
  2280   /* pPager->pLast = 0; */
       
  2281   pPager->nExtra = FORCE_ALIGNMENT(nExtra);
       
  2282   assert(pPager->fd->pMethods||memDb||tempFile);
       
  2283   if( !memDb ){
       
  2284     setSectorSize(pPager);
       
  2285   }
       
  2286   /* pPager->pBusyHandler = 0; */
       
  2287   /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
       
  2288   *ppPager = pPager;
       
  2289 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  2290   pPager->iInUseMM = 0;
       
  2291   pPager->iInUseDB = 0;
       
  2292   if( !memDb ){
       
  2293     sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
       
  2294     sqlite3_mutex_enter(mutex);
       
  2295     pPager->pNext = sqlite3PagerList;
       
  2296     if( sqlite3PagerList ){
       
  2297       assert( sqlite3PagerList->pPrev==0 );
       
  2298       sqlite3PagerList->pPrev = pPager;
       
  2299     }
       
  2300     pPager->pPrev = 0;
       
  2301     sqlite3PagerList = pPager;
       
  2302     sqlite3_mutex_leave(mutex);
       
  2303   }
       
  2304 #endif
       
  2305   return SQLITE_OK;
       
  2306 }
       
  2307 
       
  2308 /*
       
  2309 ** Set the busy handler function.
       
  2310 */
       
  2311 void sqlite3PagerSetBusyhandler(Pager *pPager, BusyHandler *pBusyHandler){
       
  2312   pPager->pBusyHandler = pBusyHandler;
       
  2313 }
       
  2314 
       
  2315 /*
       
  2316 ** Set the destructor for this pager.  If not NULL, the destructor is called
       
  2317 ** when the reference count on each page reaches zero.  The destructor can
       
  2318 ** be used to clean up information in the extra segment appended to each page.
       
  2319 **
       
  2320 ** The destructor is not called as a result sqlite3PagerClose().  
       
  2321 ** Destructors are only called by sqlite3PagerUnref().
       
  2322 */
       
  2323 void sqlite3PagerSetDestructor(Pager *pPager, void (*xDesc)(DbPage*,int)){
       
  2324   pPager->xDestructor = xDesc;
       
  2325 }
       
  2326 
       
  2327 /*
       
  2328 ** Set the reinitializer for this pager.  If not NULL, the reinitializer
       
  2329 ** is called when the content of a page in cache is restored to its original
       
  2330 ** value as a result of a rollback.  The callback gives higher-level code
       
  2331 ** an opportunity to restore the EXTRA section to agree with the restored
       
  2332 ** page data.
       
  2333 */
       
  2334 void sqlite3PagerSetReiniter(Pager *pPager, void (*xReinit)(DbPage*,int)){
       
  2335   pPager->xReiniter = xReinit;
       
  2336 }
       
  2337 
       
  2338 /*
       
  2339 ** Set the page size to *pPageSize. If the suggest new page size is
       
  2340 ** inappropriate, then an alternative page size is set to that
       
  2341 ** value before returning.
       
  2342 */
       
  2343 int sqlite3PagerSetPagesize(Pager *pPager, u16 *pPageSize){
       
  2344   int rc = SQLITE_OK;
       
  2345   u16 pageSize = *pPageSize;
       
  2346   assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
       
  2347   if( pageSize && pageSize!=pPager->pageSize 
       
  2348    && !pPager->memDb && pPager->nRef==0 
       
  2349   ){
       
  2350     char *pNew = (char *)sqlite3_malloc(pageSize);
       
  2351     if( !pNew ){
       
  2352       rc = SQLITE_NOMEM;
       
  2353     }else{
       
  2354       pagerEnter(pPager);
       
  2355       pager_reset(pPager);
       
  2356       pPager->pageSize = pageSize;
       
  2357       setSectorSize(pPager);
       
  2358       sqlite3_free(pPager->pTmpSpace);
       
  2359       pPager->pTmpSpace = pNew;
       
  2360       pagerLeave(pPager);
       
  2361     }
       
  2362   }
       
  2363   *pPageSize = pPager->pageSize;
       
  2364   return rc;
       
  2365 }
       
  2366 
       
  2367 /*
       
  2368 ** Return a pointer to the "temporary page" buffer held internally
       
  2369 ** by the pager.  This is a buffer that is big enough to hold the
       
  2370 ** entire content of a database page.  This buffer is used internally
       
  2371 ** during rollback and will be overwritten whenever a rollback
       
  2372 ** occurs.  But other modules are free to use it too, as long as
       
  2373 ** no rollbacks are happening.
       
  2374 */
       
  2375 void *sqlite3PagerTempSpace(Pager *pPager){
       
  2376   return pPager->pTmpSpace;
       
  2377 }
       
  2378 
       
  2379 /*
       
  2380 ** Attempt to set the maximum database page count if mxPage is positive. 
       
  2381 ** Make no changes if mxPage is zero or negative.  And never reduce the
       
  2382 ** maximum page count below the current size of the database.
       
  2383 **
       
  2384 ** Regardless of mxPage, return the current maximum page count.
       
  2385 */
       
  2386 int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
       
  2387   if( mxPage>0 ){
       
  2388     pPager->mxPgno = mxPage;
       
  2389   }
       
  2390   sqlite3PagerPagecount(pPager);
       
  2391   return pPager->mxPgno;
       
  2392 }
       
  2393 
       
  2394 /*
       
  2395 ** The following set of routines are used to disable the simulated
       
  2396 ** I/O error mechanism.  These routines are used to avoid simulated
       
  2397 ** errors in places where we do not care about errors.
       
  2398 **
       
  2399 ** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
       
  2400 ** and generate no code.
       
  2401 */
       
  2402 #ifdef SQLITE_TEST
       
  2403 extern int sqlite3_io_error_pending;
       
  2404 extern int sqlite3_io_error_hit;
       
  2405 static int saved_cnt;
       
  2406 void disable_simulated_io_errors(void){
       
  2407   saved_cnt = sqlite3_io_error_pending;
       
  2408   sqlite3_io_error_pending = -1;
       
  2409 }
       
  2410 void enable_simulated_io_errors(void){
       
  2411   sqlite3_io_error_pending = saved_cnt;
       
  2412 }
       
  2413 #else
       
  2414 # define disable_simulated_io_errors()
       
  2415 # define enable_simulated_io_errors()
       
  2416 #endif
       
  2417 
       
  2418 /*
       
  2419 ** Read the first N bytes from the beginning of the file into memory
       
  2420 ** that pDest points to. 
       
  2421 **
       
  2422 ** No error checking is done. The rational for this is that this function 
       
  2423 ** may be called even if the file does not exist or contain a header. In 
       
  2424 ** these cases sqlite3OsRead() will return an error, to which the correct 
       
  2425 ** response is to zero the memory at pDest and continue.  A real IO error 
       
  2426 ** will presumably recur and be picked up later (Todo: Think about this).
       
  2427 */
       
  2428 int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
       
  2429   int rc = SQLITE_OK;
       
  2430   memset(pDest, 0, N);
       
  2431   assert(MEMDB||pPager->fd->pMethods||pPager->tempFile);
       
  2432   if( pPager->fd->isOpen ){
       
  2433     IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
       
  2434     rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
       
  2435     if( rc==SQLITE_IOERR_SHORT_READ ){
       
  2436       rc = SQLITE_OK;
       
  2437     }
       
  2438   }
       
  2439   return rc;
       
  2440 }
       
  2441 
       
  2442 /*
       
  2443 ** Return the total number of pages in the disk file associated with
       
  2444 ** pPager. 
       
  2445 **
       
  2446 ** If the PENDING_BYTE lies on the page directly after the end of the
       
  2447 ** file, then consider this page part of the file too. For example, if
       
  2448 ** PENDING_BYTE is byte 4096 (the first byte of page 5) and the size of the
       
  2449 ** file is 4096 bytes, 5 is returned instead of 4.
       
  2450 */
       
  2451 int sqlite3PagerPagecount(Pager *pPager){
       
  2452   i64 n = 0;
       
  2453   int rc;
       
  2454   assert( pPager!=0 );
       
  2455   if( pPager->errCode ){
       
  2456     return 0;
       
  2457   }
       
  2458   if( pPager->dbSize>=0 ){
       
  2459     n = pPager->dbSize;
       
  2460   } else {
       
  2461     assert(pPager->fd->pMethods||pPager->tempFile);
       
  2462     if( (pPager->fd->isOpen)
       
  2463      && (rc = sqlite3OsFileSize(pPager->fd, &n))!=SQLITE_OK ){
       
  2464       pPager->nRef++;
       
  2465       pager_error(pPager, rc);
       
  2466       pPager->nRef--;
       
  2467       return 0;
       
  2468     }
       
  2469     if( n>0 && n<pPager->pageSize ){
       
  2470       n = 1;
       
  2471     }else{
       
  2472       n /= pPager->pageSize;
       
  2473     }
       
  2474     if( pPager->state!=PAGER_UNLOCK ){
       
  2475       pPager->dbSize = n;
       
  2476     }
       
  2477   }
       
  2478   if( n==(PENDING_BYTE/pPager->pageSize) ){
       
  2479     n++;
       
  2480   }
       
  2481   if( n>pPager->mxPgno ){
       
  2482     pPager->mxPgno = n;
       
  2483   }
       
  2484   return n;
       
  2485 }
       
  2486 
       
  2487 
       
  2488 #ifndef SQLITE_OMIT_MEMORYDB
       
  2489 /*
       
  2490 ** Clear a PgHistory block
       
  2491 */
       
  2492 static void clearHistory(PgHistory *pHist){
       
  2493   sqlite3_free(pHist->pOrig);
       
  2494   sqlite3_free(pHist->pStmt);
       
  2495   pHist->pOrig = 0;
       
  2496   pHist->pStmt = 0;
       
  2497 }
       
  2498 #else
       
  2499 #define clearHistory(x)
       
  2500 #endif
       
  2501 
       
  2502 /*
       
  2503 ** Forward declaration
       
  2504 */
       
  2505 static int syncJournal(Pager*);
       
  2506 
       
  2507 /*
       
  2508 ** Unlink pPg from its hash chain. Also set the page number to 0 to indicate
       
  2509 ** that the page is not part of any hash chain. This is required because the
       
  2510 ** sqlite3PagerMovepage() routine can leave a page in the 
       
  2511 ** pNextFree/pPrevFree list that is not a part of any hash-chain.
       
  2512 */
       
  2513 static void unlinkHashChain(Pager *pPager, PgHdr *pPg){
       
  2514   if( pPg->pgno==0 ){
       
  2515     assert( pPg->pNextHash==0 && pPg->pPrevHash==0 );
       
  2516     return;
       
  2517   }
       
  2518   if( pPg->pNextHash ){
       
  2519     pPg->pNextHash->pPrevHash = pPg->pPrevHash;
       
  2520   }
       
  2521   if( pPg->pPrevHash ){
       
  2522     assert( pPager->aHash[pPg->pgno & (pPager->nHash-1)]!=pPg );
       
  2523     pPg->pPrevHash->pNextHash = pPg->pNextHash;
       
  2524   }else{
       
  2525     int h = pPg->pgno & (pPager->nHash-1);
       
  2526     pPager->aHash[h] = pPg->pNextHash;
       
  2527   }
       
  2528   if( MEMDB ){
       
  2529     clearHistory(PGHDR_TO_HIST(pPg, pPager));
       
  2530   }
       
  2531   pPg->pgno = 0;
       
  2532   pPg->pNextHash = pPg->pPrevHash = 0;
       
  2533 }
       
  2534 
       
  2535 /*
       
  2536 ** Unlink a page from the free list (the list of all pages where nRef==0)
       
  2537 ** and from its hash collision chain.
       
  2538 */
       
  2539 static void unlinkPage(PgHdr *pPg){
       
  2540   Pager *pPager = pPg->pPager;
       
  2541 
       
  2542   /* Unlink from free page list */
       
  2543   lruListRemove(pPg);
       
  2544 
       
  2545   /* Unlink from the pgno hash table */
       
  2546   unlinkHashChain(pPager, pPg);
       
  2547 }
       
  2548 
       
  2549 /*
       
  2550 ** This routine is used to truncate the cache when a database
       
  2551 ** is truncated.  Drop from the cache all pages whose pgno is
       
  2552 ** larger than pPager->dbSize and is unreferenced.
       
  2553 **
       
  2554 ** Referenced pages larger than pPager->dbSize are zeroed.
       
  2555 **
       
  2556 ** Actually, at the point this routine is called, it would be
       
  2557 ** an error to have a referenced page.  But rather than delete
       
  2558 ** that page and guarantee a subsequent segfault, it seems better
       
  2559 ** to zero it and hope that we error out sanely.
       
  2560 */
       
  2561 static void pager_truncate_cache(Pager *pPager){
       
  2562   PgHdr *pPg;
       
  2563   PgHdr **ppPg;
       
  2564   int dbSize = pPager->dbSize;
       
  2565 
       
  2566   ppPg = &pPager->pAll;
       
  2567   while( (pPg = *ppPg)!=0 ){
       
  2568     if( pPg->pgno<=dbSize ){
       
  2569       ppPg = &pPg->pNextAll;
       
  2570     }else if( pPg->nRef>0 ){
       
  2571       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
       
  2572       ppPg = &pPg->pNextAll;
       
  2573     }else{
       
  2574       *ppPg = pPg->pNextAll;
       
  2575       IOTRACE(("PGFREE %p %d\n", pPager, pPg->pgno));
       
  2576       PAGER_INCR(sqlite3_pager_pgfree_count);
       
  2577       unlinkPage(pPg);
       
  2578       makeClean(pPg);
       
  2579       sqlite3_free(pPg);
       
  2580       pPager->nPage--;
       
  2581     }
       
  2582   }
       
  2583 }
       
  2584 
       
  2585 /*
       
  2586 ** Try to obtain a lock on a file.  Invoke the busy callback if the lock
       
  2587 ** is currently not available.  Repeat until the busy callback returns
       
  2588 ** false or until the lock succeeds.
       
  2589 **
       
  2590 ** Return SQLITE_OK on success and an error code if we cannot obtain
       
  2591 ** the lock.
       
  2592 */
       
  2593 static int pager_wait_on_lock(Pager *pPager, int locktype){
       
  2594   int rc;
       
  2595 
       
  2596   /* The OS lock values must be the same as the Pager lock values */
       
  2597   assert( PAGER_SHARED==SHARED_LOCK );
       
  2598   assert( PAGER_RESERVED==RESERVED_LOCK );
       
  2599   assert( PAGER_EXCLUSIVE==EXCLUSIVE_LOCK );
       
  2600 
       
  2601   /* If the file is currently unlocked then the size must be unknown */
       
  2602   assert( pPager->state>=PAGER_SHARED || pPager->dbSize<0 || MEMDB );
       
  2603 
       
  2604   if( pPager->state>=locktype ){
       
  2605     rc = SQLITE_OK;
       
  2606   }else{
       
  2607     do {
       
  2608       rc = sqlite3OsLock(pPager->fd, locktype);
       
  2609     }while( rc==SQLITE_BUSY && sqlite3InvokeBusyHandler(pPager->pBusyHandler) );
       
  2610     if( rc==SQLITE_OK ){
       
  2611       pPager->state = locktype;
       
  2612       IOTRACE(("LOCK %p %d\n", pPager, locktype))
       
  2613     }
       
  2614   }
       
  2615   return rc;
       
  2616 }
       
  2617 
       
  2618 /*
       
  2619 ** Truncate the file to the number of pages specified.
       
  2620 */
       
  2621 int sqlite3PagerTruncate(Pager *pPager, Pgno nPage){
       
  2622   int rc;
       
  2623   assert( pPager->state>=PAGER_SHARED || MEMDB );
       
  2624   sqlite3PagerPagecount(pPager);
       
  2625   if( pPager->errCode ){
       
  2626     rc = pPager->errCode;
       
  2627     return rc;
       
  2628   }
       
  2629   if( nPage>=(unsigned)pPager->dbSize ){
       
  2630     return SQLITE_OK;
       
  2631   }
       
  2632   if( MEMDB ){
       
  2633     pPager->dbSize = nPage;
       
  2634     pager_truncate_cache(pPager);
       
  2635     return SQLITE_OK;
       
  2636   }
       
  2637   pagerEnter(pPager);
       
  2638   rc = syncJournal(pPager);
       
  2639   pagerLeave(pPager);
       
  2640   if( rc!=SQLITE_OK ){
       
  2641     return rc;
       
  2642   }
       
  2643 
       
  2644   /* Get an exclusive lock on the database before truncating. */
       
  2645   pagerEnter(pPager);
       
  2646   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2647   pagerLeave(pPager);
       
  2648   if( rc!=SQLITE_OK ){
       
  2649     return rc;
       
  2650   }
       
  2651 
       
  2652   rc = pager_truncate(pPager, nPage);
       
  2653   return rc;
       
  2654 }
       
  2655 
       
  2656 /*
       
  2657 ** Shutdown the page cache.  Free all memory and close all files.
       
  2658 **
       
  2659 ** If a transaction was in progress when this routine is called, that
       
  2660 ** transaction is rolled back.  All outstanding pages are invalidated
       
  2661 ** and their memory is freed.  Any attempt to use a page associated
       
  2662 ** with this page cache after this function returns will likely
       
  2663 ** result in a coredump.
       
  2664 **
       
  2665 ** This function always succeeds. If a transaction is active an attempt
       
  2666 ** is made to roll it back. If an error occurs during the rollback 
       
  2667 ** a hot journal may be left in the filesystem but no error is returned
       
  2668 ** to the caller.
       
  2669 */
       
  2670 int sqlite3PagerClose(Pager *pPager){
       
  2671 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  2672   if( !MEMDB ){
       
  2673     sqlite3_mutex *mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
       
  2674     sqlite3_mutex_enter(mutex);
       
  2675     if( pPager->pPrev ){
       
  2676       pPager->pPrev->pNext = pPager->pNext;
       
  2677     }else{
       
  2678       sqlite3PagerList = pPager->pNext;
       
  2679     }
       
  2680     if( pPager->pNext ){
       
  2681       pPager->pNext->pPrev = pPager->pPrev;
       
  2682     }
       
  2683     sqlite3_mutex_leave(mutex);
       
  2684   }
       
  2685 #endif
       
  2686 
       
  2687   disable_simulated_io_errors();
       
  2688   pPager->errCode = 0;
       
  2689   pPager->exclusiveMode = 0;
       
  2690   pager_reset(pPager);
       
  2691   pagerUnlockAndRollback(pPager);
       
  2692   enable_simulated_io_errors();
       
  2693   PAGERTRACE2("CLOSE %d\n", PAGERID(pPager));
       
  2694   IOTRACE(("CLOSE %p\n", pPager))
       
  2695   assert( pPager->errCode || (pPager->journalOpen==0 && pPager->stmtOpen==0) );
       
  2696   if( pPager->journalOpen ){
       
  2697     sqlite3OsClose(pPager->jfd);
       
  2698   }
       
  2699   sqlite3_free(pPager->aInJournal);
       
  2700   if( pPager->stmtOpen ){
       
  2701     sqlite3OsClose(pPager->stfd);
       
  2702   }
       
  2703   sqlite3OsClose(pPager->fd);
       
  2704   /* Temp files are automatically deleted by the OS
       
  2705   ** if( pPager->tempFile ){
       
  2706   **   sqlite3OsDelete(pPager->zFilename);
       
  2707   ** }
       
  2708   */
       
  2709 
       
  2710   sqlite3_free(pPager->aHash);
       
  2711   sqlite3_free(pPager->pTmpSpace);
       
  2712   sqlite3_free(pPager);
       
  2713   return SQLITE_OK;
       
  2714 }
       
  2715 
       
  2716 #if !defined(NDEBUG) || defined(SQLITE_TEST)
       
  2717 /*
       
  2718 ** Return the page number for the given page data.
       
  2719 */
       
  2720 Pgno sqlite3PagerPagenumber(DbPage *p){
       
  2721   return p->pgno;
       
  2722 }
       
  2723 #endif
       
  2724 
       
  2725 /*
       
  2726 ** The page_ref() function increments the reference count for a page.
       
  2727 ** If the page is currently on the freelist (the reference count is zero) then
       
  2728 ** remove it from the freelist.
       
  2729 **
       
  2730 ** For non-test systems, page_ref() is a macro that calls _page_ref()
       
  2731 ** only if the reference count is zero.  For test systems, page_ref()
       
  2732 ** is a real function so that we can set breakpoints and trace it.
       
  2733 */
       
  2734 static void _page_ref(PgHdr *pPg){
       
  2735   if( pPg->nRef==0 ){
       
  2736     /* The page is currently on the freelist.  Remove it. */
       
  2737     lruListRemove(pPg);
       
  2738     pPg->pPager->nRef++;
       
  2739   }
       
  2740   pPg->nRef++;
       
  2741   REFINFO(pPg);
       
  2742 }
       
  2743 #ifdef SQLITE_DEBUG
       
  2744   static void page_ref(PgHdr *pPg){
       
  2745     if( pPg->nRef==0 ){
       
  2746       _page_ref(pPg);
       
  2747     }else{
       
  2748       pPg->nRef++;
       
  2749       REFINFO(pPg);
       
  2750     }
       
  2751   }
       
  2752 #else
       
  2753 # define page_ref(P)   ((P)->nRef==0?_page_ref(P):(void)(P)->nRef++)
       
  2754 #endif
       
  2755 
       
  2756 /*
       
  2757 ** Increment the reference count for a page.  The input pointer is
       
  2758 ** a reference to the page data.
       
  2759 */
       
  2760 int sqlite3PagerRef(DbPage *pPg){
       
  2761   pagerEnter(pPg->pPager);
       
  2762   page_ref(pPg);
       
  2763   pagerLeave(pPg->pPager);
       
  2764   return SQLITE_OK;
       
  2765 }
       
  2766 
       
  2767 /*
       
  2768 ** Sync the journal.  In other words, make sure all the pages that have
       
  2769 ** been written to the journal have actually reached the surface of the
       
  2770 ** disk.  It is not safe to modify the original database file until after
       
  2771 ** the journal has been synced.  If the original database is modified before
       
  2772 ** the journal is synced and a power failure occurs, the unsynced journal
       
  2773 ** data would be lost and we would be unable to completely rollback the
       
  2774 ** database changes.  Database corruption would occur.
       
  2775 ** 
       
  2776 ** This routine also updates the nRec field in the header of the journal.
       
  2777 ** (See comments on the pager_playback() routine for additional information.)
       
  2778 ** If the sync mode is FULL, two syncs will occur.  First the whole journal
       
  2779 ** is synced, then the nRec field is updated, then a second sync occurs.
       
  2780 **
       
  2781 ** For temporary databases, we do not care if we are able to rollback
       
  2782 ** after a power failure, so no sync occurs.
       
  2783 **
       
  2784 ** If the IOCAP_SEQUENTIAL flag is set for the persistent media on which
       
  2785 ** the database is stored, then OsSync() is never called on the journal
       
  2786 ** file. In this case all that is required is to update the nRec field in
       
  2787 ** the journal header.
       
  2788 **
       
  2789 ** This routine clears the needSync field of every page currently held in
       
  2790 ** memory.
       
  2791 */
       
  2792 static int syncJournal(Pager *pPager){
       
  2793   PgHdr *pPg;
       
  2794   int rc = SQLITE_OK;
       
  2795 
       
  2796 
       
  2797   /* Sync the journal before modifying the main database
       
  2798   ** (assuming there is a journal and it needs to be synced.)
       
  2799   */
       
  2800   if( pPager->needSync ){
       
  2801     if( !pPager->tempFile ){
       
  2802       int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
       
  2803       assert( pPager->journalOpen );
       
  2804 
       
  2805       /* assert( !pPager->noSync ); // noSync might be set if synchronous
       
  2806       ** was turned off after the transaction was started.  Ticket #615 */
       
  2807 #ifndef NDEBUG
       
  2808       {
       
  2809         /* Make sure the pPager->nRec counter we are keeping agrees
       
  2810         ** with the nRec computed from the size of the journal file.
       
  2811         */
       
  2812         i64 jSz;
       
  2813         rc = sqlite3OsFileSize(pPager->jfd, &jSz);
       
  2814         if( rc!=0 ) return rc;
       
  2815         assert( pPager->journalOff==jSz );
       
  2816       }
       
  2817 #endif
       
  2818       if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
       
  2819         /* Write the nRec value into the journal file header. If in
       
  2820         ** full-synchronous mode, sync the journal first. This ensures that
       
  2821         ** all data has really hit the disk before nRec is updated to mark
       
  2822         ** it as a candidate for rollback.
       
  2823         **
       
  2824         ** This is not required if the persistent media supports the
       
  2825         ** SAFE_APPEND property. Because in this case it is not possible 
       
  2826         ** for garbage data to be appended to the file, the nRec field
       
  2827         ** is populated with 0xFFFFFFFF when the journal header is written
       
  2828         ** and never needs to be updated.
       
  2829         */
       
  2830         i64 jrnlOff;
       
  2831         if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
       
  2832           PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2833           IOTRACE(("JSYNC %p\n", pPager))
       
  2834           rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags);
       
  2835           if( rc!=0 ) return rc;
       
  2836         }
       
  2837 
       
  2838         jrnlOff = pPager->journalHdr + sizeof(aJournalMagic);
       
  2839         IOTRACE(("JHDR %p %lld %d\n", pPager, jrnlOff, 4));
       
  2840         rc = write32bits(pPager->jfd, jrnlOff, pPager->nRec);
       
  2841         if( rc ) return rc;
       
  2842       }
       
  2843       if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
       
  2844         PAGERTRACE2("SYNC journal of %d\n", PAGERID(pPager));
       
  2845         IOTRACE(("JSYNC %p\n", pPager))
       
  2846         rc = sqlite3OsSync(pPager->jfd, pPager->sync_flags| 
       
  2847           (pPager->sync_flags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
       
  2848         );
       
  2849         if( rc!=0 ) return rc;
       
  2850       }
       
  2851       pPager->journalStarted = 1;
       
  2852     }
       
  2853     pPager->needSync = 0;
       
  2854 
       
  2855     /* Erase the needSync flag from every page.
       
  2856     */
       
  2857     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  2858       pPg->needSync = 0;
       
  2859     }
       
  2860     lruListSetFirstSynced(pPager);
       
  2861   }
       
  2862 
       
  2863 #ifndef NDEBUG
       
  2864   /* If the Pager.needSync flag is clear then the PgHdr.needSync
       
  2865   ** flag must also be clear for all pages.  Verify that this
       
  2866   ** invariant is true.
       
  2867   */
       
  2868   else{
       
  2869     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  2870       assert( pPg->needSync==0 );
       
  2871     }
       
  2872     assert( pPager->lru.pFirstSynced==pPager->lru.pFirst );
       
  2873   }
       
  2874 #endif
       
  2875 
       
  2876   return rc;
       
  2877 }
       
  2878 
       
  2879 /*
       
  2880 ** Merge two lists of pages connected by pDirty and in pgno order.
       
  2881 ** Do not bother fixing the pPrevDirty pointers.
       
  2882 */
       
  2883 static PgHdr *merge_pagelist(PgHdr *pA, PgHdr *pB){
       
  2884   PgHdr result, *pTail;
       
  2885   pTail = &result;
       
  2886   while( pA && pB ){
       
  2887     if( pA->pgno<pB->pgno ){
       
  2888       pTail->pDirty = pA;
       
  2889       pTail = pA;
       
  2890       pA = pA->pDirty;
       
  2891     }else{
       
  2892       pTail->pDirty = pB;
       
  2893       pTail = pB;
       
  2894       pB = pB->pDirty;
       
  2895     }
       
  2896   }
       
  2897   if( pA ){
       
  2898     pTail->pDirty = pA;
       
  2899   }else if( pB ){
       
  2900     pTail->pDirty = pB;
       
  2901   }else{
       
  2902     pTail->pDirty = 0;
       
  2903   }
       
  2904   return result.pDirty;
       
  2905 }
       
  2906 
       
  2907 /*
       
  2908 ** Sort the list of pages in ascending order by pgno.  Pages are
       
  2909 ** connected by pDirty pointers.  The pPrevDirty pointers are
       
  2910 ** corrupted by this sort.
       
  2911 */
       
  2912 #define N_SORT_BUCKET_ALLOC 25
       
  2913 #define N_SORT_BUCKET       25
       
  2914 #ifdef SQLITE_TEST
       
  2915   int sqlite3_pager_n_sort_bucket = 0;
       
  2916   #undef N_SORT_BUCKET
       
  2917   #define N_SORT_BUCKET \
       
  2918    (sqlite3_pager_n_sort_bucket?sqlite3_pager_n_sort_bucket:N_SORT_BUCKET_ALLOC)
       
  2919 #endif
       
  2920 static PgHdr *sort_pagelist(PgHdr *pIn){
       
  2921   PgHdr *a[N_SORT_BUCKET_ALLOC], *p;
       
  2922   int i;
       
  2923   memset(a, 0, sizeof(a));
       
  2924   while( pIn ){
       
  2925     p = pIn;
       
  2926     pIn = p->pDirty;
       
  2927     p->pDirty = 0;
       
  2928     for(i=0; i<N_SORT_BUCKET-1; i++){
       
  2929       if( a[i]==0 ){
       
  2930         a[i] = p;
       
  2931         break;
       
  2932       }else{
       
  2933         p = merge_pagelist(a[i], p);
       
  2934         a[i] = 0;
       
  2935       }
       
  2936     }
       
  2937     if( i==N_SORT_BUCKET-1 ){
       
  2938       /* Coverage: To get here, there need to be 2^(N_SORT_BUCKET) 
       
  2939       ** elements in the input list. This is possible, but impractical.
       
  2940       ** Testing this line is the point of global variable
       
  2941       ** sqlite3_pager_n_sort_bucket.
       
  2942       */
       
  2943       a[i] = merge_pagelist(a[i], p);
       
  2944     }
       
  2945   }
       
  2946   p = a[0];
       
  2947   for(i=1; i<N_SORT_BUCKET; i++){
       
  2948     p = merge_pagelist(p, a[i]);
       
  2949   }
       
  2950   return p;
       
  2951 }
       
  2952 
       
  2953 /*
       
  2954 ** Given a list of pages (connected by the PgHdr.pDirty pointer) write
       
  2955 ** every one of those pages out to the database file and mark them all
       
  2956 ** as clean.
       
  2957 */
       
  2958 static int pager_write_pagelist(PgHdr *pList){
       
  2959   Pager *pPager;
       
  2960   PgHdr *p;
       
  2961   int rc;
       
  2962 
       
  2963   if( pList==0 ) return SQLITE_OK;
       
  2964   pPager = pList->pPager;
       
  2965 
       
  2966   /* At this point there may be either a RESERVED or EXCLUSIVE lock on the
       
  2967   ** database file. If there is already an EXCLUSIVE lock, the following
       
  2968   ** calls to sqlite3OsLock() are no-ops.
       
  2969   **
       
  2970   ** Moving the lock from RESERVED to EXCLUSIVE actually involves going
       
  2971   ** through an intermediate state PENDING.   A PENDING lock prevents new
       
  2972   ** readers from attaching to the database but is unsufficient for us to
       
  2973   ** write.  The idea of a PENDING lock is to prevent new readers from
       
  2974   ** coming in while we wait for existing readers to clear.
       
  2975   **
       
  2976   ** While the pager is in the RESERVED state, the original database file
       
  2977   ** is unchanged and we can rollback without having to playback the
       
  2978   ** journal into the original database file.  Once we transition to
       
  2979   ** EXCLUSIVE, it means the database file has been changed and any rollback
       
  2980   ** will require a journal playback.
       
  2981   */
       
  2982   rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  2983   if( rc!=SQLITE_OK ){
       
  2984     return rc;
       
  2985   }
       
  2986 
       
  2987   pList = sort_pagelist(pList);
       
  2988   for(p=pList; p; p=p->pDirty){
       
  2989     assert( p->dirty );
       
  2990     p->dirty = 0;
       
  2991   }
       
  2992   while( pList ){
       
  2993 
       
  2994     /* If the file has not yet been opened, open it now. */
       
  2995     if( !pPager->fd->isOpen ){
       
  2996       assert(pPager->tempFile);
       
  2997       rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->fd, pPager->zFilename,
       
  2998                                 pPager->vfsFlags);
       
  2999       if( rc ) return rc;
       
  3000     }
       
  3001 
       
  3002     /* If there are dirty pages in the page cache with page numbers greater
       
  3003     ** than Pager.dbSize, this means sqlite3PagerTruncate() was called to
       
  3004     ** make the file smaller (presumably by auto-vacuum code). Do not write
       
  3005     ** any such pages to the file.
       
  3006     */
       
  3007     if( pList->pgno<=pPager->dbSize ){
       
  3008       i64 offset = (pList->pgno-1)*(i64)pPager->pageSize;
       
  3009       char *pData = CODEC2(pPager, PGHDR_TO_DATA(pList), pList->pgno, 6);
       
  3010       PAGERTRACE4("STORE %d page %d hash(%08x)\n",
       
  3011                    PAGERID(pPager), pList->pgno, pager_pagehash(pList));
       
  3012       IOTRACE(("PGOUT %p %d\n", pPager, pList->pgno));
       
  3013       rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
       
  3014       PAGER_INCR(sqlite3_pager_writedb_count);
       
  3015       PAGER_INCR(pPager->nWrite);
       
  3016       if( pList->pgno==1 ){
       
  3017         memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
       
  3018       }
       
  3019     }
       
  3020 #ifndef NDEBUG
       
  3021     else{
       
  3022       PAGERTRACE3("NOSTORE %d page %d\n", PAGERID(pPager), pList->pgno);
       
  3023     }
       
  3024 #endif
       
  3025     if( rc ) return rc;
       
  3026 #ifdef SQLITE_CHECK_PAGES
       
  3027     pList->pageHash = pager_pagehash(pList);
       
  3028 #endif
       
  3029     pList = pList->pDirty;
       
  3030   }
       
  3031   return SQLITE_OK;
       
  3032 }
       
  3033 
       
  3034 /*
       
  3035 ** Collect every dirty page into a dirty list and
       
  3036 ** return a pointer to the head of that list.  All pages are
       
  3037 ** collected even if they are still in use.
       
  3038 */
       
  3039 static PgHdr *pager_get_all_dirty_pages(Pager *pPager){
       
  3040   return pPager->pDirty;
       
  3041 }
       
  3042 
       
  3043 /*
       
  3044 ** Return TRUE if there is a hot journal on the given pager.
       
  3045 ** A hot journal is one that needs to be played back.
       
  3046 **
       
  3047 ** If the current size of the database file is 0 but a journal file
       
  3048 ** exists, that is probably an old journal left over from a prior
       
  3049 ** database with the same name.  Just delete the journal.
       
  3050 */
       
  3051 static int hasHotJournal(Pager *pPager){
       
  3052   sqlite3_vfs *pVfs = pPager->pVfs;
       
  3053   if( !pPager->useJournal ) return 0;
       
  3054   if( !pPager->fd->isOpen ) return 0;
       
  3055   if( !sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){
       
  3056     return 0;
       
  3057   }
       
  3058   if( sqlite3OsCheckReservedLock(pPager->fd) ){
       
  3059     return 0;
       
  3060   }
       
  3061   if( sqlite3PagerPagecount(pPager)==0 ){
       
  3062     sqlite3OsDelete(pVfs, pPager->zJournal, 0);
       
  3063     return 0;
       
  3064   }else{
       
  3065     return 1;
       
  3066   }
       
  3067 }
       
  3068 
       
  3069 /*
       
  3070 ** Try to find a page in the cache that can be recycled. 
       
  3071 **
       
  3072 ** This routine may return SQLITE_IOERR, SQLITE_FULL or SQLITE_OK. It 
       
  3073 ** does not set the pPager->errCode variable.
       
  3074 */
       
  3075 static int pager_recycle(Pager *pPager, PgHdr **ppPg){
       
  3076   PgHdr *pPg;
       
  3077   *ppPg = 0;
       
  3078 
       
  3079   /* It is illegal to call this function unless the pager object
       
  3080   ** pointed to by pPager has at least one free page (page with nRef==0).
       
  3081   */ 
       
  3082   assert(!MEMDB);
       
  3083   assert(pPager->lru.pFirst);
       
  3084 
       
  3085   /* Find a page to recycle.  Try to locate a page that does not
       
  3086   ** require us to do an fsync() on the journal.
       
  3087   */
       
  3088   pPg = pPager->lru.pFirstSynced;
       
  3089 
       
  3090   /* If we could not find a page that does not require an fsync()
       
  3091   ** on the journal file then fsync the journal file.  This is a
       
  3092   ** very slow operation, so we work hard to avoid it.  But sometimes
       
  3093   ** it can't be helped.
       
  3094   */
       
  3095   if( pPg==0 && pPager->lru.pFirst){
       
  3096     int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
       
  3097     int rc = syncJournal(pPager);
       
  3098     if( rc!=0 ){
       
  3099       return rc;
       
  3100     }
       
  3101     if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
       
  3102       /* If in full-sync mode, write a new journal header into the
       
  3103       ** journal file. This is done to avoid ever modifying a journal
       
  3104       ** header that is involved in the rollback of pages that have
       
  3105       ** already been written to the database (in case the header is
       
  3106       ** trashed when the nRec field is updated).
       
  3107       */
       
  3108       pPager->nRec = 0;
       
  3109       assert( pPager->journalOff > 0 );
       
  3110       assert( pPager->doNotSync==0 );
       
  3111       rc = writeJournalHdr(pPager);
       
  3112       if( rc!=0 ){
       
  3113         return rc;
       
  3114       }
       
  3115     }
       
  3116     pPg = pPager->lru.pFirst;
       
  3117   }
       
  3118 
       
  3119   assert( pPg->nRef==0 );
       
  3120 
       
  3121   /* Write the page to the database file if it is dirty.
       
  3122   */
       
  3123   if( pPg->dirty ){
       
  3124     int rc;
       
  3125     assert( pPg->needSync==0 );
       
  3126     makeClean(pPg);
       
  3127     pPg->dirty = 1;
       
  3128     pPg->pDirty = 0;
       
  3129     rc = pager_write_pagelist( pPg );
       
  3130     pPg->dirty = 0;
       
  3131     if( rc!=SQLITE_OK ){
       
  3132       return rc;
       
  3133     }
       
  3134   }
       
  3135   assert( pPg->dirty==0 );
       
  3136 
       
  3137   /* If the page we are recycling is marked as alwaysRollback, then
       
  3138   ** set the global alwaysRollback flag, thus disabling the
       
  3139   ** sqlite3PagerDontRollback() optimization for the rest of this transaction.
       
  3140   ** It is necessary to do this because the page marked alwaysRollback
       
  3141   ** might be reloaded at a later time but at that point we won't remember
       
  3142   ** that is was marked alwaysRollback.  This means that all pages must
       
  3143   ** be marked as alwaysRollback from here on out.
       
  3144   */
       
  3145   if( pPg->alwaysRollback ){
       
  3146     IOTRACE(("ALWAYS_ROLLBACK %p\n", pPager))
       
  3147     pPager->alwaysRollback = 1;
       
  3148   }
       
  3149 
       
  3150   /* Unlink the old page from the free list and the hash table
       
  3151   */
       
  3152   unlinkPage(pPg);
       
  3153   assert( pPg->pgno==0 );
       
  3154 
       
  3155   *ppPg = pPg;
       
  3156   return SQLITE_OK;
       
  3157 }
       
  3158 
       
  3159 #ifdef SQLITE_ENABLE_MEMORY_MANAGEMENT
       
  3160 /*
       
  3161 ** This function is called to free superfluous dynamically allocated memory
       
  3162 ** held by the pager system. Memory in use by any SQLite pager allocated
       
  3163 ** by the current thread may be sqlite3_free()ed.
       
  3164 **
       
  3165 ** nReq is the number of bytes of memory required. Once this much has
       
  3166 ** been released, the function returns. The return value is the total number 
       
  3167 ** of bytes of memory released.
       
  3168 */
       
  3169 int sqlite3PagerReleaseMemory(int nReq){
       
  3170   int nReleased = 0;          /* Bytes of memory released so far */
       
  3171   sqlite3_mutex *mutex;       /* The MEM2 mutex */
       
  3172   Pager *pPager;              /* For looping over pagers */
       
  3173   BusyHandler *savedBusy;     /* Saved copy of the busy handler */
       
  3174   int rc = SQLITE_OK;
       
  3175 
       
  3176   /* Acquire the memory-management mutex
       
  3177   */
       
  3178   mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_MEM2);
       
  3179   sqlite3_mutex_enter(mutex);
       
  3180 
       
  3181   /* Signal all database connections that memory management wants
       
  3182   ** to have access to the pagers.
       
  3183   */
       
  3184   for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
       
  3185      pPager->iInUseMM = 1;
       
  3186   }
       
  3187 
       
  3188   while( rc==SQLITE_OK && (nReq<0 || nReleased<nReq) ){
       
  3189     PgHdr *pPg;
       
  3190     PgHdr *pRecycled;
       
  3191  
       
  3192     /* Try to find a page to recycle that does not require a sync(). If
       
  3193     ** this is not possible, find one that does require a sync().
       
  3194     */
       
  3195     sqlite3_mutex_enter(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
  3196     pPg = sqlite3LruPageList.pFirstSynced;
       
  3197     while( pPg && (pPg->needSync || pPg->pPager->iInUseDB) ){
       
  3198       pPg = pPg->gfree.pNext;
       
  3199     }
       
  3200     if( !pPg ){
       
  3201       pPg = sqlite3LruPageList.pFirst;
       
  3202       while( pPg && pPg->pPager->iInUseDB ){
       
  3203         pPg = pPg->gfree.pNext;
       
  3204       }
       
  3205     }
       
  3206     sqlite3_mutex_leave(sqlite3_mutex_alloc(SQLITE_MUTEX_STATIC_LRU));
       
  3207 
       
  3208     /* If pPg==0, then the block above has failed to find a page to
       
  3209     ** recycle. In this case return early - no further memory will
       
  3210     ** be released.
       
  3211     */
       
  3212     if( !pPg ) break;
       
  3213 
       
  3214     pPager = pPg->pPager;
       
  3215     assert(!pPg->needSync || pPg==pPager->lru.pFirst);
       
  3216     assert(pPg->needSync || pPg==pPager->lru.pFirstSynced);
       
  3217   
       
  3218     savedBusy = pPager->pBusyHandler;
       
  3219     pPager->pBusyHandler = 0;
       
  3220     rc = pager_recycle(pPager, &pRecycled);
       
  3221     pPager->pBusyHandler = savedBusy;
       
  3222     assert(pRecycled==pPg || rc!=SQLITE_OK);
       
  3223     if( rc==SQLITE_OK ){
       
  3224       /* We've found a page to free. At this point the page has been 
       
  3225       ** removed from the page hash-table, free-list and synced-list 
       
  3226       ** (pFirstSynced). It is still in the all pages (pAll) list. 
       
  3227       ** Remove it from this list before freeing.
       
  3228       **
       
  3229       ** Todo: Check the Pager.pStmt list to make sure this is Ok. It 
       
  3230       ** probably is though.
       
  3231       */
       
  3232       PgHdr *pTmp;
       
  3233       assert( pPg );
       
  3234       if( pPg==pPager->pAll ){
       
  3235          pPager->pAll = pPg->pNextAll;
       
  3236       }else{
       
  3237         for( pTmp=pPager->pAll; pTmp->pNextAll!=pPg; pTmp=pTmp->pNextAll ){}
       
  3238         pTmp->pNextAll = pPg->pNextAll;
       
  3239       }
       
  3240       nReleased += (
       
  3241           sizeof(*pPg) + pPager->pageSize
       
  3242           + sizeof(u32) + pPager->nExtra
       
  3243           + MEMDB*sizeof(PgHistory) 
       
  3244       );
       
  3245       IOTRACE(("PGFREE %p %d *\n", pPager, pPg->pgno));
       
  3246       PAGER_INCR(sqlite3_pager_pgfree_count);
       
  3247       sqlite3_free(pPg);
       
  3248       pPager->nPage--;
       
  3249     }else{
       
  3250       /* An error occured whilst writing to the database file or 
       
  3251       ** journal in pager_recycle(). The error is not returned to the 
       
  3252       ** caller of this function. Instead, set the Pager.errCode variable.
       
  3253       ** The error will be returned to the user (or users, in the case 
       
  3254       ** of a shared pager cache) of the pager for which the error occured.
       
  3255       */
       
  3256       assert(
       
  3257           (rc&0xff)==SQLITE_IOERR ||
       
  3258           rc==SQLITE_FULL ||
       
  3259           rc==SQLITE_BUSY
       
  3260       );
       
  3261       assert( pPager->state>=PAGER_RESERVED );
       
  3262       pager_error(pPager, rc);
       
  3263     }
       
  3264   }
       
  3265 
       
  3266   /* Clear the memory management flags and release the mutex
       
  3267   */
       
  3268   for(pPager=sqlite3PagerList; pPager; pPager=pPager->pNext){
       
  3269      pPager->iInUseMM = 0;
       
  3270   }
       
  3271   sqlite3_mutex_leave(mutex);
       
  3272 
       
  3273   /* Return the number of bytes released
       
  3274   */
       
  3275   return nReleased;
       
  3276 }
       
  3277 #endif /* SQLITE_ENABLE_MEMORY_MANAGEMENT */
       
  3278 
       
  3279 /*
       
  3280 ** Read the content of page pPg out of the database file.
       
  3281 */
       
  3282 static int readDbPage(Pager *pPager, PgHdr *pPg, Pgno pgno){
       
  3283   int rc;
       
  3284   i64 offset;
       
  3285   assert( MEMDB==0 );
       
  3286   assert(pPager->fd->pMethods||pPager->tempFile);
       
  3287   if( !pPager->fd->isOpen ){
       
  3288     return SQLITE_IOERR_SHORT_READ;
       
  3289   }
       
  3290   offset = (pgno-1)*(i64)pPager->pageSize;
       
  3291   rc = sqlite3OsRead(pPager->fd, PGHDR_TO_DATA(pPg), pPager->pageSize, offset);
       
  3292   PAGER_INCR(sqlite3_pager_readdb_count);
       
  3293   PAGER_INCR(pPager->nRead);
       
  3294   IOTRACE(("PGIN %p %d\n", pPager, pgno));
       
  3295   if( pgno==1 ){
       
  3296     memcpy(&pPager->dbFileVers, &((u8*)PGHDR_TO_DATA(pPg))[24],
       
  3297                                               sizeof(pPager->dbFileVers));
       
  3298   }
       
  3299   CODEC1(pPager, PGHDR_TO_DATA(pPg), pPg->pgno, 3);
       
  3300   PAGERTRACE4("FETCH %d page %d hash(%08x)\n",
       
  3301                PAGERID(pPager), pPg->pgno, pager_pagehash(pPg));
       
  3302   return rc;
       
  3303 }
       
  3304 
       
  3305 
       
  3306 /*
       
  3307 ** This function is called to obtain the shared lock required before
       
  3308 ** data may be read from the pager cache. If the shared lock has already
       
  3309 ** been obtained, this function is a no-op.
       
  3310 **
       
  3311 ** Immediately after obtaining the shared lock (if required), this function
       
  3312 ** checks for a hot-journal file. If one is found, an emergency rollback
       
  3313 ** is performed immediately.
       
  3314 */
       
  3315 static int pagerSharedLock(Pager *pPager){
       
  3316   int rc = SQLITE_OK;
       
  3317   int isHot = 0;
       
  3318 
       
  3319   /* If this database is opened for exclusive access, has no outstanding 
       
  3320   ** page references and is in an error-state, now is the chance to clear
       
  3321   ** the error. Discard the contents of the pager-cache and treat any
       
  3322   ** open journal file as a hot-journal.
       
  3323   */
       
  3324   if( !MEMDB && pPager->exclusiveMode && pPager->nRef==0 && pPager->errCode ){
       
  3325     if( pPager->journalOpen ){
       
  3326       isHot = 1;
       
  3327     }
       
  3328     pager_reset(pPager);
       
  3329     pPager->errCode = SQLITE_OK;
       
  3330   }
       
  3331 
       
  3332   /* If the pager is still in an error state, do not proceed. The error 
       
  3333   ** state will be cleared at some point in the future when all page 
       
  3334   ** references are dropped and the cache can be discarded.
       
  3335   */
       
  3336   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  3337     return pPager->errCode;
       
  3338   }
       
  3339 
       
  3340   if( pPager->state==PAGER_UNLOCK || isHot ){
       
  3341     sqlite3_vfs *pVfs = pPager->pVfs;
       
  3342     if( !MEMDB ){
       
  3343       assert( pPager->nRef==0 );
       
  3344       if( !pPager->noReadlock ){
       
  3345         rc = pager_wait_on_lock(pPager, SHARED_LOCK);
       
  3346         if( rc!=SQLITE_OK ){
       
  3347           return pager_error(pPager, rc);
       
  3348         }
       
  3349         assert( pPager->state>=SHARED_LOCK );
       
  3350       }
       
  3351   
       
  3352       /* If a journal file exists, and there is no RESERVED lock on the
       
  3353       ** database file, then it either needs to be played back or deleted.
       
  3354       */
       
  3355       if( hasHotJournal(pPager) || isHot ){
       
  3356         /* Get an EXCLUSIVE lock on the database file. At this point it is
       
  3357         ** important that a RESERVED lock is not obtained on the way to the
       
  3358         ** EXCLUSIVE lock. If it were, another process might open the
       
  3359         ** database file, detect the RESERVED lock, and conclude that the
       
  3360         ** database is safe to read while this process is still rolling it 
       
  3361         ** back.
       
  3362         ** 
       
  3363         ** Because the intermediate RESERVED lock is not requested, the
       
  3364         ** second process will get to this point in the code and fail to
       
  3365         ** obtain its own EXCLUSIVE lock on the database file.
       
  3366         */
       
  3367         if( pPager->state<EXCLUSIVE_LOCK ){
       
  3368           rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK);
       
  3369           if( rc!=SQLITE_OK ){
       
  3370             pager_unlock(pPager);
       
  3371             return pager_error(pPager, rc);
       
  3372           }
       
  3373           pPager->state = PAGER_EXCLUSIVE;
       
  3374         }
       
  3375  
       
  3376         /* Open the journal for reading only.  Return SQLITE_BUSY if
       
  3377         ** we are unable to open the journal file. 
       
  3378         **
       
  3379         ** The journal file does not need to be locked itself.  The
       
  3380         ** journal file is never open unless the main database file holds
       
  3381         ** a write lock, so there is never any chance of two or more
       
  3382         ** processes opening the journal at the same time.
       
  3383         **
       
  3384         ** Open the journal for read/write access. This is because in 
       
  3385         ** exclusive-access mode the file descriptor will be kept open and
       
  3386         ** possibly used for a transaction later on. On some systems, the
       
  3387         ** OsTruncate() call used in exclusive-access mode also requires
       
  3388         ** a read/write file handle.
       
  3389         */
       
  3390         if( !isHot ){
       
  3391           rc = SQLITE_BUSY;
       
  3392           if( sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS) ){
       
  3393             int fout = 0;
       
  3394             int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
       
  3395             assert( !pPager->tempFile );
       
  3396             rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
       
  3397             assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
       
  3398             if( fout&SQLITE_OPEN_READONLY ){
       
  3399               rc = SQLITE_BUSY;
       
  3400               sqlite3OsClose(pPager->jfd);
       
  3401             }
       
  3402           }
       
  3403         }
       
  3404         if( rc!=SQLITE_OK ){
       
  3405           pager_unlock(pPager);
       
  3406           return ((rc==SQLITE_NOMEM||rc==SQLITE_IOERR_NOMEM)?rc:SQLITE_BUSY);
       
  3407         }
       
  3408         pPager->journalOpen = 1;
       
  3409         pPager->journalStarted = 0;
       
  3410         pPager->journalOff = 0;
       
  3411         pPager->setMaster = 0;
       
  3412         pPager->journalHdr = 0;
       
  3413  
       
  3414         /* Playback and delete the journal.  Drop the database write
       
  3415         ** lock and reacquire the read lock.
       
  3416         */
       
  3417         rc = pager_playback(pPager, 1);
       
  3418         if( rc!=SQLITE_OK ){
       
  3419           return pager_error(pPager, rc);
       
  3420         }
       
  3421         assert(pPager->state==PAGER_SHARED || 
       
  3422             (pPager->exclusiveMode && pPager->state>PAGER_SHARED)
       
  3423         );
       
  3424       }
       
  3425 
       
  3426       if( pPager->pAll ){
       
  3427         /* The shared-lock has just been acquired on the database file
       
  3428         ** and there are already pages in the cache (from a previous
       
  3429         ** read or write transaction).  Check to see if the database
       
  3430         ** has been modified.  If the database has changed, flush the
       
  3431         ** cache.
       
  3432         **
       
  3433         ** Database changes is detected by looking at 15 bytes beginning
       
  3434         ** at offset 24 into the file.  The first 4 of these 16 bytes are
       
  3435         ** a 32-bit counter that is incremented with each change.  The
       
  3436         ** other bytes change randomly with each file change when
       
  3437         ** a codec is in use.
       
  3438         ** 
       
  3439         ** There is a vanishingly small chance that a change will not be 
       
  3440         ** detected.  The chance of an undetected change is so small that
       
  3441         ** it can be neglected.
       
  3442         */
       
  3443         char dbFileVers[sizeof(pPager->dbFileVers)];
       
  3444         sqlite3PagerPagecount(pPager);
       
  3445 
       
  3446         if( pPager->errCode ){
       
  3447           return pPager->errCode;
       
  3448         }
       
  3449 
       
  3450         if( pPager->dbSize>0 ){
       
  3451           IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
       
  3452           rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
       
  3453           if( rc!=SQLITE_OK ){
       
  3454             return rc;
       
  3455           }
       
  3456         }else{
       
  3457           memset(dbFileVers, 0, sizeof(dbFileVers));
       
  3458         }
       
  3459 
       
  3460         if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
       
  3461           pager_reset(pPager);
       
  3462         }
       
  3463       }
       
  3464     }
       
  3465     assert( pPager->exclusiveMode || pPager->state<=PAGER_SHARED );
       
  3466     if( pPager->state==PAGER_UNLOCK ){
       
  3467       pPager->state = PAGER_SHARED;
       
  3468     }
       
  3469   }
       
  3470 
       
  3471   return rc;
       
  3472 }
       
  3473 
       
  3474 /*
       
  3475 ** Allocate a PgHdr object.   Either create a new one or reuse
       
  3476 ** an existing one that is not otherwise in use.
       
  3477 **
       
  3478 ** A new PgHdr structure is created if any of the following are
       
  3479 ** true:
       
  3480 **
       
  3481 **     (1)  We have not exceeded our maximum allocated cache size
       
  3482 **          as set by the "PRAGMA cache_size" command.
       
  3483 **
       
  3484 **     (2)  There are no unused PgHdr objects available at this time.
       
  3485 **
       
  3486 **     (3)  This is an in-memory database.
       
  3487 **
       
  3488 **     (4)  There are no PgHdr objects that do not require a journal
       
  3489 **          file sync and a sync of the journal file is currently
       
  3490 **          prohibited.
       
  3491 **
       
  3492 ** Otherwise, reuse an existing PgHdr.  In other words, reuse an
       
  3493 ** existing PgHdr if all of the following are true:
       
  3494 **
       
  3495 **     (1)  We have reached or exceeded the maximum cache size
       
  3496 **          allowed by "PRAGMA cache_size".
       
  3497 **
       
  3498 **     (2)  There is a PgHdr available with PgHdr->nRef==0
       
  3499 **
       
  3500 **     (3)  We are not in an in-memory database
       
  3501 **
       
  3502 **     (4)  Either there is an available PgHdr that does not need
       
  3503 **          to be synced to disk or else disk syncing is currently
       
  3504 **          allowed.
       
  3505 */
       
  3506 static int pagerAllocatePage(Pager *pPager, PgHdr **ppPg){
       
  3507   int rc = SQLITE_OK;
       
  3508   PgHdr *pPg;
       
  3509   int nByteHdr;
       
  3510 
       
  3511   /* Create a new PgHdr if any of the four conditions defined 
       
  3512   ** above are met: */
       
  3513   if( pPager->nPage<pPager->mxPage
       
  3514    || pPager->lru.pFirst==0 
       
  3515    || MEMDB
       
  3516    || (pPager->lru.pFirstSynced==0 && pPager->doNotSync)
       
  3517   ){
       
  3518     if( pPager->nPage>=pPager->nHash ){
       
  3519       pager_resize_hash_table(pPager,
       
  3520          pPager->nHash<256 ? 256 : pPager->nHash*2);
       
  3521       if( pPager->nHash==0 ){
       
  3522         rc = SQLITE_NOMEM;
       
  3523         goto pager_allocate_out;
       
  3524       }
       
  3525     }
       
  3526     pagerLeave(pPager);
       
  3527     nByteHdr = sizeof(*pPg) + sizeof(u32) + pPager->nExtra
       
  3528               + MEMDB*sizeof(PgHistory);
       
  3529     pPg = (PgHdr*)sqlite3_malloc( nByteHdr + pPager->pageSize );
       
  3530     pagerEnter(pPager);
       
  3531     if( pPg==0 ){
       
  3532       rc = SQLITE_NOMEM;
       
  3533       goto pager_allocate_out;
       
  3534     }
       
  3535     memset(pPg, 0, nByteHdr);
       
  3536     pPg->pData = (void*)(nByteHdr + (char*)pPg);
       
  3537     pPg->pPager = pPager;
       
  3538     pPg->pNextAll = pPager->pAll;
       
  3539     pPager->pAll = pPg;
       
  3540     pPager->nPage++;
       
  3541   }else{
       
  3542     /* Recycle an existing page with a zero ref-count. */
       
  3543     rc = pager_recycle(pPager, &pPg);
       
  3544     if( rc==SQLITE_BUSY ){
       
  3545       rc = SQLITE_IOERR_BLOCKED;
       
  3546     }
       
  3547     if( rc!=SQLITE_OK ){
       
  3548       goto pager_allocate_out;
       
  3549     }
       
  3550     assert( pPager->state>=SHARED_LOCK );
       
  3551     assert(pPg);
       
  3552   }
       
  3553   *ppPg = pPg;
       
  3554 
       
  3555 pager_allocate_out:
       
  3556   return rc;
       
  3557 }
       
  3558 
       
  3559 /*
       
  3560 ** Make sure we have the content for a page.  If the page was
       
  3561 ** previously acquired with noContent==1, then the content was
       
  3562 ** just initialized to zeros instead of being read from disk.
       
  3563 ** But now we need the real data off of disk.  So make sure we
       
  3564 ** have it.  Read it in if we do not have it already.
       
  3565 */
       
  3566 static int pager_get_content(PgHdr *pPg){
       
  3567   if( pPg->needRead ){
       
  3568     int rc = readDbPage(pPg->pPager, pPg, pPg->pgno);
       
  3569     if( rc==SQLITE_OK ){
       
  3570       pPg->needRead = 0;
       
  3571     }else{
       
  3572       return rc;
       
  3573     }
       
  3574   }
       
  3575   return SQLITE_OK;
       
  3576 }
       
  3577 
       
  3578 /*
       
  3579 ** Acquire a page.
       
  3580 **
       
  3581 ** A read lock on the disk file is obtained when the first page is acquired. 
       
  3582 ** This read lock is dropped when the last page is released.
       
  3583 **
       
  3584 ** This routine works for any page number greater than 0.  If the database
       
  3585 ** file is smaller than the requested page, then no actual disk
       
  3586 ** read occurs and the memory image of the page is initialized to
       
  3587 ** all zeros.  The extra data appended to a page is always initialized
       
  3588 ** to zeros the first time a page is loaded into memory.
       
  3589 **
       
  3590 ** The acquisition might fail for several reasons.  In all cases,
       
  3591 ** an appropriate error code is returned and *ppPage is set to NULL.
       
  3592 **
       
  3593 ** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
       
  3594 ** to find a page in the in-memory cache first.  If the page is not already
       
  3595 ** in memory, this routine goes to disk to read it in whereas Lookup()
       
  3596 ** just returns 0.  This routine acquires a read-lock the first time it
       
  3597 ** has to go to disk, and could also playback an old journal if necessary.
       
  3598 ** Since Lookup() never goes to disk, it never has to deal with locks
       
  3599 ** or journal files.
       
  3600 **
       
  3601 ** If noContent is false, the page contents are actually read from disk.
       
  3602 ** If noContent is true, it means that we do not care about the contents
       
  3603 ** of the page at this time, so do not do a disk read.  Just fill in the
       
  3604 ** page content with zeros.  But mark the fact that we have not read the
       
  3605 ** content by setting the PgHdr.needRead flag.  Later on, if 
       
  3606 ** sqlite3PagerWrite() is called on this page or if this routine is
       
  3607 ** called again with noContent==0, that means that the content is needed
       
  3608 ** and the disk read should occur at that point.
       
  3609 */
       
  3610 static int pagerAcquire(
       
  3611   Pager *pPager,      /* The pager open on the database file */
       
  3612   Pgno pgno,          /* Page number to fetch */
       
  3613   DbPage **ppPage,    /* Write a pointer to the page here */
       
  3614   int noContent       /* Do not bother reading content from disk if true */
       
  3615 ){
       
  3616   PgHdr *pPg;
       
  3617   int rc;
       
  3618 
       
  3619   assert( pPager->state==PAGER_UNLOCK || pPager->nRef>0 || pgno==1 );
       
  3620 
       
  3621   /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
       
  3622   ** number greater than this, or zero, is requested.
       
  3623   */
       
  3624   if( pgno>PAGER_MAX_PGNO || pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
       
  3625     return SQLITE_CORRUPT_BKPT;
       
  3626   }
       
  3627 
       
  3628   /* Make sure we have not hit any critical errors.
       
  3629   */ 
       
  3630   assert( pPager!=0 );
       
  3631   *ppPage = 0;
       
  3632 
       
  3633   /* If this is the first page accessed, then get a SHARED lock
       
  3634   ** on the database file. pagerSharedLock() is a no-op if 
       
  3635   ** a database lock is already held.
       
  3636   */
       
  3637   rc = pagerSharedLock(pPager);
       
  3638   if( rc!=SQLITE_OK ){
       
  3639     return rc;
       
  3640   }
       
  3641   assert( pPager->state!=PAGER_UNLOCK );
       
  3642 
       
  3643   pPg = pager_lookup(pPager, pgno);
       
  3644   if( pPg==0 ){
       
  3645     /* The requested page is not in the page cache. */
       
  3646     int nMax;
       
  3647     int h;
       
  3648     PAGER_INCR(pPager->nMiss);
       
  3649     rc = pagerAllocatePage(pPager, &pPg);
       
  3650     if( rc!=SQLITE_OK ){
       
  3651       return rc;
       
  3652     }
       
  3653 
       
  3654     pPg->pgno = pgno;
       
  3655     assert( !MEMDB || pgno>pPager->stmtSize );
       
  3656     if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
       
  3657 #if 0
       
  3658       sqlite3CheckMemory(pPager->aInJournal, pgno/8);
       
  3659 #endif
       
  3660       assert( pPager->journalOpen );
       
  3661       pPg->inJournal = (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
       
  3662       pPg->needSync = 0;
       
  3663     }else{
       
  3664       pPg->inJournal = 0;
       
  3665       pPg->needSync = 0;
       
  3666     }
       
  3667 
       
  3668     makeClean(pPg);
       
  3669     pPg->nRef = 1;
       
  3670     REFINFO(pPg);
       
  3671 
       
  3672     pPager->nRef++;
       
  3673     if( pPager->nExtra>0 ){
       
  3674       memset(PGHDR_TO_EXTRA(pPg, pPager), 0, pPager->nExtra);
       
  3675     }
       
  3676     nMax = sqlite3PagerPagecount(pPager);
       
  3677     if( pPager->errCode ){
       
  3678       rc = pPager->errCode;
       
  3679       sqlite3PagerUnref(pPg);
       
  3680       return rc;
       
  3681     }
       
  3682 
       
  3683     /* Populate the page with data, either by reading from the database
       
  3684     ** file, or by setting the entire page to zero.
       
  3685     */
       
  3686     if( nMax<(int)pgno || MEMDB || (noContent && !pPager->alwaysRollback) ){
       
  3687       if( pgno>pPager->mxPgno ){
       
  3688         sqlite3PagerUnref(pPg);
       
  3689         return SQLITE_FULL;
       
  3690       }
       
  3691       memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
       
  3692       pPg->needRead = noContent && !pPager->alwaysRollback;
       
  3693       IOTRACE(("ZERO %p %d\n", pPager, pgno));
       
  3694     }else{
       
  3695       rc = readDbPage(pPager, pPg, pgno);
       
  3696       if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
       
  3697         pPg->pgno = 0;
       
  3698         sqlite3PagerUnref(pPg);
       
  3699         return rc;
       
  3700       }
       
  3701       pPg->needRead = 0;
       
  3702     }
       
  3703 
       
  3704     /* Link the page into the page hash table */
       
  3705     h = pgno & (pPager->nHash-1);
       
  3706     assert( pgno!=0 );
       
  3707     pPg->pNextHash = pPager->aHash[h];
       
  3708     pPager->aHash[h] = pPg;
       
  3709     if( pPg->pNextHash ){
       
  3710       assert( pPg->pNextHash->pPrevHash==0 );
       
  3711       pPg->pNextHash->pPrevHash = pPg;
       
  3712     }
       
  3713 
       
  3714 #ifdef SQLITE_CHECK_PAGES
       
  3715     pPg->pageHash = pager_pagehash(pPg);
       
  3716 #endif
       
  3717   }else{
       
  3718     /* The requested page is in the page cache. */
       
  3719     assert(pPager->nRef>0 || pgno==1);
       
  3720     PAGER_INCR(pPager->nHit);
       
  3721     if( !noContent ){
       
  3722       rc = pager_get_content(pPg);
       
  3723       if( rc ){
       
  3724         return rc;
       
  3725       }
       
  3726     }
       
  3727     page_ref(pPg);
       
  3728   }
       
  3729   *ppPage = pPg;
       
  3730   return SQLITE_OK;
       
  3731 }
       
  3732 int sqlite3PagerAcquire(
       
  3733   Pager *pPager,      /* The pager open on the database file */
       
  3734   Pgno pgno,          /* Page number to fetch */
       
  3735   DbPage **ppPage,    /* Write a pointer to the page here */
       
  3736   int noContent       /* Do not bother reading content from disk if true */
       
  3737 ){
       
  3738   int rc;
       
  3739   pagerEnter(pPager);
       
  3740   rc = pagerAcquire(pPager, pgno, ppPage, noContent);
       
  3741   pagerLeave(pPager);
       
  3742   return rc;
       
  3743 }
       
  3744 
       
  3745 
       
  3746 /*
       
  3747 ** Acquire a page if it is already in the in-memory cache.  Do
       
  3748 ** not read the page from disk.  Return a pointer to the page,
       
  3749 ** or 0 if the page is not in cache.
       
  3750 **
       
  3751 ** See also sqlite3PagerGet().  The difference between this routine
       
  3752 ** and sqlite3PagerGet() is that _get() will go to the disk and read
       
  3753 ** in the page if the page is not already in cache.  This routine
       
  3754 ** returns NULL if the page is not in cache or if a disk I/O error 
       
  3755 ** has ever happened.
       
  3756 */
       
  3757 DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
       
  3758   PgHdr *pPg = 0;
       
  3759 
       
  3760   assert( pPager!=0 );
       
  3761   assert( pgno!=0 );
       
  3762 
       
  3763   pagerEnter(pPager);
       
  3764   if( pPager->state==PAGER_UNLOCK ){
       
  3765     assert( !pPager->pAll || pPager->exclusiveMode );
       
  3766   }else if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  3767     /* Do nothing */
       
  3768   }else if( (pPg = pager_lookup(pPager, pgno))!=0 ){
       
  3769     page_ref(pPg);
       
  3770   }
       
  3771   pagerLeave(pPager);
       
  3772   return pPg;
       
  3773 }
       
  3774 
       
  3775 /*
       
  3776 ** Release a page.
       
  3777 **
       
  3778 ** If the number of references to the page drop to zero, then the
       
  3779 ** page is added to the LRU list.  When all references to all pages
       
  3780 ** are released, a rollback occurs and the lock on the database is
       
  3781 ** removed.
       
  3782 */
       
  3783 int sqlite3PagerUnref(DbPage *pPg){
       
  3784   Pager *pPager = pPg->pPager;
       
  3785 
       
  3786   /* Decrement the reference count for this page
       
  3787   */
       
  3788   assert( pPg->nRef>0 );
       
  3789   pagerEnter(pPg->pPager);
       
  3790   pPg->nRef--;
       
  3791   REFINFO(pPg);
       
  3792 
       
  3793   CHECK_PAGE(pPg);
       
  3794 
       
  3795   /* When the number of references to a page reach 0, call the
       
  3796   ** destructor and add the page to the freelist.
       
  3797   */
       
  3798   if( pPg->nRef==0 ){
       
  3799 
       
  3800     lruListAdd(pPg);
       
  3801     if( pPager->xDestructor ){
       
  3802       pPager->xDestructor(pPg, pPager->pageSize);
       
  3803     }
       
  3804   
       
  3805     /* When all pages reach the freelist, drop the read lock from
       
  3806     ** the database file.
       
  3807     */
       
  3808     pPager->nRef--;
       
  3809     assert( pPager->nRef>=0 );
       
  3810     if( pPager->nRef==0 && (!pPager->exclusiveMode || pPager->journalOff>0) ){
       
  3811       pagerUnlockAndRollback(pPager);
       
  3812     }
       
  3813   }
       
  3814   pagerLeave(pPager);
       
  3815   return SQLITE_OK;
       
  3816 }
       
  3817 
       
  3818 /*
       
  3819 ** Create a journal file for pPager.  There should already be a RESERVED
       
  3820 ** or EXCLUSIVE lock on the database file when this routine is called.
       
  3821 **
       
  3822 ** Return SQLITE_OK if everything.  Return an error code and release the
       
  3823 ** write lock if anything goes wrong.
       
  3824 */
       
  3825 static int pager_open_journal(Pager *pPager){
       
  3826   sqlite3_vfs *pVfs = pPager->pVfs;
       
  3827   int flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_EXCLUSIVE|SQLITE_OPEN_CREATE);
       
  3828 
       
  3829   int rc;
       
  3830   assert( !MEMDB );
       
  3831   assert( pPager->state>=PAGER_RESERVED );
       
  3832   assert( pPager->journalOpen==0 );
       
  3833   assert( pPager->useJournal );
       
  3834   assert( pPager->aInJournal==0 );
       
  3835   sqlite3PagerPagecount(pPager);
       
  3836   pagerLeave(pPager);
       
  3837   pPager->aInJournal = (u8*)sqlite3MallocZero( pPager->dbSize/8 + 1 );
       
  3838   pagerEnter(pPager);
       
  3839   if( pPager->aInJournal==0 ){
       
  3840     rc = SQLITE_NOMEM;
       
  3841     goto failed_to_open_journal;
       
  3842   }
       
  3843 
       
  3844   if( pPager->tempFile ){
       
  3845     flags |= (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL);
       
  3846   }else{
       
  3847     flags |= (SQLITE_OPEN_MAIN_JOURNAL);
       
  3848   }
       
  3849 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  3850   rc = sqlite3JournalOpen(
       
  3851       pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
       
  3852   );
       
  3853 #else
       
  3854   rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
       
  3855 #endif
       
  3856   assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
       
  3857   pPager->journalOff = 0;
       
  3858   pPager->setMaster = 0;
       
  3859   pPager->journalHdr = 0;
       
  3860   if( rc!=SQLITE_OK ){
       
  3861     if( rc==SQLITE_NOMEM ){
       
  3862       sqlite3OsDelete(pVfs, pPager->zJournal, 0);
       
  3863     }
       
  3864     goto failed_to_open_journal;
       
  3865   }
       
  3866   pPager->journalOpen = 1;
       
  3867   pPager->journalStarted = 0;
       
  3868   pPager->needSync = 0;
       
  3869   pPager->alwaysRollback = 0;
       
  3870   pPager->nRec = 0;
       
  3871   if( pPager->errCode ){
       
  3872     rc = pPager->errCode;
       
  3873     goto failed_to_open_journal;
       
  3874   }
       
  3875   pPager->origDbSize = pPager->dbSize;
       
  3876 
       
  3877   rc = writeJournalHdr(pPager);
       
  3878 
       
  3879   if( pPager->stmtAutoopen && rc==SQLITE_OK ){
       
  3880     rc = sqlite3PagerStmtBegin(pPager);
       
  3881   }
       
  3882   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM && rc!=SQLITE_IOERR_NOMEM ){
       
  3883     rc = pager_end_transaction(pPager);
       
  3884     if( rc==SQLITE_OK ){
       
  3885       rc = SQLITE_FULL;
       
  3886     }
       
  3887   }
       
  3888   return rc;
       
  3889 
       
  3890 failed_to_open_journal:
       
  3891   sqlite3_free(pPager->aInJournal);
       
  3892   pPager->aInJournal = 0;
       
  3893   return rc;
       
  3894 }
       
  3895 
       
  3896 /*
       
  3897 ** Acquire a write-lock on the database.  The lock is removed when
       
  3898 ** any of the following happens:
       
  3899 **
       
  3900 **   *  sqlite3PagerCommitPhaseTwo() is called.
       
  3901 **   *  sqlite3PagerRollback() is called.
       
  3902 **   *  sqlite3PagerClose() is called.
       
  3903 **   *  sqlite3PagerUnref() is called to on every outstanding page.
       
  3904 **
       
  3905 ** The first parameter to this routine is a pointer to any open page of the
       
  3906 ** database file.  Nothing changes about the page - it is used merely to
       
  3907 ** acquire a pointer to the Pager structure and as proof that there is
       
  3908 ** already a read-lock on the database.
       
  3909 **
       
  3910 ** The second parameter indicates how much space in bytes to reserve for a
       
  3911 ** master journal file-name at the start of the journal when it is created.
       
  3912 **
       
  3913 ** A journal file is opened if this is not a temporary file.  For temporary
       
  3914 ** files, the opening of the journal file is deferred until there is an
       
  3915 ** actual need to write to the journal.
       
  3916 **
       
  3917 ** If the database is already reserved for writing, this routine is a no-op.
       
  3918 **
       
  3919 ** If exFlag is true, go ahead and get an EXCLUSIVE lock on the file
       
  3920 ** immediately instead of waiting until we try to flush the cache.  The
       
  3921 ** exFlag is ignored if a transaction is already active.
       
  3922 */
       
  3923 int sqlite3PagerBegin(DbPage *pPg, int exFlag){
       
  3924   Pager *pPager = pPg->pPager;
       
  3925   int rc = SQLITE_OK;
       
  3926   pagerEnter(pPager);
       
  3927   assert( pPg->nRef>0 );
       
  3928   assert( pPager->state!=PAGER_UNLOCK );
       
  3929   if( pPager->state==PAGER_SHARED ){
       
  3930     assert( pPager->aInJournal==0 );
       
  3931     if( MEMDB ){
       
  3932       pPager->state = PAGER_EXCLUSIVE;
       
  3933       pPager->origDbSize = pPager->dbSize;
       
  3934     }else{
       
  3935       rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
       
  3936       if( rc==SQLITE_OK ){
       
  3937         pPager->state = PAGER_RESERVED;
       
  3938         if( exFlag ){
       
  3939           rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
       
  3940         }
       
  3941       }
       
  3942       if( rc!=SQLITE_OK ){
       
  3943         pagerLeave(pPager);
       
  3944         return rc;
       
  3945       }
       
  3946       pPager->dirtyCache = 0;
       
  3947       PAGERTRACE2("TRANSACTION %d\n", PAGERID(pPager));
       
  3948       if( pPager->useJournal && !pPager->tempFile ){
       
  3949         rc = pager_open_journal(pPager);
       
  3950       }
       
  3951     }
       
  3952   }else if( pPager->journalOpen && pPager->journalOff==0 ){
       
  3953     /* This happens when the pager was in exclusive-access mode last
       
  3954     ** time a (read or write) transaction was successfully concluded
       
  3955     ** by this connection. Instead of deleting the journal file it was 
       
  3956     ** kept open and truncated to 0 bytes.
       
  3957     */
       
  3958     assert( pPager->nRec==0 );
       
  3959     assert( pPager->origDbSize==0 );
       
  3960     assert( pPager->aInJournal==0 );
       
  3961     sqlite3PagerPagecount(pPager);
       
  3962     pagerLeave(pPager);
       
  3963     pPager->aInJournal = (u8*)sqlite3MallocZero( pPager->dbSize/8 + 1 );
       
  3964     pagerEnter(pPager);
       
  3965     if( !pPager->aInJournal ){
       
  3966       rc = SQLITE_NOMEM;
       
  3967     }else{
       
  3968       pPager->origDbSize = pPager->dbSize;
       
  3969       rc = writeJournalHdr(pPager);
       
  3970     }
       
  3971   }
       
  3972   assert( !pPager->journalOpen || pPager->journalOff>0 || rc!=SQLITE_OK );
       
  3973   pagerLeave(pPager);
       
  3974   return rc;
       
  3975 }
       
  3976 
       
  3977 /*
       
  3978 ** Make a page dirty.  Set its dirty flag and add it to the dirty
       
  3979 ** page list.
       
  3980 */
       
  3981 static void makeDirty(PgHdr *pPg){
       
  3982   if( pPg->dirty==0 ){
       
  3983     Pager *pPager = pPg->pPager;
       
  3984     pPg->dirty = 1;
       
  3985     pPg->pDirty = pPager->pDirty;
       
  3986     if( pPager->pDirty ){
       
  3987       pPager->pDirty->pPrevDirty = pPg;
       
  3988     }
       
  3989     pPg->pPrevDirty = 0;
       
  3990     pPager->pDirty = pPg;
       
  3991   }
       
  3992 }
       
  3993 
       
  3994 /*
       
  3995 ** Make a page clean.  Clear its dirty bit and remove it from the
       
  3996 ** dirty page list.
       
  3997 */
       
  3998 static void makeClean(PgHdr *pPg){
       
  3999   if( pPg->dirty ){
       
  4000     pPg->dirty = 0;
       
  4001     if( pPg->pDirty ){
       
  4002       assert( pPg->pDirty->pPrevDirty==pPg );
       
  4003       pPg->pDirty->pPrevDirty = pPg->pPrevDirty;
       
  4004     }
       
  4005     if( pPg->pPrevDirty ){
       
  4006       assert( pPg->pPrevDirty->pDirty==pPg );
       
  4007       pPg->pPrevDirty->pDirty = pPg->pDirty;
       
  4008     }else{
       
  4009       assert( pPg->pPager->pDirty==pPg );
       
  4010       pPg->pPager->pDirty = pPg->pDirty;
       
  4011     }
       
  4012   }
       
  4013 }
       
  4014 
       
  4015 
       
  4016 /*
       
  4017 ** Mark a data page as writeable.  The page is written into the journal 
       
  4018 ** if it is not there already.  This routine must be called before making
       
  4019 ** changes to a page.
       
  4020 **
       
  4021 ** The first time this routine is called, the pager creates a new
       
  4022 ** journal and acquires a RESERVED lock on the database.  If the RESERVED
       
  4023 ** lock could not be acquired, this routine returns SQLITE_BUSY.  The
       
  4024 ** calling routine must check for that return value and be careful not to
       
  4025 ** change any page data until this routine returns SQLITE_OK.
       
  4026 **
       
  4027 ** If the journal file could not be written because the disk is full,
       
  4028 ** then this routine returns SQLITE_FULL and does an immediate rollback.
       
  4029 ** All subsequent write attempts also return SQLITE_FULL until there
       
  4030 ** is a call to sqlite3PagerCommit() or sqlite3PagerRollback() to
       
  4031 ** reset.
       
  4032 */
       
  4033 static int pager_write(PgHdr *pPg){
       
  4034   void *pData = PGHDR_TO_DATA(pPg);
       
  4035   Pager *pPager = pPg->pPager;
       
  4036   int rc = SQLITE_OK;
       
  4037 
       
  4038   /* Check for errors
       
  4039   */
       
  4040   if( pPager->errCode ){ 
       
  4041     return pPager->errCode;
       
  4042   }
       
  4043   if( pPager->readOnly ){
       
  4044     return SQLITE_PERM;
       
  4045   }
       
  4046 
       
  4047   assert( !pPager->setMaster );
       
  4048 
       
  4049   CHECK_PAGE(pPg);
       
  4050 
       
  4051   /* If this page was previously acquired with noContent==1, that means
       
  4052   ** we didn't really read in the content of the page.  This can happen
       
  4053   ** (for example) when the page is being moved to the freelist.  But
       
  4054   ** now we are (perhaps) moving the page off of the freelist for
       
  4055   ** reuse and we need to know its original content so that content
       
  4056   ** can be stored in the rollback journal.  So do the read at this
       
  4057   ** time.
       
  4058   */
       
  4059   rc = pager_get_content(pPg);
       
  4060   if( rc ){
       
  4061     return rc;
       
  4062   }
       
  4063 
       
  4064   /* Mark the page as dirty.  If the page has already been written
       
  4065   ** to the journal then we can return right away.
       
  4066   */
       
  4067   makeDirty(pPg);
       
  4068   if( pPg->inJournal && (pageInStatement(pPg) || pPager->stmtInUse==0) ){
       
  4069     pPager->dirtyCache = 1;
       
  4070   }else{
       
  4071 
       
  4072     /* If we get this far, it means that the page needs to be
       
  4073     ** written to the transaction journal or the ckeckpoint journal
       
  4074     ** or both.
       
  4075     **
       
  4076     ** First check to see that the transaction journal exists and
       
  4077     ** create it if it does not.
       
  4078     */
       
  4079     assert( pPager->state!=PAGER_UNLOCK );
       
  4080     rc = sqlite3PagerBegin(pPg, 0);
       
  4081     if( rc!=SQLITE_OK ){
       
  4082       return rc;
       
  4083     }
       
  4084     assert( pPager->state>=PAGER_RESERVED );
       
  4085     if( !pPager->journalOpen && pPager->useJournal ){
       
  4086       rc = pager_open_journal(pPager);
       
  4087       if( rc!=SQLITE_OK ) return rc;
       
  4088     }
       
  4089     assert( pPager->journalOpen || !pPager->useJournal );
       
  4090     pPager->dirtyCache = 1;
       
  4091   
       
  4092     /* The transaction journal now exists and we have a RESERVED or an
       
  4093     ** EXCLUSIVE lock on the main database file.  Write the current page to
       
  4094     ** the transaction journal if it is not there already.
       
  4095     */
       
  4096     if( !pPg->inJournal && (pPager->useJournal || MEMDB) ){
       
  4097       if( (int)pPg->pgno <= pPager->origDbSize ){
       
  4098         if( MEMDB ){
       
  4099           PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4100           PAGERTRACE3("JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  4101           assert( pHist->pOrig==0 );
       
  4102           pHist->pOrig = (u8*)sqlite3_malloc( pPager->pageSize );
       
  4103           if( !pHist->pOrig ){
       
  4104             return SQLITE_NOMEM;
       
  4105           }
       
  4106           memcpy(pHist->pOrig, PGHDR_TO_DATA(pPg), pPager->pageSize);
       
  4107         }else{
       
  4108           u32 cksum;
       
  4109           char *pData2;
       
  4110 
       
  4111           /* We should never write to the journal file the page that
       
  4112           ** contains the database locks.  The following assert verifies
       
  4113           ** that we do not. */
       
  4114           assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
       
  4115           pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
       
  4116           cksum = pager_cksum(pPager, (u8*)pData2);
       
  4117           rc = write32bits(pPager->jfd, pPager->journalOff, pPg->pgno);
       
  4118           if( rc==SQLITE_OK ){
       
  4119             rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize,
       
  4120                                 pPager->journalOff + 4);
       
  4121             pPager->journalOff += pPager->pageSize+4;
       
  4122           }
       
  4123           if( rc==SQLITE_OK ){
       
  4124             rc = write32bits(pPager->jfd, pPager->journalOff, cksum);
       
  4125             pPager->journalOff += 4;
       
  4126           }
       
  4127           IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
       
  4128                    pPager->journalOff, pPager->pageSize));
       
  4129           PAGER_INCR(sqlite3_pager_writej_count);
       
  4130           PAGERTRACE5("JOURNAL %d page %d needSync=%d hash(%08x)\n",
       
  4131                PAGERID(pPager), pPg->pgno, pPg->needSync, pager_pagehash(pPg));
       
  4132 
       
  4133           /* An error has occured writing to the journal file. The 
       
  4134           ** transaction will be rolled back by the layer above.
       
  4135           */
       
  4136           if( rc!=SQLITE_OK ){
       
  4137             return rc;
       
  4138           }
       
  4139 
       
  4140           pPager->nRec++;
       
  4141           assert( pPager->aInJournal!=0 );
       
  4142           pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4143           pPg->needSync = !pPager->noSync;
       
  4144           if( pPager->stmtInUse ){
       
  4145             pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4146           }
       
  4147         }
       
  4148       }else{
       
  4149         pPg->needSync = !pPager->journalStarted && !pPager->noSync;
       
  4150         PAGERTRACE4("APPEND %d page %d needSync=%d\n",
       
  4151                 PAGERID(pPager), pPg->pgno, pPg->needSync);
       
  4152       }
       
  4153       if( pPg->needSync ){
       
  4154         pPager->needSync = 1;
       
  4155       }
       
  4156       pPg->inJournal = 1;
       
  4157     }
       
  4158   
       
  4159     /* If the statement journal is open and the page is not in it,
       
  4160     ** then write the current page to the statement journal.  Note that
       
  4161     ** the statement journal format differs from the standard journal format
       
  4162     ** in that it omits the checksums and the header.
       
  4163     */
       
  4164     if( pPager->stmtInUse 
       
  4165      && !pageInStatement(pPg) 
       
  4166      && (int)pPg->pgno<=pPager->stmtSize 
       
  4167     ){
       
  4168       assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
       
  4169       if( MEMDB ){
       
  4170         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4171         assert( pHist->pStmt==0 );
       
  4172         pHist->pStmt = (u8*)sqlite3_malloc( pPager->pageSize );
       
  4173         if( pHist->pStmt ){
       
  4174           memcpy(pHist->pStmt, PGHDR_TO_DATA(pPg), pPager->pageSize);
       
  4175         }
       
  4176         PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  4177         page_add_to_stmt_list(pPg);
       
  4178       }else{
       
  4179         i64 offset = pPager->stmtNRec*(4+pPager->pageSize);
       
  4180         char *pData2 = CODEC2(pPager, pData, pPg->pgno, 7);
       
  4181         rc = write32bits(pPager->stfd, offset, pPg->pgno);
       
  4182         if( rc==SQLITE_OK ){
       
  4183           rc = sqlite3OsWrite(pPager->stfd, pData2, pPager->pageSize, offset+4);
       
  4184         }
       
  4185         PAGERTRACE3("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno);
       
  4186         if( rc!=SQLITE_OK ){
       
  4187           return rc;
       
  4188         }
       
  4189         pPager->stmtNRec++;
       
  4190         assert( pPager->aInStmt!=0 );
       
  4191         pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4192       }
       
  4193     }
       
  4194   }
       
  4195 
       
  4196   /* Update the database size and return.
       
  4197   */
       
  4198   assert( pPager->state>=PAGER_SHARED );
       
  4199   if( pPager->dbSize<(int)pPg->pgno ){
       
  4200     pPager->dbSize = pPg->pgno;
       
  4201     if( !MEMDB && pPager->dbSize==PENDING_BYTE/pPager->pageSize ){
       
  4202       pPager->dbSize++;
       
  4203     }
       
  4204   }
       
  4205   return rc;
       
  4206 }
       
  4207 
       
  4208 /*
       
  4209 ** This function is used to mark a data-page as writable. It uses 
       
  4210 ** pager_write() to open a journal file (if it is not already open)
       
  4211 ** and write the page *pData to the journal.
       
  4212 **
       
  4213 ** The difference between this function and pager_write() is that this
       
  4214 ** function also deals with the special case where 2 or more pages
       
  4215 ** fit on a single disk sector. In this case all co-resident pages
       
  4216 ** must have been written to the journal file before returning.
       
  4217 */
       
  4218 int sqlite3PagerWrite(DbPage *pDbPage){
       
  4219   int rc = SQLITE_OK;
       
  4220 
       
  4221   PgHdr *pPg = pDbPage;
       
  4222   Pager *pPager = pPg->pPager;
       
  4223   Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
       
  4224 
       
  4225   pagerEnter(pPager);
       
  4226   if( !MEMDB && nPagePerSector>1 ){
       
  4227     Pgno nPageCount;          /* Total number of pages in database file */
       
  4228     Pgno pg1;                 /* First page of the sector pPg is located on. */
       
  4229     int nPage;                /* Number of pages starting at pg1 to journal */
       
  4230     int ii;
       
  4231     int needSync = 0;
       
  4232 
       
  4233     /* Set the doNotSync flag to 1. This is because we cannot allow a journal
       
  4234     ** header to be written between the pages journaled by this function.
       
  4235     */
       
  4236     assert( pPager->doNotSync==0 );
       
  4237     pPager->doNotSync = 1;
       
  4238 
       
  4239     /* This trick assumes that both the page-size and sector-size are
       
  4240     ** an integer power of 2. It sets variable pg1 to the identifier
       
  4241     ** of the first page of the sector pPg is located on.
       
  4242     */
       
  4243     pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
       
  4244 
       
  4245     nPageCount = sqlite3PagerPagecount(pPager);
       
  4246     if( pPg->pgno>nPageCount ){
       
  4247       nPage = (pPg->pgno - pg1)+1;
       
  4248     }else if( (pg1+nPagePerSector-1)>nPageCount ){
       
  4249       nPage = nPageCount+1-pg1;
       
  4250     }else{
       
  4251       nPage = nPagePerSector;
       
  4252     }
       
  4253     assert(nPage>0);
       
  4254     assert(pg1<=pPg->pgno);
       
  4255     assert((pg1+nPage)>pPg->pgno);
       
  4256 
       
  4257     for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
       
  4258       Pgno pg = pg1+ii;
       
  4259       PgHdr *pPage;
       
  4260       if( !pPager->aInJournal || pg==pPg->pgno || 
       
  4261           pg>pPager->origDbSize || !(pPager->aInJournal[pg/8]&(1<<(pg&7)))
       
  4262       ) {
       
  4263         if( pg!=PAGER_MJ_PGNO(pPager) ){
       
  4264           rc = sqlite3PagerGet(pPager, pg, &pPage);
       
  4265           if( rc==SQLITE_OK ){
       
  4266             rc = pager_write(pPage);
       
  4267             if( pPage->needSync ){
       
  4268               needSync = 1;
       
  4269             }
       
  4270             sqlite3PagerUnref(pPage);
       
  4271           }
       
  4272         }
       
  4273       }else if( (pPage = pager_lookup(pPager, pg)) ){
       
  4274         if( pPage->needSync ){
       
  4275           needSync = 1;
       
  4276         }
       
  4277       }
       
  4278     }
       
  4279 
       
  4280     /* If the PgHdr.needSync flag is set for any of the nPage pages 
       
  4281     ** starting at pg1, then it needs to be set for all of them. Because
       
  4282     ** writing to any of these nPage pages may damage the others, the
       
  4283     ** journal file must contain sync()ed copies of all of them
       
  4284     ** before any of them can be written out to the database file.
       
  4285     */
       
  4286     if( needSync ){
       
  4287       for(ii=0; ii<nPage && needSync; ii++){
       
  4288         PgHdr *pPage = pager_lookup(pPager, pg1+ii);
       
  4289         if( pPage ) pPage->needSync = 1;
       
  4290       }
       
  4291       assert(pPager->needSync);
       
  4292     }
       
  4293 
       
  4294     assert( pPager->doNotSync==1 );
       
  4295     pPager->doNotSync = 0;
       
  4296   }else{
       
  4297     rc = pager_write(pDbPage);
       
  4298   }
       
  4299   pagerLeave(pPager);
       
  4300   return rc;
       
  4301 }
       
  4302 
       
  4303 /*
       
  4304 ** Return TRUE if the page given in the argument was previously passed
       
  4305 ** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
       
  4306 ** to change the content of the page.
       
  4307 */
       
  4308 #ifndef NDEBUG
       
  4309 int sqlite3PagerIswriteable(DbPage *pPg){
       
  4310   return pPg->dirty;
       
  4311 }
       
  4312 #endif
       
  4313 
       
  4314 #ifndef SQLITE_OMIT_VACUUM
       
  4315 /*
       
  4316 ** Replace the content of a single page with the information in the third
       
  4317 ** argument.
       
  4318 */
       
  4319 int sqlite3PagerOverwrite(Pager *pPager, Pgno pgno, void *pData){
       
  4320   PgHdr *pPg;
       
  4321   int rc;
       
  4322 
       
  4323   pagerEnter(pPager);
       
  4324   rc = sqlite3PagerGet(pPager, pgno, &pPg);
       
  4325   if( rc==SQLITE_OK ){
       
  4326     rc = sqlite3PagerWrite(pPg);
       
  4327     if( rc==SQLITE_OK ){
       
  4328       memcpy(sqlite3PagerGetData(pPg), pData, pPager->pageSize);
       
  4329     }
       
  4330     sqlite3PagerUnref(pPg);
       
  4331   }
       
  4332   pagerLeave(pPager);
       
  4333   return rc;
       
  4334 }
       
  4335 #endif
       
  4336 
       
  4337 /*
       
  4338 ** A call to this routine tells the pager that it is not necessary to
       
  4339 ** write the information on page pPg back to the disk, even though
       
  4340 ** that page might be marked as dirty.
       
  4341 **
       
  4342 ** The overlying software layer calls this routine when all of the data
       
  4343 ** on the given page is unused.  The pager marks the page as clean so
       
  4344 ** that it does not get written to disk.
       
  4345 **
       
  4346 ** Tests show that this optimization, together with the
       
  4347 ** sqlite3PagerDontRollback() below, more than double the speed
       
  4348 ** of large INSERT operations and quadruple the speed of large DELETEs.
       
  4349 **
       
  4350 ** When this routine is called, set the alwaysRollback flag to true.
       
  4351 ** Subsequent calls to sqlite3PagerDontRollback() for the same page
       
  4352 ** will thereafter be ignored.  This is necessary to avoid a problem
       
  4353 ** where a page with data is added to the freelist during one part of
       
  4354 ** a transaction then removed from the freelist during a later part
       
  4355 ** of the same transaction and reused for some other purpose.  When it
       
  4356 ** is first added to the freelist, this routine is called.  When reused,
       
  4357 ** the sqlite3PagerDontRollback() routine is called.  But because the
       
  4358 ** page contains critical data, we still need to be sure it gets
       
  4359 ** rolled back in spite of the sqlite3PagerDontRollback() call.
       
  4360 */
       
  4361 void sqlite3PagerDontWrite(DbPage *pDbPage){
       
  4362   PgHdr *pPg = pDbPage;
       
  4363   Pager *pPager = pPg->pPager;
       
  4364 
       
  4365   if( MEMDB ) return;
       
  4366   pagerEnter(pPager);
       
  4367   pPg->alwaysRollback = 1;
       
  4368   if( pPg->dirty && !pPager->stmtInUse ){
       
  4369     assert( pPager->state>=PAGER_SHARED );
       
  4370     if( pPager->dbSize==(int)pPg->pgno && pPager->origDbSize<pPager->dbSize ){
       
  4371       /* If this pages is the last page in the file and the file has grown
       
  4372       ** during the current transaction, then do NOT mark the page as clean.
       
  4373       ** When the database file grows, we must make sure that the last page
       
  4374       ** gets written at least once so that the disk file will be the correct
       
  4375       ** size. If you do not write this page and the size of the file
       
  4376       ** on the disk ends up being too small, that can lead to database
       
  4377       ** corruption during the next transaction.
       
  4378       */
       
  4379     }else{
       
  4380       PAGERTRACE3("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager));
       
  4381       IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
       
  4382       makeClean(pPg);
       
  4383 #ifdef SQLITE_CHECK_PAGES
       
  4384       pPg->pageHash = pager_pagehash(pPg);
       
  4385 #endif
       
  4386     }
       
  4387   }
       
  4388   pagerLeave(pPager);
       
  4389 }
       
  4390 
       
  4391 /*
       
  4392 ** A call to this routine tells the pager that if a rollback occurs,
       
  4393 ** it is not necessary to restore the data on the given page.  This
       
  4394 ** means that the pager does not have to record the given page in the
       
  4395 ** rollback journal.
       
  4396 **
       
  4397 ** If we have not yet actually read the content of this page (if
       
  4398 ** the PgHdr.needRead flag is set) then this routine acts as a promise
       
  4399 ** that we will never need to read the page content in the future.
       
  4400 ** so the needRead flag can be cleared at this point.
       
  4401 */
       
  4402 void sqlite3PagerDontRollback(DbPage *pPg){
       
  4403   Pager *pPager = pPg->pPager;
       
  4404 
       
  4405   pagerEnter(pPager);
       
  4406   assert( pPager->state>=PAGER_RESERVED );
       
  4407   if( pPager->journalOpen==0 ) return;
       
  4408   if( pPg->alwaysRollback || pPager->alwaysRollback || MEMDB ) return;
       
  4409   if( !pPg->inJournal && (int)pPg->pgno <= pPager->origDbSize ){
       
  4410     assert( pPager->aInJournal!=0 );
       
  4411     pPager->aInJournal[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4412     pPg->inJournal = 1;
       
  4413     pPg->needRead = 0;
       
  4414     if( pPager->stmtInUse ){
       
  4415       pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4416     }
       
  4417     PAGERTRACE3("DONT_ROLLBACK page %d of %d\n", pPg->pgno, PAGERID(pPager));
       
  4418     IOTRACE(("GARBAGE %p %d\n", pPager, pPg->pgno))
       
  4419   }
       
  4420   if( pPager->stmtInUse 
       
  4421    && !pageInStatement(pPg) 
       
  4422    && (int)pPg->pgno<=pPager->stmtSize 
       
  4423   ){
       
  4424     assert( pPg->inJournal || (int)pPg->pgno>pPager->origDbSize );
       
  4425     assert( pPager->aInStmt!=0 );
       
  4426     pPager->aInStmt[pPg->pgno/8] |= 1<<(pPg->pgno&7);
       
  4427   }
       
  4428   pagerLeave(pPager);
       
  4429 }
       
  4430 
       
  4431 
       
  4432 /*
       
  4433 ** This routine is called to increment the database file change-counter,
       
  4434 ** stored at byte 24 of the pager file.
       
  4435 */
       
  4436 static int pager_incr_changecounter(Pager *pPager, int isDirect){
       
  4437   PgHdr *pPgHdr;
       
  4438   u32 change_counter;
       
  4439   int rc = SQLITE_OK;
       
  4440 
       
  4441   if( !pPager->changeCountDone ){
       
  4442     /* Open page 1 of the file for writing. */
       
  4443     rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
       
  4444     if( rc!=SQLITE_OK ) return rc;
       
  4445 
       
  4446     if( !isDirect ){
       
  4447       rc = sqlite3PagerWrite(pPgHdr);
       
  4448       if( rc!=SQLITE_OK ){
       
  4449         sqlite3PagerUnref(pPgHdr);
       
  4450         return rc;
       
  4451       }
       
  4452     }
       
  4453 
       
  4454     /* Increment the value just read and write it back to byte 24. */
       
  4455     change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
       
  4456     change_counter++;
       
  4457     put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
       
  4458 
       
  4459     if( isDirect && pPager->fd->isOpen ){
       
  4460       const void *zBuf = PGHDR_TO_DATA(pPgHdr);
       
  4461       rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
       
  4462     }
       
  4463 
       
  4464     /* Release the page reference. */
       
  4465     sqlite3PagerUnref(pPgHdr);
       
  4466     pPager->changeCountDone = 1;
       
  4467   }
       
  4468   return rc;
       
  4469 }
       
  4470 
       
  4471 /*
       
  4472 ** Sync the database file for the pager pPager. zMaster points to the name
       
  4473 ** of a master journal file that should be written into the individual
       
  4474 ** journal file. zMaster may be NULL, which is interpreted as no master
       
  4475 ** journal (a single database transaction).
       
  4476 **
       
  4477 ** This routine ensures that the journal is synced, all dirty pages written
       
  4478 ** to the database file and the database file synced. The only thing that
       
  4479 ** remains to commit the transaction is to delete the journal file (or
       
  4480 ** master journal file if specified).
       
  4481 **
       
  4482 ** Note that if zMaster==NULL, this does not overwrite a previous value
       
  4483 ** passed to an sqlite3PagerCommitPhaseOne() call.
       
  4484 **
       
  4485 ** If parameter nTrunc is non-zero, then the pager file is truncated to
       
  4486 ** nTrunc pages (this is used by auto-vacuum databases).
       
  4487 */
       
  4488 int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
       
  4489   int rc = SQLITE_OK;
       
  4490 
       
  4491   PAGERTRACE4("DATABASE SYNC: File=%s zMaster=%s nTrunc=%d\n", 
       
  4492       pPager->zFilename, zMaster, nTrunc);
       
  4493   pagerEnter(pPager);
       
  4494 
       
  4495   /* If this is an in-memory db, or no pages have been written to, or this
       
  4496   ** function has already been called, it is a no-op.
       
  4497   */
       
  4498   if( pPager->state!=PAGER_SYNCED && !MEMDB && pPager->dirtyCache ){
       
  4499     PgHdr *pPg;
       
  4500 
       
  4501 #ifdef SQLITE_ENABLE_ATOMIC_WRITE
       
  4502     /* The atomic-write optimization can be used if all of the
       
  4503     ** following are true:
       
  4504     **
       
  4505     **    + The file-system supports the atomic-write property for
       
  4506     **      blocks of size page-size, and
       
  4507     **    + This commit is not part of a multi-file transaction, and
       
  4508     **    + Exactly one page has been modified and stored in the journal file.
       
  4509     **
       
  4510     ** If the optimization can be used, then the journal file will never
       
  4511     ** be created for this transaction.
       
  4512     */
       
  4513     int useAtomicWrite = (
       
  4514         !zMaster && 
       
  4515         pPager->journalOff==jrnlBufferSize(pPager) && 
       
  4516         nTrunc==0 && 
       
  4517         (0==pPager->pDirty || 0==pPager->pDirty->pDirty)
       
  4518     );
       
  4519     if( useAtomicWrite ){
       
  4520       /* Update the nRec field in the journal file. */
       
  4521       int offset = pPager->journalHdr + sizeof(aJournalMagic);
       
  4522       assert(pPager->nRec==1);
       
  4523       rc = write32bits(pPager->jfd, offset, pPager->nRec);
       
  4524 
       
  4525       /* Update the db file change counter. The following call will modify
       
  4526       ** the in-memory representation of page 1 to include the updated
       
  4527       ** change counter and then write page 1 directly to the database
       
  4528       ** file. Because of the atomic-write property of the host file-system, 
       
  4529       ** this is safe.
       
  4530       */
       
  4531       if( rc==SQLITE_OK ){
       
  4532         rc = pager_incr_changecounter(pPager, 1);
       
  4533       }
       
  4534     }else{
       
  4535       rc = sqlite3JournalCreate(pPager->jfd);
       
  4536     }
       
  4537 
       
  4538     if( !useAtomicWrite && rc==SQLITE_OK )
       
  4539 #endif
       
  4540 
       
  4541     /* If a master journal file name has already been written to the
       
  4542     ** journal file, then no sync is required. This happens when it is
       
  4543     ** written, then the process fails to upgrade from a RESERVED to an
       
  4544     ** EXCLUSIVE lock. The next time the process tries to commit the
       
  4545     ** transaction the m-j name will have already been written.
       
  4546     */
       
  4547     if( !pPager->setMaster ){
       
  4548       assert( pPager->journalOpen );
       
  4549       rc = pager_incr_changecounter(pPager, 0);
       
  4550       if( rc!=SQLITE_OK ) goto sync_exit;
       
  4551 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4552       if( nTrunc!=0 ){
       
  4553         /* If this transaction has made the database smaller, then all pages
       
  4554         ** being discarded by the truncation must be written to the journal
       
  4555         ** file.
       
  4556         */
       
  4557         Pgno i;
       
  4558         int iSkip = PAGER_MJ_PGNO(pPager);
       
  4559         for( i=nTrunc+1; i<=pPager->origDbSize; i++ ){
       
  4560           if( !(pPager->aInJournal[i/8] & (1<<(i&7))) && i!=iSkip ){
       
  4561             rc = sqlite3PagerGet(pPager, i, &pPg);
       
  4562             if( rc!=SQLITE_OK ) goto sync_exit;
       
  4563             rc = sqlite3PagerWrite(pPg);
       
  4564             sqlite3PagerUnref(pPg);
       
  4565             if( rc!=SQLITE_OK ) goto sync_exit;
       
  4566           }
       
  4567         } 
       
  4568       }
       
  4569 #endif
       
  4570       rc = writeMasterJournal(pPager, zMaster);
       
  4571       if( rc!=SQLITE_OK ) goto sync_exit;
       
  4572       rc = syncJournal(pPager);
       
  4573     }
       
  4574     if( rc!=SQLITE_OK ) goto sync_exit;
       
  4575 
       
  4576 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4577     if( nTrunc!=0 ){
       
  4578       rc = sqlite3PagerTruncate(pPager, nTrunc);
       
  4579       if( rc!=SQLITE_OK ) goto sync_exit;
       
  4580     }
       
  4581 #endif
       
  4582 
       
  4583     /* Write all dirty pages to the database file */
       
  4584     pPg = pager_get_all_dirty_pages(pPager);
       
  4585     rc = pager_write_pagelist(pPg);
       
  4586     if( rc!=SQLITE_OK ){
       
  4587       while( pPg && !pPg->dirty ){ pPg = pPg->pDirty; }
       
  4588       pPager->pDirty = pPg;
       
  4589       goto sync_exit;
       
  4590     }
       
  4591     pPager->pDirty = 0;
       
  4592 
       
  4593     /* Sync the database file. */
       
  4594     if( !pPager->noSync ){
       
  4595       rc = sqlite3OsSync(pPager->fd, pPager->sync_flags);
       
  4596     }
       
  4597     IOTRACE(("DBSYNC %p\n", pPager))
       
  4598 
       
  4599     pPager->state = PAGER_SYNCED;
       
  4600   }else if( MEMDB && nTrunc!=0 ){
       
  4601     rc = sqlite3PagerTruncate(pPager, nTrunc);
       
  4602   }
       
  4603 
       
  4604 sync_exit:
       
  4605   if( rc==SQLITE_IOERR_BLOCKED ){
       
  4606     /* pager_incr_changecounter() may attempt to obtain an exclusive
       
  4607      * lock to spill the cache and return IOERR_BLOCKED. But since 
       
  4608      * there is no chance the cache is inconsistent, it is
       
  4609      * better to return SQLITE_BUSY.
       
  4610      */
       
  4611     rc = SQLITE_BUSY;
       
  4612   }
       
  4613   pagerLeave(pPager);
       
  4614   return rc;
       
  4615 }
       
  4616 
       
  4617 
       
  4618 /*
       
  4619 ** Commit all changes to the database and release the write lock.
       
  4620 **
       
  4621 ** If the commit fails for any reason, a rollback attempt is made
       
  4622 ** and an error code is returned.  If the commit worked, SQLITE_OK
       
  4623 ** is returned.
       
  4624 */
       
  4625 int sqlite3PagerCommitPhaseTwo(Pager *pPager){
       
  4626   int rc;
       
  4627   PgHdr *pPg;
       
  4628 
       
  4629   if( pPager->errCode ){
       
  4630     return pPager->errCode;
       
  4631   }
       
  4632   if( pPager->state<PAGER_RESERVED ){
       
  4633     return SQLITE_ERROR;
       
  4634   }
       
  4635   pagerEnter(pPager);
       
  4636   PAGERTRACE2("COMMIT %d\n", PAGERID(pPager));
       
  4637   if( MEMDB ){
       
  4638     pPg = pager_get_all_dirty_pages(pPager);
       
  4639     while( pPg ){
       
  4640       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4641       clearHistory(pHist);
       
  4642       pPg->dirty = 0;
       
  4643       pPg->inJournal = 0;
       
  4644       pHist->inStmt = 0;
       
  4645       pPg->needSync = 0;
       
  4646       pHist->pPrevStmt = pHist->pNextStmt = 0;
       
  4647       pPg = pPg->pDirty;
       
  4648     }
       
  4649     pPager->pDirty = 0;
       
  4650 #ifndef NDEBUG
       
  4651     for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  4652       PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4653       assert( !pPg->alwaysRollback );
       
  4654       assert( !pHist->pOrig );
       
  4655       assert( !pHist->pStmt );
       
  4656     }
       
  4657 #endif
       
  4658     pPager->pStmt = 0;
       
  4659     pPager->state = PAGER_SHARED;
       
  4660     return SQLITE_OK;
       
  4661   }
       
  4662   assert( pPager->journalOpen || !pPager->dirtyCache );
       
  4663   assert( pPager->state==PAGER_SYNCED || !pPager->dirtyCache );
       
  4664   rc = pager_end_transaction(pPager);
       
  4665   rc = pager_error(pPager, rc);
       
  4666   pagerLeave(pPager);
       
  4667   return rc;
       
  4668 }
       
  4669 
       
  4670 /*
       
  4671 ** Rollback all changes.  The database falls back to PAGER_SHARED mode.
       
  4672 ** All in-memory cache pages revert to their original data contents.
       
  4673 ** The journal is deleted.
       
  4674 **
       
  4675 ** This routine cannot fail unless some other process is not following
       
  4676 ** the correct locking protocol or unless some other
       
  4677 ** process is writing trash into the journal file (SQLITE_CORRUPT) or
       
  4678 ** unless a prior malloc() failed (SQLITE_NOMEM).  Appropriate error
       
  4679 ** codes are returned for all these occasions.  Otherwise,
       
  4680 ** SQLITE_OK is returned.
       
  4681 */
       
  4682 int sqlite3PagerRollback(Pager *pPager){
       
  4683   int rc;
       
  4684   PAGERTRACE2("ROLLBACK %d\n", PAGERID(pPager));
       
  4685   if( MEMDB ){
       
  4686     PgHdr *p;
       
  4687     for(p=pPager->pAll; p; p=p->pNextAll){
       
  4688       PgHistory *pHist;
       
  4689       assert( !p->alwaysRollback );
       
  4690       if( !p->dirty ){
       
  4691         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pOrig );
       
  4692         assert( !((PgHistory *)PGHDR_TO_HIST(p, pPager))->pStmt );
       
  4693         continue;
       
  4694       }
       
  4695 
       
  4696       pHist = PGHDR_TO_HIST(p, pPager);
       
  4697       if( pHist->pOrig ){
       
  4698         memcpy(PGHDR_TO_DATA(p), pHist->pOrig, pPager->pageSize);
       
  4699         PAGERTRACE3("ROLLBACK-PAGE %d of %d\n", p->pgno, PAGERID(pPager));
       
  4700       }else{
       
  4701         PAGERTRACE3("PAGE %d is clean on %d\n", p->pgno, PAGERID(pPager));
       
  4702       }
       
  4703       clearHistory(pHist);
       
  4704       p->dirty = 0;
       
  4705       p->inJournal = 0;
       
  4706       pHist->inStmt = 0;
       
  4707       pHist->pPrevStmt = pHist->pNextStmt = 0;
       
  4708       if( pPager->xReiniter ){
       
  4709         pPager->xReiniter(p, pPager->pageSize);
       
  4710       }
       
  4711     }
       
  4712     pPager->pDirty = 0;
       
  4713     pPager->pStmt = 0;
       
  4714     pPager->dbSize = pPager->origDbSize;
       
  4715     pager_truncate_cache(pPager);
       
  4716     pPager->stmtInUse = 0;
       
  4717     pPager->state = PAGER_SHARED;
       
  4718     return SQLITE_OK;
       
  4719   }
       
  4720 
       
  4721   pagerEnter(pPager);
       
  4722   if( !pPager->dirtyCache || !pPager->journalOpen ){
       
  4723     rc = pager_end_transaction(pPager);
       
  4724     pagerLeave(pPager);
       
  4725     return rc;
       
  4726   }
       
  4727 
       
  4728   if( pPager->errCode && pPager->errCode!=SQLITE_FULL ){
       
  4729     if( pPager->state>=PAGER_EXCLUSIVE ){
       
  4730       pager_playback(pPager, 0);
       
  4731     }
       
  4732     pagerLeave(pPager);
       
  4733     return pPager->errCode;
       
  4734   }
       
  4735   if( pPager->state==PAGER_RESERVED ){
       
  4736     int rc2;
       
  4737     rc = pager_playback(pPager, 0);
       
  4738     rc2 = pager_end_transaction(pPager);
       
  4739     if( rc==SQLITE_OK ){
       
  4740       rc = rc2;
       
  4741     }
       
  4742   }else{
       
  4743     rc = pager_playback(pPager, 0);
       
  4744   }
       
  4745   /* pager_reset(pPager); */
       
  4746   pPager->dbSize = -1;
       
  4747 
       
  4748   /* If an error occurs during a ROLLBACK, we can no longer trust the pager
       
  4749   ** cache. So call pager_error() on the way out to make any error 
       
  4750   ** persistent.
       
  4751   */
       
  4752   rc = pager_error(pPager, rc);
       
  4753   pagerLeave(pPager);
       
  4754   return rc;
       
  4755 }
       
  4756 
       
  4757 /*
       
  4758 ** Return TRUE if the database file is opened read-only.  Return FALSE
       
  4759 ** if the database is (in theory) writable.
       
  4760 */
       
  4761 int sqlite3PagerIsreadonly(Pager *pPager){
       
  4762   return pPager->readOnly;
       
  4763 }
       
  4764 
       
  4765 /*
       
  4766 ** Return the number of references to the pager.
       
  4767 */
       
  4768 int sqlite3PagerRefcount(Pager *pPager){
       
  4769   return pPager->nRef;
       
  4770 }
       
  4771 
       
  4772 #ifdef SQLITE_TEST
       
  4773 /*
       
  4774 ** This routine is used for testing and analysis only.
       
  4775 */
       
  4776 int *sqlite3PagerStats(Pager *pPager){
       
  4777   static int a[11];
       
  4778   a[0] = pPager->nRef;
       
  4779   a[1] = pPager->nPage;
       
  4780   a[2] = pPager->mxPage;
       
  4781   a[3] = pPager->dbSize;
       
  4782   a[4] = pPager->state;
       
  4783   a[5] = pPager->errCode;
       
  4784   a[6] = pPager->nHit;
       
  4785   a[7] = pPager->nMiss;
       
  4786   a[8] = 0;  /* Used to be pPager->nOvfl */
       
  4787   a[9] = pPager->nRead;
       
  4788   a[10] = pPager->nWrite;
       
  4789   return a;
       
  4790 }
       
  4791 #endif
       
  4792 
       
  4793 /*
       
  4794 ** Set the statement rollback point.
       
  4795 **
       
  4796 ** This routine should be called with the transaction journal already
       
  4797 ** open.  A new statement journal is created that can be used to rollback
       
  4798 ** changes of a single SQL command within a larger transaction.
       
  4799 */
       
  4800 static int pagerStmtBegin(Pager *pPager){
       
  4801   int rc;
       
  4802   assert( !pPager->stmtInUse );
       
  4803   assert( pPager->state>=PAGER_SHARED );
       
  4804   assert( pPager->dbSize>=0 );
       
  4805   PAGERTRACE2("STMT-BEGIN %d\n", PAGERID(pPager));
       
  4806   if( MEMDB ){
       
  4807     pPager->stmtInUse = 1;
       
  4808     pPager->stmtSize = pPager->dbSize;
       
  4809     return SQLITE_OK;
       
  4810   }
       
  4811   if( !pPager->journalOpen ){
       
  4812     pPager->stmtAutoopen = 1;
       
  4813     return SQLITE_OK;
       
  4814   }
       
  4815   assert( pPager->journalOpen );
       
  4816   pagerLeave(pPager);
       
  4817   assert( pPager->aInStmt==0 );
       
  4818   pPager->aInStmt = (u8*)sqlite3MallocZero( pPager->dbSize/8 + 1 );
       
  4819   pagerEnter(pPager);
       
  4820   if( pPager->aInStmt==0 ){
       
  4821     /* sqlite3OsLock(pPager->fd, SHARED_LOCK); */
       
  4822     return SQLITE_NOMEM;
       
  4823   }
       
  4824 #ifndef NDEBUG
       
  4825   rc = sqlite3OsFileSize(pPager->jfd, &pPager->stmtJSize);
       
  4826   if( rc ) goto stmt_begin_failed;
       
  4827   assert( pPager->stmtJSize == pPager->journalOff );
       
  4828 #endif
       
  4829   pPager->stmtJSize = pPager->journalOff;
       
  4830   pPager->stmtSize = pPager->dbSize;
       
  4831   pPager->stmtHdrOff = 0;
       
  4832   pPager->stmtCksum = pPager->cksumInit;
       
  4833   if( !pPager->stmtOpen ){
       
  4834     rc = sqlite3PagerOpentemp(pPager->pVfs, pPager->stfd, pPager->zStmtJrnl,
       
  4835                               SQLITE_OPEN_SUBJOURNAL);
       
  4836     if( rc ){
       
  4837       goto stmt_begin_failed;
       
  4838     }
       
  4839     pPager->stmtOpen = 1;
       
  4840     pPager->stmtNRec = 0;
       
  4841   }
       
  4842   pPager->stmtInUse = 1;
       
  4843   return SQLITE_OK;
       
  4844  
       
  4845 stmt_begin_failed:
       
  4846   if( pPager->aInStmt ){
       
  4847     sqlite3_free(pPager->aInStmt);
       
  4848     pPager->aInStmt = 0;
       
  4849   }
       
  4850   return rc;
       
  4851 }
       
  4852 int sqlite3PagerStmtBegin(Pager *pPager){
       
  4853   int rc;
       
  4854   pagerEnter(pPager);
       
  4855   rc = pagerStmtBegin(pPager);
       
  4856   pagerLeave(pPager);
       
  4857   return rc;
       
  4858 }
       
  4859 
       
  4860 /*
       
  4861 ** Commit a statement.
       
  4862 */
       
  4863 int sqlite3PagerStmtCommit(Pager *pPager){
       
  4864   pagerEnter(pPager);
       
  4865   if( pPager->stmtInUse ){
       
  4866     PgHdr *pPg, *pNext;
       
  4867     PAGERTRACE2("STMT-COMMIT %d\n", PAGERID(pPager));
       
  4868     if( !MEMDB ){
       
  4869       /* sqlite3OsTruncate(pPager->stfd, 0); */
       
  4870       sqlite3_free( pPager->aInStmt );
       
  4871       pPager->aInStmt = 0;
       
  4872     }else{
       
  4873       for(pPg=pPager->pStmt; pPg; pPg=pNext){
       
  4874         PgHistory *pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4875         pNext = pHist->pNextStmt;
       
  4876         assert( pHist->inStmt );
       
  4877         pHist->inStmt = 0;
       
  4878         pHist->pPrevStmt = pHist->pNextStmt = 0;
       
  4879         sqlite3_free(pHist->pStmt);
       
  4880         pHist->pStmt = 0;
       
  4881       }
       
  4882     }
       
  4883     pPager->stmtNRec = 0;
       
  4884     pPager->stmtInUse = 0;
       
  4885     pPager->pStmt = 0;
       
  4886   }
       
  4887   pPager->stmtAutoopen = 0;
       
  4888   pagerLeave(pPager);
       
  4889   return SQLITE_OK;
       
  4890 }
       
  4891 
       
  4892 /*
       
  4893 ** Rollback a statement.
       
  4894 */
       
  4895 int sqlite3PagerStmtRollback(Pager *pPager){
       
  4896   int rc;
       
  4897   pagerEnter(pPager);
       
  4898   if( pPager->stmtInUse ){
       
  4899     PAGERTRACE2("STMT-ROLLBACK %d\n", PAGERID(pPager));
       
  4900     if( MEMDB ){
       
  4901       PgHdr *pPg;
       
  4902       PgHistory *pHist;
       
  4903       for(pPg=pPager->pStmt; pPg; pPg=pHist->pNextStmt){
       
  4904         pHist = PGHDR_TO_HIST(pPg, pPager);
       
  4905         if( pHist->pStmt ){
       
  4906           memcpy(PGHDR_TO_DATA(pPg), pHist->pStmt, pPager->pageSize);
       
  4907           sqlite3_free(pHist->pStmt);
       
  4908           pHist->pStmt = 0;
       
  4909         }
       
  4910       }
       
  4911       pPager->dbSize = pPager->stmtSize;
       
  4912       pager_truncate_cache(pPager);
       
  4913       rc = SQLITE_OK;
       
  4914     }else{
       
  4915       rc = pager_stmt_playback(pPager);
       
  4916     }
       
  4917     sqlite3PagerStmtCommit(pPager);
       
  4918   }else{
       
  4919     rc = SQLITE_OK;
       
  4920   }
       
  4921   pPager->stmtAutoopen = 0;
       
  4922   pagerLeave(pPager);
       
  4923   return rc;
       
  4924 }
       
  4925 
       
  4926 /*
       
  4927 ** Return the full pathname of the database file.
       
  4928 */
       
  4929 const char *sqlite3PagerFilename(Pager *pPager){
       
  4930   return pPager->zFilename;
       
  4931 }
       
  4932 
       
  4933 /*
       
  4934 ** Return the VFS structure for the pager.
       
  4935 */
       
  4936 const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
       
  4937   return pPager->pVfs;
       
  4938 }
       
  4939 
       
  4940 /*
       
  4941 ** Return the file handle for the database file associated
       
  4942 ** with the pager.  This might return NULL if the file has
       
  4943 ** not yet been opened.
       
  4944 */
       
  4945 sqlite3_file *sqlite3PagerFile(Pager *pPager){
       
  4946   return pPager->fd;
       
  4947 }
       
  4948 
       
  4949 /*
       
  4950 ** Return the directory of the database file.
       
  4951 */
       
  4952 const char *sqlite3PagerDirname(Pager *pPager){
       
  4953   return pPager->zDirectory;
       
  4954 }
       
  4955 
       
  4956 /*
       
  4957 ** Return the full pathname of the journal file.
       
  4958 */
       
  4959 const char *sqlite3PagerJournalname(Pager *pPager){
       
  4960   return pPager->zJournal;
       
  4961 }
       
  4962 
       
  4963 /*
       
  4964 ** Return true if fsync() calls are disabled for this pager.  Return FALSE
       
  4965 ** if fsync()s are executed normally.
       
  4966 */
       
  4967 int sqlite3PagerNosync(Pager *pPager){
       
  4968   return pPager->noSync;
       
  4969 }
       
  4970 
       
  4971 #ifdef SQLITE_HAS_CODEC
       
  4972 /*
       
  4973 ** Set the codec for this pager
       
  4974 */
       
  4975 void sqlite3PagerSetCodec(
       
  4976   Pager *pPager,
       
  4977   void *(*xCodec)(void*,void*,Pgno,int),
       
  4978   void *pCodecArg
       
  4979 ){
       
  4980   pPager->xCodec = xCodec;
       
  4981   pPager->pCodecArg = pCodecArg;
       
  4982 }
       
  4983 #endif
       
  4984 
       
  4985 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4986 /*
       
  4987 ** Move the page pPg to location pgno in the file. 
       
  4988 **
       
  4989 ** There must be no references to the page previously located at
       
  4990 ** pgno (which we call pPgOld) though that page is allowed to be
       
  4991 ** in cache.  If the page previous located at pgno is not already
       
  4992 ** in the rollback journal, it is not put there by by this routine.
       
  4993 **
       
  4994 ** References to the page pPg remain valid. Updating any
       
  4995 ** meta-data associated with pPg (i.e. data stored in the nExtra bytes
       
  4996 ** allocated along with the page) is the responsibility of the caller.
       
  4997 **
       
  4998 ** A transaction must be active when this routine is called. It used to be
       
  4999 ** required that a statement transaction was not active, but this restriction
       
  5000 ** has been removed (CREATE INDEX needs to move a page when a statement
       
  5001 ** transaction is active).
       
  5002 */
       
  5003 int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno){
       
  5004   PgHdr *pPgOld;  /* The page being overwritten. */
       
  5005   int h;
       
  5006   Pgno needSyncPgno = 0;
       
  5007 
       
  5008   pagerEnter(pPager);
       
  5009   assert( pPg->nRef>0 );
       
  5010 
       
  5011   PAGERTRACE5("MOVE %d page %d (needSync=%d) moves to %d\n", 
       
  5012       PAGERID(pPager), pPg->pgno, pPg->needSync, pgno);
       
  5013   IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
       
  5014 
       
  5015   pager_get_content(pPg);
       
  5016   if( pPg->needSync ){
       
  5017     needSyncPgno = pPg->pgno;
       
  5018     assert( pPg->inJournal || (int)pgno>pPager->origDbSize );
       
  5019     assert( pPg->dirty );
       
  5020     assert( pPager->needSync );
       
  5021   }
       
  5022 
       
  5023   /* Unlink pPg from its hash-chain */
       
  5024   unlinkHashChain(pPager, pPg);
       
  5025 
       
  5026   /* If the cache contains a page with page-number pgno, remove it
       
  5027   ** from its hash chain. Also, if the PgHdr.needSync was set for 
       
  5028   ** page pgno before the 'move' operation, it needs to be retained 
       
  5029   ** for the page moved there.
       
  5030   */
       
  5031   pPg->needSync = 0;
       
  5032   pPgOld = pager_lookup(pPager, pgno);
       
  5033   if( pPgOld ){
       
  5034     assert( pPgOld->nRef==0 );
       
  5035     unlinkHashChain(pPager, pPgOld);
       
  5036     makeClean(pPgOld);
       
  5037     pPg->needSync = pPgOld->needSync;
       
  5038   }else{
       
  5039     pPg->needSync = 0;
       
  5040   }
       
  5041   if( pPager->aInJournal && (int)pgno<=pPager->origDbSize ){
       
  5042     pPg->inJournal =  (pPager->aInJournal[pgno/8] & (1<<(pgno&7)))!=0;
       
  5043   }else{
       
  5044     pPg->inJournal = 0;
       
  5045     assert( pPg->needSync==0 || (int)pgno>pPager->origDbSize );
       
  5046   }
       
  5047 
       
  5048   /* Change the page number for pPg and insert it into the new hash-chain. */
       
  5049   assert( pgno!=0 );
       
  5050   pPg->pgno = pgno;
       
  5051   h = pgno & (pPager->nHash-1);
       
  5052   if( pPager->aHash[h] ){
       
  5053     assert( pPager->aHash[h]->pPrevHash==0 );
       
  5054     pPager->aHash[h]->pPrevHash = pPg;
       
  5055   }
       
  5056   pPg->pNextHash = pPager->aHash[h];
       
  5057   pPager->aHash[h] = pPg;
       
  5058   pPg->pPrevHash = 0;
       
  5059 
       
  5060   makeDirty(pPg);
       
  5061   pPager->dirtyCache = 1;
       
  5062 
       
  5063   if( needSyncPgno ){
       
  5064     /* If needSyncPgno is non-zero, then the journal file needs to be 
       
  5065     ** sync()ed before any data is written to database file page needSyncPgno.
       
  5066     ** Currently, no such page exists in the page-cache and the 
       
  5067     ** Pager.aInJournal bit has been set. This needs to be remedied by loading
       
  5068     ** the page into the pager-cache and setting the PgHdr.needSync flag.
       
  5069     **
       
  5070     ** The sqlite3PagerGet() call may cause the journal to sync. So make
       
  5071     ** sure the Pager.needSync flag is set too.
       
  5072     */
       
  5073     int rc;
       
  5074     PgHdr *pPgHdr;
       
  5075     assert( pPager->needSync );
       
  5076     rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
       
  5077     if( rc!=SQLITE_OK ) return rc;
       
  5078     pPager->needSync = 1;
       
  5079     pPgHdr->needSync = 1;
       
  5080     pPgHdr->inJournal = 1;
       
  5081     makeDirty(pPgHdr);
       
  5082     sqlite3PagerUnref(pPgHdr);
       
  5083   }
       
  5084 
       
  5085   pagerLeave(pPager);
       
  5086   return SQLITE_OK;
       
  5087 }
       
  5088 #endif
       
  5089 
       
  5090 /*
       
  5091 ** Return a pointer to the data for the specified page.
       
  5092 */
       
  5093 void *sqlite3PagerGetData(DbPage *pPg){
       
  5094   return PGHDR_TO_DATA(pPg);
       
  5095 }
       
  5096 
       
  5097 /*
       
  5098 ** Return a pointer to the Pager.nExtra bytes of "extra" space 
       
  5099 ** allocated along with the specified page.
       
  5100 */
       
  5101 void *sqlite3PagerGetExtra(DbPage *pPg){
       
  5102   Pager *pPager = pPg->pPager;
       
  5103   return (pPager?PGHDR_TO_EXTRA(pPg, pPager):0);
       
  5104 }
       
  5105 
       
  5106 /*
       
  5107 ** Get/set the locking-mode for this pager. Parameter eMode must be one
       
  5108 ** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
       
  5109 ** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
       
  5110 ** the locking-mode is set to the value specified.
       
  5111 **
       
  5112 ** The returned value is either PAGER_LOCKINGMODE_NORMAL or
       
  5113 ** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
       
  5114 ** locking-mode.
       
  5115 */
       
  5116 int sqlite3PagerLockingMode(Pager *pPager, int eMode){
       
  5117   assert( eMode==PAGER_LOCKINGMODE_QUERY
       
  5118             || eMode==PAGER_LOCKINGMODE_NORMAL
       
  5119             || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
       
  5120   assert( PAGER_LOCKINGMODE_QUERY<0 );
       
  5121   assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
       
  5122   if( eMode>=0 && !pPager->tempFile ){
       
  5123     pPager->exclusiveMode = eMode;
       
  5124   }
       
  5125   return (int)pPager->exclusiveMode;
       
  5126 }
       
  5127 
       
  5128 #ifdef SQLITE_TEST
       
  5129 /*
       
  5130 ** Print a listing of all referenced pages and their ref count.
       
  5131 */
       
  5132 void sqlite3PagerRefdump(Pager *pPager){
       
  5133   PgHdr *pPg;
       
  5134   for(pPg=pPager->pAll; pPg; pPg=pPg->pNextAll){
       
  5135     if( pPg->nRef<=0 ) continue;
       
  5136     sqlite3DebugPrintf("PAGE %3d addr=%p nRef=%d\n", 
       
  5137        pPg->pgno, PGHDR_TO_DATA(pPg), pPg->nRef);
       
  5138   }
       
  5139 }
       
  5140 #endif
       
  5141 
       
  5142 #endif /* SQLITE_OMIT_DISKIO */