webengine/webkitutils/SqliteSymbian/btree.c
changeset 0 dd21522fd290
equal deleted inserted replaced
-1:000000000000 0:dd21522fd290
       
     1 /*
       
     2 ** 2004 April 6
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 ** $Id: btree.c,v 1.328 2006/08/16 16:42:48 drh Exp $
       
    13 **
       
     14 ** This file implements an external (disk-based) database using BTrees.
       
    15 ** For a detailed discussion of BTrees, refer to
       
    16 **
       
    17 **     Donald E. Knuth, THE ART OF COMPUTER PROGRAMMING, Volume 3:
       
    18 **     "Sorting And Searching", pages 473-480. Addison-Wesley
       
    19 **     Publishing Company, Reading, Massachusetts.
       
    20 **
       
    21 ** The basic idea is that each page of the file contains N database
       
    22 ** entries and N+1 pointers to subpages.
       
    23 **
       
    24 **   ----------------------------------------------------------------
       
    25 **   |  Ptr(0) | Key(0) | Ptr(1) | Key(1) | ... | Key(N) | Ptr(N+1) |
       
    26 **   ----------------------------------------------------------------
       
    27 **
       
    28 ** All of the keys on the page that Ptr(0) points to have values less
       
    29 ** than Key(0).  All of the keys on page Ptr(1) and its subpages have
       
    30 ** values greater than Key(0) and less than Key(1).  All of the keys
       
    31 ** on Ptr(N+1) and its subpages have values greater than Key(N).  And
       
    32 ** so forth.
       
    33 **
       
    34 ** Finding a particular key requires reading O(log(M)) pages from the 
       
    35 ** disk where M is the number of entries in the tree.
       
    36 **
       
    37 ** In this implementation, a single file can hold one or more separate 
       
    38 ** BTrees.  Each BTree is identified by the index of its root page.  The
       
    39 ** key and data for any entry are combined to form the "payload".  A
       
    40 ** fixed amount of payload can be carried directly on the database
       
    41 ** page.  If the payload is larger than the preset amount then surplus
       
    42 ** bytes are stored on overflow pages.  The payload for an entry
       
    43 ** and the preceding pointer are combined to form a "Cell".  Each 
       
    44 ** page has a small header which contains the Ptr(N+1) pointer and other
       
    45 ** information such as the size of key and data.
       
    46 **
       
    47 ** FORMAT DETAILS
       
    48 **
       
    49 ** The file is divided into pages.  The first page is called page 1,
       
    50 ** the second is page 2, and so forth.  A page number of zero indicates
       
    51 ** "no such page".  The page size can be anything between 512 and 65536.
       
    52 ** Each page can be either a btree page, a freelist page or an overflow
       
    53 ** page.
       
    54 **
       
    55 ** The first page is always a btree page.  The first 100 bytes of the first
       
    56 ** page contain a special header (the "file header") that describes the file.
       
    57 ** The format of the file header is as follows:
       
    58 **
       
    59 **   OFFSET   SIZE    DESCRIPTION
       
    60 **      0      16     Header string: "SQLite format 3\000"
       
    61 **     16       2     Page size in bytes.  
       
    62 **     18       1     File format write version
       
    63 **     19       1     File format read version
       
    64 **     20       1     Bytes of unused space at the end of each page
       
    65 **     21       1     Max embedded payload fraction
       
    66 **     22       1     Min embedded payload fraction
       
    67 **     23       1     Min leaf payload fraction
       
    68 **     24       4     File change counter
       
    69 **     28       4     Reserved for future use
       
    70 **     32       4     First freelist page
       
    71 **     36       4     Number of freelist pages in the file
       
    72 **     40      60     15 4-byte meta values passed to higher layers
       
    73 **
       
    74 ** All of the integer values are big-endian (most significant byte first).
       
    75 **
       
    76 ** The file change counter is incremented when the database is changed more
       
    77 ** than once within the same second.  This counter, together with the
       
    78 ** modification time of the file, allows other processes to know
       
    79 ** when the file has changed and thus when they need to flush their
       
    80 ** cache.
       
    81 **
       
    82 ** The max embedded payload fraction is the amount of the total usable
       
    83 ** space in a page that can be consumed by a single cell for standard
       
    84 ** B-tree (non-LEAFDATA) tables.  A value of 255 means 100%.  The default
       
    85 ** is to limit the maximum cell size so that at least 4 cells will fit
       
    86 ** on one page.  Thus the default max embedded payload fraction is 64.
       
    87 **
       
    88 ** If the payload for a cell is larger than the max payload, then extra
       
    89 ** payload is spilled to overflow pages.  Once an overflow page is allocated,
       
    90 ** as many bytes as possible are moved into the overflow pages without letting
       
    91 ** the cell size drop below the min embedded payload fraction.
       
    92 **
       
    93 ** The min leaf payload fraction is like the min embedded payload fraction
       
    94 ** except that it applies to leaf nodes in a LEAFDATA tree.  The maximum
       
     95 ** payload fraction for a LEAFDATA tree is always 100% (or 255) and is
       
    96 ** not specified in the header.
       
    97 **
       
     98 ** Each btree page is divided into three sections:  The header, the
       
     99 ** cell pointer array, and the cell content area.  Page 1 also has a 100-byte
       
   100 ** file header that occurs before the page header.
       
   101 **
       
   102 **      |----------------|
       
   103 **      | file header    |   100 bytes.  Page 1 only.
       
   104 **      |----------------|
       
   105 **      | page header    |   8 bytes for leaves.  12 bytes for interior nodes
       
   106 **      |----------------|
       
   107 **      | cell pointer   |   |  2 bytes per cell.  Sorted order.
       
   108 **      | array          |   |  Grows downward
       
   109 **      |                |   v
       
   110 **      |----------------|
       
   111 **      | unallocated    |
       
   112 **      | space          |
       
   113 **      |----------------|   ^  Grows upwards
       
   114 **      | cell content   |   |  Arbitrary order interspersed with freeblocks.
       
   115 **      | area           |   |  and free space fragments.
       
   116 **      |----------------|
       
   117 **
       
    118 ** The page header looks like this:
       
   119 **
       
   120 **   OFFSET   SIZE     DESCRIPTION
       
   121 **      0       1      Flags. 1: intkey, 2: zerodata, 4: leafdata, 8: leaf
       
   122 **      1       2      byte offset to the first freeblock
       
   123 **      3       2      number of cells on this page
       
   124 **      5       2      first byte of the cell content area
       
   125 **      7       1      number of fragmented free bytes
       
   126 **      8       4      Right child (the Ptr(N+1) value).  Omitted on leaves.
       
   127 **
       
   128 ** The flags define the format of this btree page.  The leaf flag means that
       
   129 ** this page has no children.  The zerodata flag means that this page carries
       
    130 ** only keys and no data.  The intkey flag means that the key is an integer
       
   131 ** which is stored in the key size entry of the cell header rather than in
       
   132 ** the payload area.
       
   133 **
       
   134 ** The cell pointer array begins on the first byte after the page header.
       
   135 ** The cell pointer array contains zero or more 2-byte numbers which are
       
   136 ** offsets from the beginning of the page to the cell content in the cell
       
   137 ** content area.  The cell pointers occur in sorted order.  The system strives
       
   138 ** to keep free space after the last cell pointer so that new cells can
       
   139 ** be easily added without having to defragment the page.
       
   140 **
       
   141 ** Cell content is stored at the very end of the page and grows toward the
       
   142 ** beginning of the page.
       
   143 **
       
   144 ** Unused space within the cell content area is collected into a linked list of
       
   145 ** freeblocks.  Each freeblock is at least 4 bytes in size.  The byte offset
       
   146 ** to the first freeblock is given in the header.  Freeblocks occur in
       
   147 ** increasing order.  Because a freeblock must be at least 4 bytes in size,
       
   148 ** any group of 3 or fewer unused bytes in the cell content area cannot
       
   149 ** exist on the freeblock chain.  A group of 3 or fewer free bytes is called
       
    150 ** a fragment.  The total number of bytes in all fragments is recorded
       
   151 ** in the page header at offset 7.
       
   152 **
       
   153 **    SIZE    DESCRIPTION
       
   154 **      2     Byte offset of the next freeblock
       
   155 **      2     Bytes in this freeblock
       
   156 **
       
   157 ** Cells are of variable length.  Cells are stored in the cell content area at
       
   158 ** the end of the page.  Pointers to the cells are in the cell pointer array
       
    159 ** that immediately follows the page header.  Cells are not necessarily
       
   160 ** contiguous or in order, but cell pointers are contiguous and in order.
       
   161 **
       
   162 ** Cell content makes use of variable length integers.  A variable
       
   163 ** length integer is 1 to 9 bytes where the lower 7 bits of each 
       
   164 ** byte are used.  The integer consists of all bytes that have bit 8 set and
       
   165 ** the first byte with bit 8 clear.  The most significant byte of the integer
       
   166 ** appears first.  A variable-length integer may not be more than 9 bytes long.
       
    167 ** As a special case, all 8 bits of the 9th byte are used as data.  This
       
   168 ** allows a 64-bit integer to be encoded in 9 bytes.
       
   169 **
       
   170 **    0x00                      becomes  0x00000000
       
   171 **    0x7f                      becomes  0x0000007f
       
   172 **    0x81 0x00                 becomes  0x00000080
       
   173 **    0x82 0x00                 becomes  0x00000100
       
   174 **    0x80 0x7f                 becomes  0x0000007f
       
   175 **    0x8a 0x91 0xd1 0xac 0x78  becomes  0x12345678
       
   176 **    0x81 0x81 0x81 0x81 0x01  becomes  0x10204081
       
   177 **
       
   178 ** Variable length integers are used for rowids and to hold the number of
       
   179 ** bytes of key and data in a btree cell.
       
   180 **
       
   181 ** The content of a cell looks like this:
       
   182 **
       
   183 **    SIZE    DESCRIPTION
       
   184 **      4     Page number of the left child. Omitted if leaf flag is set.
       
   185 **     var    Number of bytes of data. Omitted if the zerodata flag is set.
       
   186 **     var    Number of bytes of key. Or the key itself if intkey flag is set.
       
   187 **      *     Payload
       
   188 **      4     First page of the overflow chain.  Omitted if no overflow
       
   189 **
       
   190 ** Overflow pages form a linked list.  Each page except the last is completely
       
   191 ** filled with data (pagesize - 4 bytes).  The last page can have as little
       
   192 ** as 1 byte of data.
       
   193 **
       
   194 **    SIZE    DESCRIPTION
       
   195 **      4     Page number of next overflow page
       
   196 **      *     Data
       
   197 **
       
   198 ** Freelist pages come in two subtypes: trunk pages and leaf pages.  The
       
    199 ** file header points to the first in a linked list of trunk pages.  Each trunk
       
   200 ** page points to multiple leaf pages.  The content of a leaf page is
       
   201 ** unspecified.  A trunk page looks like this:
       
   202 **
       
   203 **    SIZE    DESCRIPTION
       
   204 **      4     Page number of next trunk page
       
   205 **      4     Number of leaf pointers on this page
       
    206 **      *     zero or more page numbers of leaves
       
   207 */
       
   208 #include "sqliteInt.h"
       
   209 #include "pager.h"
       
   210 #include "btree.h"
       
   211 #include "os.h"
       
   212 #include <assert.h>
       
   213 
       
   214 /* Round up a number to the next larger multiple of 8.  This is used
       
   215 ** to force 8-byte alignment on 64-bit architectures.
       
   216 */
       
   217 #define ROUND8(x)   ((x+7)&~7)
       
   218 
       
   219 
       
   220 /* The following value is the maximum cell size assuming a maximum page
       
   221 ** size give above.
       
   222 */
       
   223 #define MX_CELL_SIZE(pBt)  (pBt->pageSize-8)
       
   224 
       
   225 /* The maximum number of cells on a single page of the database.  This
       
   226 ** assumes a minimum cell size of 3 bytes.  Such small cells will be
       
   227 ** exceedingly rare, but they are possible.
       
   228 */
       
   229 #define MX_CELL(pBt) ((pBt->pageSize-8)/3)
       
   230 
       
   231 /* Forward declarations */
       
   232 typedef struct MemPage MemPage;
       
   233 typedef struct BtLock BtLock;
       
   234 
       
   235 /*
       
   236 ** This is a magic string that appears at the beginning of every
       
   237 ** SQLite database in order to identify the file as a real database.
       
   238 **
       
   239 ** You can change this value at compile-time by specifying a
       
   240 ** -DSQLITE_FILE_HEADER="..." on the compiler command-line.  The
       
   241 ** header must be exactly 16 bytes including the zero-terminator so
       
   242 ** the string itself should be 15 characters long.  If you change
       
   243 ** the header, then your custom library will not be able to read 
       
   244 ** databases generated by the standard tools and the standard tools
       
   245 ** will not be able to read databases created by your custom library.
       
   246 */
       
   247 #ifndef SQLITE_FILE_HEADER /* 123456789 123456 */
       
   248 #  define SQLITE_FILE_HEADER "SQLite format 3"
       
   249 #endif
       
   250 static const char zMagicHeader[] = SQLITE_FILE_HEADER;
       
   251 
       
   252 /*
       
   253 ** Page type flags.  An ORed combination of these flags appear as the
       
   254 ** first byte of every BTree page.
       
   255 */
       
   256 #define PTF_INTKEY    0x01
       
   257 #define PTF_ZERODATA  0x02
       
   258 #define PTF_LEAFDATA  0x04
       
   259 #define PTF_LEAF      0x08
       
   260 
       
   261 /*
       
   262 ** As each page of the file is loaded into memory, an instance of the following
       
   263 ** structure is appended and initialized to zero.  This structure stores
       
   264 ** information about the page that is decoded from the raw file page.
       
   265 **
       
   266 ** The pParent field points back to the parent page.  This allows us to
       
   267 ** walk up the BTree from any leaf to the root.  Care must be taken to
       
   268 ** unref() the parent page pointer when this page is no longer referenced.
       
   269 ** The pageDestructor() routine handles that chore.
       
   270 */
       
    271 struct MemPage {       /* Decoded per-page information appended to each in-memory page */
       
    272   u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
       
    273   u8 idxShift;         /* True if Cell indices have changed */
       
    274   u8 nOverflow;        /* Number of overflow cell bodies in aOvfl[] */
       
    275   u8 intKey;           /* True if intkey flag (PTF_INTKEY) is set */
       
    276   u8 leaf;             /* True if leaf flag (PTF_LEAF) is set */
       
    277   u8 zeroData;         /* True if table stores keys only */
       
    278   u8 leafData;         /* True if table stores data on leaves only */
       
    279   u8 hasData;          /* True if this page stores data */
       
    280   u8 hdrOffset;        /* 100 for page 1 (skips the file header).  0 otherwise */
       
    281   u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
       
    282   u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
       
    283   u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
       
    284   u16 cellOffset;      /* Index in aData of first cell pointer */
       
    285   u16 idxParent;       /* Index in parent of this node */
       
    286   u16 nFree;           /* Number of free bytes on the page */
       
    287   u16 nCell;           /* Number of cells on this page, local and ovfl */
       
    288   struct _OvflCell {   /* Cells that will not fit on aData[] */
       
    289     u8 *pCell;          /* Pointer to the body of the overflow cell */
       
    290     u16 idx;            /* Insert this cell before idx-th non-overflow cell */
       
    291   } aOvfl[5];          /* Fixed capacity of 5 entries; nOverflow counts those in use */
       
    292   BtShared *pBt;       /* Pointer back to the BtShared this page belongs to */
       
    293   u8 *aData;           /* Pointer back to the start of the page */
       
    294   Pgno pgno;           /* Page number for this page */
       
    295   MemPage *pParent;    /* The parent of this page.  NULL for root */
       
    296 };
       
   297 
       
   298 /*
       
   299 ** The in-memory image of a disk page has the auxiliary information appended
       
   300 ** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
       
   301 ** that extra information.
       
   302 */
       
   303 #define EXTRA_SIZE sizeof(MemPage)
       
   304 
       
   305 /* Btree handle */
       
    306 struct Btree {          /* Handle through which one user accesses a BtShared */
       
    307   sqlite3 *pSqlite;      /* Database handle; may be NULL (queryTableLock() checks). Its flags supply SQLITE_ReadUncommitted */
       
    308   BtShared *pBt;         /* The BtShared (pager, cursor list, lock list) behind this handle */
       
    309   u8 inTrans;            /* TRANS_NONE, TRANS_READ or TRANS_WRITE */
       
    310 };
       
   311 
       
   312 /*
       
   313 ** Btree.inTrans may take one of the following values.
       
   314 **
       
   315 ** If the shared-data extension is enabled, there may be multiple users
       
   316 ** of the Btree structure. At most one of these may open a write transaction,
       
   317 ** but any number may have active read transactions. Variable Btree.pDb 
       
   318 ** points to the handle that owns any current write-transaction.
       
   319 */
       
   320 #define TRANS_NONE  0
       
   321 #define TRANS_READ  1
       
   322 #define TRANS_WRITE 2
       
   323 
       
   324 /*
       
   325 ** Everything we need to know about an open database
       
   326 */
       
    327 struct BtShared {       /* State of one open database file; referenced by one or more Btree handles */
       
    328   Pager *pPager;        /* The page cache */
       
    329   BtCursor *pCursor;    /* A list of all open cursors */
       
    330   MemPage *pPage1;      /* First page of the database */
       
    331   u8 inStmt;            /* True if we are in a statement subtransaction */
       
    332   u8 readOnly;          /* True if the underlying file is readonly */
       
    333   u8 maxEmbedFrac;      /* Maximum payload as fraction of page size. NOTE(review): file header doc uses 255==100%, not a true % -- confirm */
       
    334   u8 minEmbedFrac;      /* Minimum payload as fraction of page size (presumably same scale as maxEmbedFrac) */
       
    335   u8 minLeafFrac;       /* Minimum leaf payload as fraction of page size (presumably same scale as maxEmbedFrac) */
       
    336   u8 pageSizeFixed;     /* True if the page size can no longer be changed */
       
    337 #ifndef SQLITE_OMIT_AUTOVACUUM
       
    338   u8 autoVacuum;        /* True if database supports auto-vacuum */
       
    339 #endif
       
    340   u16 pageSize;         /* Total number of bytes on a page */
       
    341   u16 usableSize;       /* Number of usable bytes on each page */
       
    342   int maxLocal;         /* Maximum local payload in non-LEAFDATA tables */
       
    343   int minLocal;         /* Minimum local payload in non-LEAFDATA tables */
       
    344   int maxLeaf;          /* Maximum local payload in a LEAFDATA table */
       
    345   int minLeaf;          /* Minimum local payload in a LEAFDATA table */
       
    346   BusyHandler *pBusyHandler;   /* Callback for when there is lock contention */
       
    347   u8 inTransaction;     /* Transaction state (presumably one of the TRANS_* values -- confirm) */
       
    348   int nRef;             /* Number of references to this structure */
       
    349   int nTransaction;     /* Number of open transactions (read + write) */
       
    350   void *pSchema;        /* Pointer to space allocated by sqlite3BtreeSchema() */
       
    351   void (*xFreeSchema)(void*);  /* Destructor for BtShared.pSchema */
       
    352 #ifndef SQLITE_OMIT_SHARED_CACHE
       
    353   BtLock *pLock;        /* List of locks held on this shared-btree struct (walked by queryTableLock()) */
       
    354   BtShared *pNext;      /* Next in ThreadData.pBtree linked list */
       
    355 #endif
       
    356 };
       
   357 
       
   358 /*
       
   359 ** An instance of the following structure is used to hold information
       
   360 ** about a cell.  The parseCellPtr() function fills in this structure
       
    361 ** based on information extracted from the raw disk page.
       
   362 */
       
   363 typedef struct CellInfo CellInfo;
       
    364 struct CellInfo {   /* Parsed description of a single cell on a b-tree page */
       
    365   u8 *pCell;     /* Pointer to the start of cell content */
       
    366   i64 nKey;      /* The key for INTKEY tables, or number of bytes in key */
       
    367   u32 nData;     /* Number of bytes of data */
       
    368   u16 nHeader;   /* Size of the cell content header in bytes */
       
    369   u16 nLocal;    /* Amount of payload held locally (not on overflow pages) */
       
    370   u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
       
    371   u16 nSize;     /* Size of the cell content on the main b-tree page */
       
    372 };
       
   373 
       
   374 /*
       
   375 ** A cursor is a pointer to a particular entry in the BTree.
       
   376 ** The entry is identified by its MemPage and the index in
       
   377 ** MemPage.aCell[] of the entry.
       
   378 */
       
    379 struct BtCursor {          /* A position within one b-tree, plus saved state for re-seeking */
       
    380   Btree *pBtree;            /* The Btree to which this cursor belongs */
       
    381   BtCursor *pNext, *pPrev;  /* Forms a linked list of all cursors */
       
    382   int (*xCompare)(void*,int,const void*,int,const void*); /* Key comp func */
       
    383   void *pArg;               /* First arg to xCompare() */
       
    384   Pgno pgnoRoot;            /* The root page of this tree */
       
    385   MemPage *pPage;           /* Page that contains the entry */
       
    386   int idx;                  /* Index of the entry in pPage->aCell[] */
       
    387   CellInfo info;            /* A parse of the cell we are pointing at */
       
    388   u8 wrFlag;                /* True if writable */
       
    389   u8 eState;                /* One of the CURSOR_XXX constants (see below) */
       
    390   void *pKey;      /* Saved key that was cursor's last known position (used with CURSOR_REQUIRESEEK) */
       
    391   i64 nKey;        /* Size of pKey, or last integer key */
       
    392   int skip;        /* (skip<0) -> Prev() is a no-op. (skip>0) -> Next() is (comment truncated in original; presumably "a no-op") */
       
    393 };
       
   394 
       
   395 /*
       
   396 ** Potential values for BtCursor.eState.
       
   397 **
       
   398 ** CURSOR_VALID:
       
   399 **   Cursor points to a valid entry. getPayload() etc. may be called.
       
   400 **
       
   401 ** CURSOR_INVALID:
       
   402 **   Cursor does not point to a valid entry. This can happen (for example) 
       
   403 **   because the table is empty or because BtreeCursorFirst() has not been
       
   404 **   called.
       
   405 **
       
   406 ** CURSOR_REQUIRESEEK:
       
   407 **   The table that this cursor was opened on still exists, but has been 
       
   408 **   modified since the cursor was last used. The cursor position is saved
       
   409 **   in variables BtCursor.pKey and BtCursor.nKey. When a cursor is in 
       
   410 **   this state, restoreOrClearCursorPosition() can be called to attempt to
       
   411 **   seek the cursor to the saved position.
       
   412 */
       
   413 #define CURSOR_INVALID           0
       
   414 #define CURSOR_VALID             1
       
   415 #define CURSOR_REQUIRESEEK       2
       
   416 
       
   417 /*
       
   418 ** The TRACE macro will print high-level status information about the
       
   419 ** btree operation when the global variable sqlite3_btree_trace is
       
   420 ** enabled.
       
   421 */
       
   422 #if SQLITE_TEST
       
   423 # define TRACE(X)   if( sqlite3_btree_trace )\
       
   424                         { sqlite3DebugPrintf X; fflush(stdout); }
       
   425 int sqlite3_btree_trace=0;  /* True to enable tracing */
       
   426 #else
       
   427 # define TRACE(X)
       
   428 #endif
       
   429 
       
   430 /*
       
   431 ** Forward declaration
       
   432 */
       
   433 static int checkReadLocks(Btree*,Pgno,BtCursor*);
       
   434 
       
   435 /*
       
   436 ** Read or write a two- and four-byte big-endian integer values.
       
   437 */
       
   438 static u32 get2byte(unsigned char *p){
       
   439   return (p[0]<<8) | p[1];
       
   440 }
       
   441 static u32 get4byte(unsigned char *p){
       
   442   return (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
       
   443 }
       
   444 static void put2byte(unsigned char *p, u32 v){
       
   445   p[0] = v>>8;
       
   446   p[1] = v;
       
   447 }
       
   448 static void put4byte(unsigned char *p, u32 v){
       
   449   p[0] = v>>24;
       
   450   p[1] = v>>16;
       
   451   p[2] = v>>8;
       
   452   p[3] = v;
       
   453 }
       
   454 
       
   455 /*
       
   456 ** Routines to read and write variable-length integers.  These used to
       
   457 ** be defined locally, but now we use the varint routines in the util.c
       
   458 ** file.
       
   459 */
       
   460 #define getVarint    sqlite3GetVarint
       
   461 /* #define getVarint32  sqlite3GetVarint32 */
       
   462 #define getVarint32(A,B)  ((*B=*(A))<=0x7f?1:sqlite3GetVarint32(A,B))
       
   463 #define putVarint    sqlite3PutVarint
       
   464 
       
   465 /* The database page the PENDING_BYTE occupies. This page is never used.
       
    466 ** TODO: This macro is very similar to PAGER_MJ_PGNO() in pager.c. They
       
   467 ** should possibly be consolidated (presumably in pager.h).
       
   468 **
       
   469 ** If disk I/O is omitted (meaning that the database is stored purely
       
   470 ** in memory) then there is no pending byte.
       
   471 */
       
   472 #ifdef SQLITE_OMIT_DISKIO
       
   473 # define PENDING_BYTE_PAGE(pBt)  0x7fffffff
       
   474 #else
       
   475 # define PENDING_BYTE_PAGE(pBt) ((PENDING_BYTE/(pBt)->pageSize)+1)
       
   476 #endif
       
   477 
       
   478 /*
       
   479 ** A linked list of the following structures is stored at BtShared.pLock.
       
   480 ** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor 
       
   481 ** is opened on the table with root page BtShared.iTable. Locks are removed
       
   482 ** from this list when a transaction is committed or rolled back, or when
       
   483 ** a btree handle is closed.
       
   484 */
       
    485 struct BtLock {         /* One table-level lock; entries are chained from BtShared.pLock */
       
    486   Btree *pBtree;        /* Btree handle holding this lock */
       
    487   Pgno iTable;          /* Root page of table */
       
    488   u8 eLock;             /* READ_LOCK or WRITE_LOCK */
       
    489   BtLock *pNext;        /* Next in BtShared.pLock list */
       
    490 };
       
   491 
       
   492 /* Candidate values for BtLock.eLock */
       
   493 #define READ_LOCK     1
       
   494 #define WRITE_LOCK    2
       
   495 
       
   496 #ifdef SQLITE_OMIT_SHARED_CACHE
       
   497   /*
       
   498   ** The functions queryTableLock(), lockTable() and unlockAllTables()
       
   499   ** manipulate entries in the BtShared.pLock linked list used to store
       
   500   ** shared-cache table level locks. If the library is compiled with the
       
   501   ** shared-cache feature disabled, then there is only ever one user
       
   502   ** of each BtShared structure and so this locking is not necessary. 
       
   503   ** So define the lock related functions as no-ops.
       
   504   */
       
   505   #define queryTableLock(a,b,c) SQLITE_OK
       
   506   #define lockTable(a,b,c) SQLITE_OK
       
   507   #define unlockAllTables(a)
       
   508 #else
       
   509 
       
   510 
       
   511 /*
       
   512 ** Query to see if btree handle p may obtain a lock of type eLock 
       
   513 ** (READ_LOCK or WRITE_LOCK) on the table with root-page iTab. Return
       
   514 ** SQLITE_OK if the lock may be obtained (by calling lockTable()), or
       
   515 ** SQLITE_LOCKED if not.
       
   516 */
       
   517 static int queryTableLock(Btree *p, Pgno iTab, u8 eLock){
       
   518   BtShared *pBt = p->pBt;
       
   519   BtLock *pIter;
       
   520 
       
   521   /* This is a no-op if the shared-cache is not enabled */
       
   522   if( 0==sqlite3ThreadDataReadOnly()->useSharedData ){
       
   523     return SQLITE_OK;
       
   524   }
       
   525 
       
   526   /* This (along with lockTable()) is where the ReadUncommitted flag is
       
   527   ** dealt with. If the caller is querying for a read-lock and the flag is
       
   528   ** set, it is unconditionally granted - even if there are write-locks
       
   529   ** on the table. If a write-lock is requested, the ReadUncommitted flag
       
   530   ** is not considered.
       
   531   **
       
   532   ** In function lockTable(), if a read-lock is demanded and the 
       
   533   ** ReadUncommitted flag is set, no entry is added to the locks list 
       
   534   ** (BtShared.pLock).
       
   535   **
       
   536   ** To summarize: If the ReadUncommitted flag is set, then read cursors do
       
   537   ** not create or respect table locks. The locking procedure for a 
       
   538   ** write-cursor does not change.
       
   539   */
       
   540   if( 
       
   541     !p->pSqlite || 
       
   542     0==(p->pSqlite->flags&SQLITE_ReadUncommitted) || 
       
   543     eLock==WRITE_LOCK ||
       
   544     iTab==MASTER_ROOT
       
   545   ){
       
   546     for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
       
   547       if( pIter->pBtree!=p && pIter->iTable==iTab && 
       
   548           (pIter->eLock!=eLock || eLock!=READ_LOCK) ){
       
   549         return SQLITE_LOCKED;
       
   550       }
       
   551     }
       
   552   }
       
   553   return SQLITE_OK;
       
   554 }
       
   555 
       
   556 /*
       
   557 ** Add a lock on the table with root-page iTable to the shared-btree used
       
   558 ** by Btree handle p. Parameter eLock must be either READ_LOCK or 
       
   559 ** WRITE_LOCK.
       
   560 **
       
   561 ** SQLITE_OK is returned if the lock is added successfully. SQLITE_BUSY and
       
   562 ** SQLITE_NOMEM may also be returned.
       
   563 */
       
   564 static int lockTable(Btree *p, Pgno iTable, u8 eLock){
       
   565   BtShared *pBt = p->pBt;
       
   566   BtLock *pLock = 0;
       
   567   BtLock *pIter;
       
   568 
       
   569   /* This is a no-op if the shared-cache is not enabled */
       
   570   if( 0==sqlite3ThreadDataReadOnly()->useSharedData ){
       
   571     return SQLITE_OK;
       
   572   }
       
   573 
       
   574   assert( SQLITE_OK==queryTableLock(p, iTable, eLock) );
       
   575 
       
   576   /* If the read-uncommitted flag is set and a read-lock is requested,
       
   577   ** return early without adding an entry to the BtShared.pLock list. See
       
   578   ** comment in function queryTableLock() for more info on handling 
       
   579   ** the ReadUncommitted flag.
       
   580   */
       
   581   if( 
       
   582     (p->pSqlite) && 
       
   583     (p->pSqlite->flags&SQLITE_ReadUncommitted) && 
       
   584     (eLock==READ_LOCK) &&
       
   585     iTable!=MASTER_ROOT
       
   586   ){
       
   587     return SQLITE_OK;
       
   588   }
       
   589 
       
   590   /* First search the list for an existing lock on this table. */
       
   591   for(pIter=pBt->pLock; pIter; pIter=pIter->pNext){
       
   592     if( pIter->iTable==iTable && pIter->pBtree==p ){
       
   593       pLock = pIter;
       
   594       break;
       
   595     }
       
   596   }
       
   597 
       
   598   /* If the above search did not find a BtLock struct associating Btree p
       
   599   ** with table iTable, allocate one and link it into the list.
       
   600   */
       
   601   if( !pLock ){
       
   602     pLock = (BtLock *)sqliteMalloc(sizeof(BtLock));
       
   603     if( !pLock ){
       
   604       return SQLITE_NOMEM;
       
   605     }
       
   606     pLock->iTable = iTable;
       
   607     pLock->pBtree = p;
       
   608     pLock->pNext = pBt->pLock;
       
   609     pBt->pLock = pLock;
       
   610   }
       
   611 
       
   612   /* Set the BtLock.eLock variable to the maximum of the current lock
       
   613   ** and the requested lock. This means if a write-lock was already held
       
   614   ** and a read-lock requested, we don't incorrectly downgrade the lock.
       
   615   */
       
   616   assert( WRITE_LOCK>READ_LOCK );
       
   617   if( eLock>pLock->eLock ){
       
   618     pLock->eLock = eLock;
       
   619   }
       
   620 
       
   621   return SQLITE_OK;
       
   622 }
       
   623 
       
   624 /*
       
   625 ** Release all the table locks (locks obtained via calls to the lockTable()
       
   626 ** procedure) held by Btree handle p.
       
   627 */
       
   628 static void unlockAllTables(Btree *p){
       
   629   BtLock **ppIter = &p->pBt->pLock;
       
   630 
       
   631   /* If the shared-cache extension is not enabled, there should be no
       
   632   ** locks in the BtShared.pLock list, making this procedure a no-op. Assert
       
   633   ** that this is the case.
       
   634   */
       
   635   assert( sqlite3ThreadDataReadOnly()->useSharedData || 0==*ppIter );
       
   636 
       
   637   while( *ppIter ){
       
   638     BtLock *pLock = *ppIter;
       
   639     if( pLock->pBtree==p ){
       
   640       *ppIter = pLock->pNext;
       
   641       sqliteFree(pLock);
       
   642     }else{
       
   643       ppIter = &pLock->pNext;
       
   644     }
       
   645   }
       
   646 }
       
   647 #endif /* SQLITE_OMIT_SHARED_CACHE */
       
   648 
       
   649 static void releasePage(MemPage *pPage);  /* Forward reference */
       
   650 
       
   651 /*
       
   652 ** Save the current cursor position in the variables BtCursor.nKey 
       
   653 ** and BtCursor.pKey. The cursor's state is set to CURSOR_REQUIRESEEK.
       
   654 */
       
   655 static int saveCursorPosition(BtCursor *pCur){
       
   656   int rc;
       
   657 
       
   658   assert( CURSOR_VALID==pCur->eState );
       
   659   assert( 0==pCur->pKey );
       
   660 
       
   661   rc = sqlite3BtreeKeySize(pCur, &pCur->nKey);
       
   662 
       
   663   /* If this is an intKey table, then the above call to BtreeKeySize()
       
   664   ** stores the integer key in pCur->nKey. In this case this value is
       
   665   ** all that is required. Otherwise, if pCur is not open on an intKey
       
   666   ** table, then malloc space for and store the pCur->nKey bytes of key 
       
   667   ** data.
       
   668   */
       
   669   if( rc==SQLITE_OK && 0==pCur->pPage->intKey){
       
   670     void *pKey = sqliteMalloc(pCur->nKey);
       
   671     if( pKey ){
       
   672       rc = sqlite3BtreeKey(pCur, 0, pCur->nKey, pKey);
       
   673       if( rc==SQLITE_OK ){
       
   674         pCur->pKey = pKey;
       
   675       }else{
       
   676         sqliteFree(pKey);
       
   677       }
       
   678     }else{
       
   679       rc = SQLITE_NOMEM;
       
   680     }
       
   681   }
       
   682   assert( !pCur->pPage->intKey || !pCur->pKey );
       
   683 
       
   684   if( rc==SQLITE_OK ){
       
   685     releasePage(pCur->pPage);
       
   686     pCur->pPage = 0;
       
   687     pCur->eState = CURSOR_REQUIRESEEK;
       
   688   }
       
   689 
       
   690   return rc;
       
   691 }
       
   692 
       
   693 /*
       
   694 ** Save the positions of all cursors except pExcept open on the table 
       
   695 ** with root-page iRoot. Usually, this is called just before cursor
       
   696 ** pExcept is used to modify the table (BtreeDelete() or BtreeInsert()).
       
   697 */
       
   698 static int saveAllCursors(BtShared *pBt, Pgno iRoot, BtCursor *pExcept){
       
   699   BtCursor *p;
       
   700   for(p=pBt->pCursor; p; p=p->pNext){
       
   701     if( p!=pExcept && (0==iRoot || p->pgnoRoot==iRoot) && 
       
   702         p->eState==CURSOR_VALID ){
       
   703       int rc = saveCursorPosition(p);
       
   704       if( SQLITE_OK!=rc ){
       
   705         return rc;
       
   706       }
       
   707     }
       
   708   }
       
   709   return SQLITE_OK;
       
   710 }
       
   711 
       
   712 /*
       
   713 ** Restore the cursor to the position it was in (or as close to as possible)
       
   714 ** when saveCursorPosition() was called. Note that this call deletes the 
       
   715 ** saved position info stored by saveCursorPosition(), so there can be
       
   716 ** at most one effective restoreOrClearCursorPosition() call after each 
       
   717 ** saveCursorPosition().
       
   718 **
       
   719 ** If the second argument argument - doSeek - is false, then instead of 
       
   720 ** returning the cursor to it's saved position, any saved position is deleted
       
   721 ** and the cursor state set to CURSOR_INVALID.
       
   722 */
       
   723 static int restoreOrClearCursorPositionX(BtCursor *pCur, int doSeek){
       
   724   int rc = SQLITE_OK;
       
   725   assert( pCur->eState==CURSOR_REQUIRESEEK );
       
   726   pCur->eState = CURSOR_INVALID;
       
   727   if( doSeek ){
       
   728     rc = sqlite3BtreeMoveto(pCur, pCur->pKey, pCur->nKey, &pCur->skip);
       
   729   }
       
   730   if( rc==SQLITE_OK ){
       
   731     sqliteFree(pCur->pKey);
       
   732     pCur->pKey = 0;
       
   733     assert( CURSOR_VALID==pCur->eState || CURSOR_INVALID==pCur->eState );
       
   734   }
       
   735   return rc;
       
   736 }
       
   737 
       
   738 #define restoreOrClearCursorPosition(p,x) \
       
   739   (p->eState==CURSOR_REQUIRESEEK?restoreOrClearCursorPositionX(p,x):SQLITE_OK)
       
   740 
       
   741 #ifndef SQLITE_OMIT_AUTOVACUUM
       
   742 /*
       
   743 ** These macros define the location of the pointer-map entry for a 
       
   744 ** database page. The first argument to each is the number of usable
       
   745 ** bytes on each page of the database (often 1024). The second is the
       
   746 ** page number to look up in the pointer map.
       
   747 **
       
   748 ** PTRMAP_PAGENO returns the database page number of the pointer-map
       
   749 ** page that stores the required pointer. PTRMAP_PTROFFSET returns
       
   750 ** the offset of the requested map entry.
       
   751 **
       
   752 ** If the pgno argument passed to PTRMAP_PAGENO is a pointer-map page,
       
   753 ** then pgno is returned. So (pgno==PTRMAP_PAGENO(pgsz, pgno)) can be
       
   754 ** used to test if pgno is a pointer-map page. PTRMAP_ISPAGE implements
       
   755 ** this test.
       
   756 */
       
   757 #define PTRMAP_PAGENO(pBt, pgno) ptrmapPageno(pBt, pgno)
       
   758 #define PTRMAP_PTROFFSET(pBt, pgno) (5*(pgno-ptrmapPageno(pBt, pgno)-1))
       
   759 #define PTRMAP_ISPAGE(pBt, pgno) (PTRMAP_PAGENO((pBt),(pgno))==(pgno))
       
   760 
       
   761 static Pgno ptrmapPageno(BtShared *pBt, Pgno pgno){
       
   762   int nPagesPerMapPage = (pBt->usableSize/5)+1;
       
   763   int iPtrMap = (pgno-2)/nPagesPerMapPage;
       
   764   int ret = (iPtrMap*nPagesPerMapPage) + 2; 
       
   765   if( ret==PENDING_BYTE_PAGE(pBt) ){
       
   766     ret++;
       
   767   }
       
   768   return ret;
       
   769 }
       
   770 
       
   771 /*
       
   772 ** The pointer map is a lookup table that identifies the parent page for
       
   773 ** each child page in the database file.  The parent page is the page that
       
   774 ** contains a pointer to the child.  Every page in the database contains
       
   775 ** 0 or 1 parent pages.  (In this context 'database page' refers
       
   776 ** to any page that is not part of the pointer map itself.)  Each pointer map
       
   777 ** entry consists of a single byte 'type' and a 4 byte parent page number.
       
   778 ** The PTRMAP_XXX identifiers below are the valid types.
       
   779 **
       
   780 ** The purpose of the pointer map is to facility moving pages from one
       
   781 ** position in the file to another as part of autovacuum.  When a page
       
   782 ** is moved, the pointer in its parent must be updated to point to the
       
   783 ** new location.  The pointer map is used to locate the parent page quickly.
       
   784 **
       
   785 ** PTRMAP_ROOTPAGE: The database page is a root-page. The page-number is not
       
   786 **                  used in this case.
       
   787 **
       
   788 ** PTRMAP_FREEPAGE: The database page is an unused (free) page. The page-number 
       
   789 **                  is not used in this case.
       
   790 **
       
   791 ** PTRMAP_OVERFLOW1: The database page is the first page in a list of 
       
   792 **                   overflow pages. The page number identifies the page that
       
   793 **                   contains the cell with a pointer to this overflow page.
       
   794 **
       
   795 ** PTRMAP_OVERFLOW2: The database page is the second or later page in a list of
       
   796 **                   overflow pages. The page-number identifies the previous
       
   797 **                   page in the overflow page list.
       
   798 **
       
   799 ** PTRMAP_BTREE: The database page is a non-root btree page. The page number
       
   800 **               identifies the parent page in the btree.
       
   801 */
       
   802 #define PTRMAP_ROOTPAGE 1
       
   803 #define PTRMAP_FREEPAGE 2
       
   804 #define PTRMAP_OVERFLOW1 3
       
   805 #define PTRMAP_OVERFLOW2 4
       
   806 #define PTRMAP_BTREE 5
       
   807 
       
   808 /*
       
   809 ** Write an entry into the pointer map.
       
   810 **
       
   811 ** This routine updates the pointer map entry for page number 'key'
       
   812 ** so that it maps to type 'eType' and parent page number 'pgno'.
       
   813 ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
       
   814 */
       
   815 static int ptrmapPut(BtShared *pBt, Pgno key, u8 eType, Pgno parent){
       
   816   u8 *pPtrmap;    /* The pointer map page */
       
   817   Pgno iPtrmap;   /* The pointer map page number */
       
   818   int offset;     /* Offset in pointer map page */
       
   819   int rc;
       
   820 
       
   821   /* The master-journal page number must never be used as a pointer map page */
       
   822   assert( 0==PTRMAP_ISPAGE(pBt, PENDING_BYTE_PAGE(pBt)) );
       
   823 
       
   824   assert( pBt->autoVacuum );
       
   825   if( key==0 ){
       
   826     return SQLITE_CORRUPT_BKPT;
       
   827   }
       
   828   iPtrmap = PTRMAP_PAGENO(pBt, key);
       
   829   rc = sqlite3pager_get(pBt->pPager, iPtrmap, (void **)&pPtrmap);
       
   830   if( rc!=SQLITE_OK ){
       
   831     return rc;
       
   832   }
       
   833   offset = PTRMAP_PTROFFSET(pBt, key);
       
   834 
       
   835   if( eType!=pPtrmap[offset] || get4byte(&pPtrmap[offset+1])!=parent ){
       
   836     TRACE(("PTRMAP_UPDATE: %d->(%d,%d)\n", key, eType, parent));
       
   837     rc = sqlite3pager_write(pPtrmap);
       
   838     if( rc==SQLITE_OK ){
       
   839       pPtrmap[offset] = eType;
       
   840       put4byte(&pPtrmap[offset+1], parent);
       
   841     }
       
   842   }
       
   843 
       
   844   sqlite3pager_unref(pPtrmap);
       
   845   return rc;
       
   846 }
       
   847 
       
   848 /*
       
   849 ** Read an entry from the pointer map.
       
   850 **
       
   851 ** This routine retrieves the pointer map entry for page 'key', writing
       
   852 ** the type and parent page number to *pEType and *pPgno respectively.
       
   853 ** An error code is returned if something goes wrong, otherwise SQLITE_OK.
       
   854 */
       
   855 static int ptrmapGet(BtShared *pBt, Pgno key, u8 *pEType, Pgno *pPgno){
       
   856   int iPtrmap;       /* Pointer map page index */
       
   857   u8 *pPtrmap;       /* Pointer map page data */
       
   858   int offset;        /* Offset of entry in pointer map */
       
   859   int rc;
       
   860 
       
   861   iPtrmap = PTRMAP_PAGENO(pBt, key);
       
   862   rc = sqlite3pager_get(pBt->pPager, iPtrmap, (void **)&pPtrmap);
       
   863   if( rc!=0 ){
       
   864     return rc;
       
   865   }
       
   866 
       
   867   offset = PTRMAP_PTROFFSET(pBt, key);
       
   868   assert( pEType!=0 );
       
   869   *pEType = pPtrmap[offset];
       
   870   if( pPgno ) *pPgno = get4byte(&pPtrmap[offset+1]);
       
   871 
       
   872   sqlite3pager_unref(pPtrmap);
       
   873   if( *pEType<1 || *pEType>5 ) return SQLITE_CORRUPT_BKPT;
       
   874   return SQLITE_OK;
       
   875 }
       
   876 
       
   877 #endif /* SQLITE_OMIT_AUTOVACUUM */
       
   878 
       
   879 /*
       
   880 ** Given a btree page and a cell index (0 means the first cell on
       
   881 ** the page, 1 means the second cell, and so forth) return a pointer
       
   882 ** to the cell content.
       
   883 **
       
   884 ** This routine works only for pages that do not contain overflow cells.
       
   885 */
       
   886 static u8 *findCell(MemPage *pPage, int iCell){
       
   887   u8 *data = pPage->aData;
       
   888   assert( iCell>=0 );
       
   889   assert( iCell<get2byte(&data[pPage->hdrOffset+3]) );
       
   890   return data + get2byte(&data[pPage->cellOffset+2*iCell]);
       
   891 }
       
   892 
       
   893 /*
       
   894 ** This a more complex version of findCell() that works for
       
   895 ** pages that do contain overflow cells.  See insert
       
   896 */
       
   897 static u8 *findOverflowCell(MemPage *pPage, int iCell){
       
   898   int i;
       
   899   for(i=pPage->nOverflow-1; i>=0; i--){
       
   900     int k;
       
   901     struct _OvflCell *pOvfl;
       
   902     pOvfl = &pPage->aOvfl[i];
       
   903     k = pOvfl->idx;
       
   904     if( k<=iCell ){
       
   905       if( k==iCell ){
       
   906         return pOvfl->pCell;
       
   907       }
       
   908       iCell--;
       
   909     }
       
   910   }
       
   911   return findCell(pPage, iCell);
       
   912 }
       
   913 
       
   914 /*
       
   915 ** Parse a cell content block and fill in the CellInfo structure.  There
       
   916 ** are two versions of this function.  parseCell() takes a cell index
       
   917 ** as the second argument and parseCellPtr() takes a pointer to the
       
   918 ** body of the cell as its second argument.
       
   919 */
       
   920 static void parseCellPtr(
       
   921   MemPage *pPage,         /* Page containing the cell */
       
   922   u8 *pCell,              /* Pointer to the cell text. */
       
   923   CellInfo *pInfo         /* Fill in this structure */
       
   924 ){
       
   925   int n;                  /* Number bytes in cell content header */
       
   926   u32 nPayload;           /* Number of bytes of cell payload */
       
   927 
       
   928   pInfo->pCell = pCell;
       
   929   assert( pPage->leaf==0 || pPage->leaf==1 );
       
   930   n = pPage->childPtrSize;
       
   931   assert( n==4-4*pPage->leaf );
       
   932   if( pPage->hasData ){
       
   933     n += getVarint32(&pCell[n], &nPayload);
       
   934   }else{
       
   935     nPayload = 0;
       
   936   }
       
   937   pInfo->nData = nPayload;
       
   938   if( pPage->intKey ){
       
   939     n += getVarint(&pCell[n], (u64 *)&pInfo->nKey);
       
   940   }else{
       
   941     u32 x;
       
   942     n += getVarint32(&pCell[n], &x);
       
   943     pInfo->nKey = x;
       
   944     nPayload += x;
       
   945   }
       
   946   pInfo->nHeader = n;
       
   947   if( nPayload<=pPage->maxLocal ){
       
   948     /* This is the (easy) common case where the entire payload fits
       
   949     ** on the local page.  No overflow is required.
       
   950     */
       
   951     int nSize;          /* Total size of cell content in bytes */
       
   952     pInfo->nLocal = nPayload;
       
   953     pInfo->iOverflow = 0;
       
   954     nSize = nPayload + n;
       
   955     if( nSize<4 ){
       
   956       nSize = 4;        /* Minimum cell size is 4 */
       
   957     }
       
   958     pInfo->nSize = nSize;
       
   959   }else{
       
   960     /* If the payload will not fit completely on the local page, we have
       
   961     ** to decide how much to store locally and how much to spill onto
       
   962     ** overflow pages.  The strategy is to minimize the amount of unused
       
   963     ** space on overflow pages while keeping the amount of local storage
       
   964     ** in between minLocal and maxLocal.
       
   965     **
       
   966     ** Warning:  changing the way overflow payload is distributed in any
       
   967     ** way will result in an incompatible file format.
       
   968     */
       
   969     int minLocal;  /* Minimum amount of payload held locally */
       
   970     int maxLocal;  /* Maximum amount of payload held locally */
       
   971     int surplus;   /* Overflow payload available for local storage */
       
   972 
       
   973     minLocal = pPage->minLocal;
       
   974     maxLocal = pPage->maxLocal;
       
   975     surplus = minLocal + (nPayload - minLocal)%(pPage->pBt->usableSize - 4);
       
   976     if( surplus <= maxLocal ){
       
   977       pInfo->nLocal = surplus;
       
   978     }else{
       
   979       pInfo->nLocal = minLocal;
       
   980     }
       
   981     pInfo->iOverflow = pInfo->nLocal + n;
       
   982     pInfo->nSize = pInfo->iOverflow + 4;
       
   983   }
       
   984 }
       
   985 static void parseCell(
       
   986   MemPage *pPage,         /* Page containing the cell */
       
   987   int iCell,              /* The cell index.  First cell is 0 */
       
   988   CellInfo *pInfo         /* Fill in this structure */
       
   989 ){
       
   990   parseCellPtr(pPage, findCell(pPage, iCell), pInfo);
       
   991 }
       
   992 
       
   993 /*
       
   994 ** Compute the total number of bytes that a Cell needs in the cell
       
   995 ** data area of the btree-page.  The return number includes the cell
       
   996 ** data header and the local payload, but not any overflow page or
       
   997 ** the space used by the cell pointer.
       
   998 */
       
   999 #ifndef NDEBUG
       
  1000 static int cellSize(MemPage *pPage, int iCell){
       
  1001   CellInfo info;
       
  1002   parseCell(pPage, iCell, &info);
       
  1003   return info.nSize;
       
  1004 }
       
  1005 #endif
       
  1006 static int cellSizePtr(MemPage *pPage, u8 *pCell){
       
  1007   CellInfo info;
       
  1008   parseCellPtr(pPage, pCell, &info);
       
  1009   return info.nSize;
       
  1010 }
       
  1011 
       
  1012 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  1013 /*
       
  1014 ** If the cell pCell, part of page pPage contains a pointer
       
  1015 ** to an overflow page, insert an entry into the pointer-map
       
  1016 ** for the overflow page.
       
  1017 */
       
  1018 static int ptrmapPutOvflPtr(MemPage *pPage, u8 *pCell){
       
  1019   if( pCell ){
       
  1020     CellInfo info;
       
  1021     parseCellPtr(pPage, pCell, &info);
       
  1022     if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
       
  1023       Pgno ovfl = get4byte(&pCell[info.iOverflow]);
       
  1024       return ptrmapPut(pPage->pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno);
       
  1025     }
       
  1026   }
       
  1027   return SQLITE_OK;
       
  1028 }
       
  1029 /*
       
  1030 ** If the cell with index iCell on page pPage contains a pointer
       
  1031 ** to an overflow page, insert an entry into the pointer-map
       
  1032 ** for the overflow page.
       
  1033 */
       
  1034 static int ptrmapPutOvfl(MemPage *pPage, int iCell){
       
  1035   u8 *pCell;
       
  1036   pCell = findOverflowCell(pPage, iCell);
       
  1037   return ptrmapPutOvflPtr(pPage, pCell);
       
  1038 }
       
  1039 #endif
       
  1040 
       
  1041 
       
  1042 /*
       
  1043 ** Do sanity checking on a page.  Throw an exception if anything is
       
  1044 ** not right.
       
  1045 **
       
  1046 ** This routine is used for internal error checking only.  It is omitted
       
  1047 ** from most builds.
       
  1048 */
       
  1049 #if defined(BTREE_DEBUG) && !defined(NDEBUG) && 0
       
  1050 static void _pageIntegrity(MemPage *pPage){
       
  1051   int usableSize;
       
  1052   u8 *data;
       
  1053   int i, j, idx, c, pc, hdr, nFree;
       
  1054   int cellOffset;
       
  1055   int nCell, cellLimit;
       
  1056   u8 *used;
       
  1057 
       
  1058   used = sqliteMallocRaw( pPage->pBt->pageSize );
       
  1059   if( used==0 ) return;
       
  1060   usableSize = pPage->pBt->usableSize;
       
  1061   assert( pPage->aData==&((unsigned char*)pPage)[-pPage->pBt->pageSize] );
       
  1062   hdr = pPage->hdrOffset;
       
  1063   assert( hdr==(pPage->pgno==1 ? 100 : 0) );
       
  1064   assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
       
  1065   c = pPage->aData[hdr];
       
  1066   if( pPage->isInit ){
       
  1067     assert( pPage->leaf == ((c & PTF_LEAF)!=0) );
       
  1068     assert( pPage->zeroData == ((c & PTF_ZERODATA)!=0) );
       
  1069     assert( pPage->leafData == ((c & PTF_LEAFDATA)!=0) );
       
  1070     assert( pPage->intKey == ((c & (PTF_INTKEY|PTF_LEAFDATA))!=0) );
       
  1071     assert( pPage->hasData ==
       
  1072              !(pPage->zeroData || (!pPage->leaf && pPage->leafData)) );
       
  1073     assert( pPage->cellOffset==pPage->hdrOffset+12-4*pPage->leaf );
       
  1074     assert( pPage->nCell = get2byte(&pPage->aData[hdr+3]) );
       
  1075   }
       
  1076   data = pPage->aData;
       
  1077   memset(used, 0, usableSize);
       
  1078   for(i=0; i<hdr+10-pPage->leaf*4; i++) used[i] = 1;
       
  1079   nFree = 0;
       
  1080   pc = get2byte(&data[hdr+1]);
       
  1081   while( pc ){
       
  1082     int size;
       
  1083     assert( pc>0 && pc<usableSize-4 );
       
  1084     size = get2byte(&data[pc+2]);
       
  1085     assert( pc+size<=usableSize );
       
  1086     nFree += size;
       
  1087     for(i=pc; i<pc+size; i++){
       
  1088       assert( used[i]==0 );
       
  1089       used[i] = 1;
       
  1090     }
       
  1091     pc = get2byte(&data[pc]);
       
  1092   }
       
  1093   idx = 0;
       
  1094   nCell = get2byte(&data[hdr+3]);
       
  1095   cellLimit = get2byte(&data[hdr+5]);
       
  1096   assert( pPage->isInit==0 
       
  1097          || pPage->nFree==nFree+data[hdr+7]+cellLimit-(cellOffset+2*nCell) );
       
  1098   cellOffset = pPage->cellOffset;
       
  1099   for(i=0; i<nCell; i++){
       
  1100     int size;
       
  1101     pc = get2byte(&data[cellOffset+2*i]);
       
  1102     assert( pc>0 && pc<usableSize-4 );
       
  1103     size = cellSize(pPage, &data[pc]);
       
  1104     assert( pc+size<=usableSize );
       
  1105     for(j=pc; j<pc+size; j++){
       
  1106       assert( used[j]==0 );
       
  1107       used[j] = 1;
       
  1108     }
       
  1109   }
       
  1110   for(i=cellOffset+2*nCell; i<cellimit; i++){
       
  1111     assert( used[i]==0 );
       
  1112     used[i] = 1;
       
  1113   }
       
  1114   nFree = 0;
       
  1115   for(i=0; i<usableSize; i++){
       
  1116     assert( used[i]<=1 );
       
  1117     if( used[i]==0 ) nFree++;
       
  1118   }
       
  1119   assert( nFree==data[hdr+7] );
       
  1120   sqliteFree(used);
       
  1121 }
       
  1122 #define pageIntegrity(X) _pageIntegrity(X)
       
  1123 #else
       
  1124 # define pageIntegrity(X)
       
  1125 #endif
       
  1126 
       
  1127 /* A bunch of assert() statements to check the transaction state variables
       
  1128 ** of handle p (type Btree*) are internally consistent.
       
  1129 */
       
  1130 #define btreeIntegrity(p) \
       
  1131   assert( p->inTrans!=TRANS_NONE || p->pBt->nTransaction<p->pBt->nRef ); \
       
  1132   assert( p->pBt->nTransaction<=p->pBt->nRef ); \
       
  1133   assert( p->pBt->inTransaction!=TRANS_NONE || p->pBt->nTransaction==0 ); \
       
  1134   assert( p->pBt->inTransaction>=p->inTrans ); 
       
  1135 
       
  1136 /*
       
  1137 ** Defragment the page given.  All Cells are moved to the
       
  1138 ** end of the page and all free space is collected into one
       
  1139 ** big FreeBlk that occurs in between the header and cell
       
  1140 ** pointer array and the cell content area.
       
  1141 */
       
  1142 static int defragmentPage(MemPage *pPage){
       
  1143   int i;                     /* Loop counter */
       
  1144   int pc;                    /* Address of a i-th cell */
       
  1145   int addr;                  /* Offset of first byte after cell pointer array */
       
  1146   int hdr;                   /* Offset to the page header */
       
  1147   int size;                  /* Size of a cell */
       
  1148   int usableSize;            /* Number of usable bytes on a page */
       
  1149   int cellOffset;            /* Offset to the cell pointer array */
       
  1150   int brk;                   /* Offset to the cell content area */
       
  1151   int nCell;                 /* Number of cells on the page */
       
  1152   unsigned char *data;       /* The page data */
       
  1153   unsigned char *temp;       /* Temp area for cell content */
       
  1154 
       
  1155   assert( sqlite3pager_iswriteable(pPage->aData) );
       
  1156   assert( pPage->pBt!=0 );
       
  1157   assert( pPage->pBt->usableSize <= SQLITE_MAX_PAGE_SIZE );
       
  1158   assert( pPage->nOverflow==0 );
       
  1159   temp = sqliteMalloc( pPage->pBt->pageSize );
       
  1160   if( temp==0 ) return SQLITE_NOMEM;
       
  1161   data = pPage->aData;
       
  1162   hdr = pPage->hdrOffset;
       
  1163   cellOffset = pPage->cellOffset;
       
  1164   nCell = pPage->nCell;
       
  1165   assert( nCell==get2byte(&data[hdr+3]) );
       
  1166   usableSize = pPage->pBt->usableSize;
       
  1167   brk = get2byte(&data[hdr+5]);
       
  1168   memcpy(&temp[brk], &data[brk], usableSize - brk);
       
  1169   brk = usableSize;
       
  1170   for(i=0; i<nCell; i++){
       
  1171     u8 *pAddr;     /* The i-th cell pointer */
       
  1172     pAddr = &data[cellOffset + i*2];
       
  1173     pc = get2byte(pAddr);
       
  1174     assert( pc<pPage->pBt->usableSize );
       
  1175     size = cellSizePtr(pPage, &temp[pc]);
       
  1176     brk -= size;
       
  1177     memcpy(&data[brk], &temp[pc], size);
       
  1178     put2byte(pAddr, brk);
       
  1179   }
       
  1180   assert( brk>=cellOffset+2*nCell );
       
  1181   put2byte(&data[hdr+5], brk);
       
  1182   data[hdr+1] = 0;
       
  1183   data[hdr+2] = 0;
       
  1184   data[hdr+7] = 0;
       
  1185   addr = cellOffset+2*nCell;
       
  1186   memset(&data[addr], 0, brk-addr);
       
  1187   sqliteFree(temp);
       
  1188   return SQLITE_OK;
       
  1189 }
       
  1190 
       
  1191 /*
       
  1192 ** Allocate nByte bytes of space on a page.
       
  1193 **
       
  1194 ** Return the index into pPage->aData[] of the first byte of
       
  1195 ** the new allocation. Or return 0 if there is not enough free
       
  1196 ** space on the page to satisfy the allocation request.
       
  1197 **
       
  1198 ** If the page contains nBytes of free space but does not contain
       
  1199 ** nBytes of contiguous free space, then this routine automatically
       
  1200 ** calls defragementPage() to consolidate all free space before 
       
  1201 ** allocating the new chunk.
       
  1202 */
       
  1203 static int allocateSpace(MemPage *pPage, int nByte){
       
  1204   int addr, pc, hdr;
       
  1205   int size;
       
  1206   int nFrag;
       
  1207   int top;
       
  1208   int nCell;
       
  1209   int cellOffset;
       
  1210   unsigned char *data;
       
  1211   
       
  1212   data = pPage->aData;
       
  1213   assert( sqlite3pager_iswriteable(data) );
       
  1214   assert( pPage->pBt );
       
  1215   if( nByte<4 ) nByte = 4;
       
  1216   if( pPage->nFree<nByte || pPage->nOverflow>0 ) return 0;
       
  1217   pPage->nFree -= nByte;
       
  1218   hdr = pPage->hdrOffset;
       
  1219 
       
  1220   nFrag = data[hdr+7];
       
  1221   if( nFrag<60 ){
       
  1222     /* Search the freelist looking for a slot big enough to satisfy the
       
  1223     ** space request. */
       
  1224     addr = hdr+1;
       
  1225     while( (pc = get2byte(&data[addr]))>0 ){
       
  1226       size = get2byte(&data[pc+2]);
       
  1227       if( size>=nByte ){
       
  1228         if( size<nByte+4 ){
       
  1229           memcpy(&data[addr], &data[pc], 2);
       
  1230           data[hdr+7] = nFrag + size - nByte;
       
  1231           return pc;
       
  1232         }else{
       
  1233           put2byte(&data[pc+2], size-nByte);
       
  1234           return pc + size - nByte;
       
  1235         }
       
  1236       }
       
  1237       addr = pc;
       
  1238     }
       
  1239   }
       
  1240 
       
  1241   /* Allocate memory from the gap in between the cell pointer array
       
  1242   ** and the cell content area.
       
  1243   */
       
  1244   top = get2byte(&data[hdr+5]);
       
  1245   nCell = get2byte(&data[hdr+3]);
       
  1246   cellOffset = pPage->cellOffset;
       
  1247   if( nFrag>=60 || cellOffset + 2*nCell > top - nByte ){
       
  1248     if( defragmentPage(pPage) ) return 0;
       
  1249     top = get2byte(&data[hdr+5]);
       
  1250   }
       
  1251   top -= nByte;
       
  1252   assert( cellOffset + 2*nCell <= top );
       
  1253   put2byte(&data[hdr+5], top);
       
  1254   return top;
       
  1255 }
       
  1256 
       
  1257 /*
       
  1258 ** Return a section of the pPage->aData to the freelist.
       
  1259 ** The first byte of the new free block is pPage->aDisk[start]
       
  1260 ** and the size of the block is "size" bytes.
       
  1261 **
       
  1262 ** Most of the effort here is involved in coalesing adjacent
       
  1263 ** free blocks into a single big free block.
       
  1264 */
       
  1265 static void freeSpace(MemPage *pPage, int start, int size){
       
  1266   int addr, pbegin, hdr;
       
  1267   unsigned char *data = pPage->aData;
       
  1268 
       
  1269   assert( pPage->pBt!=0 );
       
  1270   assert( sqlite3pager_iswriteable(data) );
       
  1271   assert( start>=pPage->hdrOffset+6+(pPage->leaf?0:4) );
       
  1272   assert( (start + size)<=pPage->pBt->usableSize );
       
  1273   if( size<4 ) size = 4;
       
  1274 
       
  1275 #ifdef SQLITE_SECURE_DELETE
       
  1276   /* Overwrite deleted information with zeros when the SECURE_DELETE 
       
  1277   ** option is enabled at compile-time */
       
  1278   memset(&data[start], 0, size);
       
  1279 #endif
       
  1280 
       
  1281   /* Add the space back into the linked list of freeblocks */
       
  1282   hdr = pPage->hdrOffset;
       
  1283   addr = hdr + 1;
       
  1284   while( (pbegin = get2byte(&data[addr]))<start && pbegin>0 ){
       
  1285     assert( pbegin<=pPage->pBt->usableSize-4 );
       
  1286     assert( pbegin>addr );
       
  1287     addr = pbegin;
       
  1288   }
       
  1289   assert( pbegin<=pPage->pBt->usableSize-4 );
       
  1290   assert( pbegin>addr || pbegin==0 );
       
  1291   put2byte(&data[addr], start);
       
  1292   put2byte(&data[start], pbegin);
       
  1293   put2byte(&data[start+2], size);
       
  1294   pPage->nFree += size;
       
  1295 
       
  1296   /* Coalesce adjacent free blocks */
       
  1297   addr = pPage->hdrOffset + 1;
       
  1298   while( (pbegin = get2byte(&data[addr]))>0 ){
       
  1299     int pnext, psize;
       
  1300     assert( pbegin>addr );
       
  1301     assert( pbegin<=pPage->pBt->usableSize-4 );
       
  1302     pnext = get2byte(&data[pbegin]);
       
  1303     psize = get2byte(&data[pbegin+2]);
       
  1304     if( pbegin + psize + 3 >= pnext && pnext>0 ){
       
  1305       int frag = pnext - (pbegin+psize);
       
  1306       assert( frag<=data[pPage->hdrOffset+7] );
       
  1307       data[pPage->hdrOffset+7] -= frag;
       
  1308       put2byte(&data[pbegin], get2byte(&data[pnext]));
       
  1309       put2byte(&data[pbegin+2], pnext+get2byte(&data[pnext+2])-pbegin);
       
  1310     }else{
       
  1311       addr = pbegin;
       
  1312     }
       
  1313   }
       
  1314 
       
  1315   /* If the cell content area begins with a freeblock, remove it. */
       
  1316   if( data[hdr+1]==data[hdr+5] && data[hdr+2]==data[hdr+6] ){
       
  1317     int top;
       
  1318     pbegin = get2byte(&data[hdr+1]);
       
  1319     memcpy(&data[hdr+1], &data[pbegin], 2);
       
  1320     top = get2byte(&data[hdr+5]);
       
  1321     put2byte(&data[hdr+5], top + get2byte(&data[pbegin+2]));
       
  1322   }
       
  1323 }
       
  1324 
       
  1325 /*
       
  1326 ** Decode the flags byte (the first byte of the header) for a page
       
  1327 ** and initialize fields of the MemPage structure accordingly.
       
  1328 */
       
  1329 static void decodeFlags(MemPage *pPage, int flagByte){
       
  1330   BtShared *pBt;     /* A copy of pPage->pBt */
       
  1331 
       
  1332   assert( pPage->hdrOffset==(pPage->pgno==1 ? 100 : 0) );
       
  1333   pPage->intKey = (flagByte & (PTF_INTKEY|PTF_LEAFDATA))!=0;
       
  1334   pPage->zeroData = (flagByte & PTF_ZERODATA)!=0;
       
  1335   pPage->leaf = (flagByte & PTF_LEAF)!=0;
       
  1336   pPage->childPtrSize = 4*(pPage->leaf==0);
       
  1337   pBt = pPage->pBt;
       
  1338   if( flagByte & PTF_LEAFDATA ){
       
  1339     pPage->leafData = 1;
       
  1340     pPage->maxLocal = pBt->maxLeaf;
       
  1341     pPage->minLocal = pBt->minLeaf;
       
  1342   }else{
       
  1343     pPage->leafData = 0;
       
  1344     pPage->maxLocal = pBt->maxLocal;
       
  1345     pPage->minLocal = pBt->minLocal;
       
  1346   }
       
  1347   pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
       
  1348 }
       
  1349 
       
  1350 /*
       
  1351 ** Initialize the auxiliary information for a disk block.
       
  1352 **
       
  1353 ** The pParent parameter must be a pointer to the MemPage which
       
  1354 ** is the parent of the page being initialized.  The root of a
       
  1355 ** BTree has no parent and so for that page, pParent==NULL.
       
  1356 **
       
  1357 ** Return SQLITE_OK on success.  If we see that the page does
       
  1358 ** not contain a well-formed database page, then return 
       
  1359 ** SQLITE_CORRUPT.  Note that a return of SQLITE_OK does not
       
  1360 ** guarantee that the page is well-formed.  It only shows that
       
  1361 ** we failed to detect any corruption.
       
  1362 */
       
  1363 static int initPage(
       
  1364   MemPage *pPage,        /* The page to be initialized */
       
  1365   MemPage *pParent       /* The parent.  Might be NULL */
       
  1366 ){
       
  1367   int pc;            /* Address of a freeblock within pPage->aData[] */
       
  1368   int hdr;           /* Offset to beginning of page header */
       
  1369   u8 *data;          /* Equal to pPage->aData */
       
  1370   BtShared *pBt;        /* The main btree structure */
       
  1371   int usableSize;    /* Amount of usable space on each page */
       
  1372   int cellOffset;    /* Offset from start of page to first cell pointer */
       
  1373   int nFree;         /* Number of unused bytes on the page */
       
  1374   int top;           /* First byte of the cell content area */
       
  1375 
       
  1376   pBt = pPage->pBt;
       
  1377   assert( pBt!=0 );
       
  1378   assert( pParent==0 || pParent->pBt==pBt );
       
  1379   assert( pPage->pgno==sqlite3pager_pagenumber(pPage->aData) );
       
  1380   assert( pPage->aData == &((unsigned char*)pPage)[-pBt->pageSize] );
       
  1381   if( pPage->pParent!=pParent && (pPage->pParent!=0 || pPage->isInit) ){
       
  1382     /* The parent page should never change unless the file is corrupt */
       
  1383     return SQLITE_CORRUPT_BKPT;
       
  1384   }
       
  1385   if( pPage->isInit ) return SQLITE_OK;
       
  1386   if( pPage->pParent==0 && pParent!=0 ){
       
  1387     pPage->pParent = pParent;
       
  1388     sqlite3pager_ref(pParent->aData);
       
  1389   }
       
  1390   hdr = pPage->hdrOffset;
       
  1391   data = pPage->aData;
       
  1392   decodeFlags(pPage, data[hdr]);
       
  1393   pPage->nOverflow = 0;
       
  1394   pPage->idxShift = 0;
       
  1395   usableSize = pBt->usableSize;
       
  1396   pPage->cellOffset = cellOffset = hdr + 12 - 4*pPage->leaf;
       
  1397   top = get2byte(&data[hdr+5]);
       
  1398   pPage->nCell = get2byte(&data[hdr+3]);
       
  1399   if( pPage->nCell>MX_CELL(pBt) ){
       
  1400     /* To many cells for a single page.  The page must be corrupt */
       
  1401     return SQLITE_CORRUPT_BKPT;
       
  1402   }
       
  1403   if( pPage->nCell==0 && pParent!=0 && pParent->pgno!=1 ){
       
  1404     /* All pages must have at least one cell, except for root pages */
       
  1405     return SQLITE_CORRUPT_BKPT;
       
  1406   }
       
  1407 
       
  1408   /* Compute the total free space on the page */
       
  1409   pc = get2byte(&data[hdr+1]);
       
  1410   nFree = data[hdr+7] + top - (cellOffset + 2*pPage->nCell);
       
  1411   while( pc>0 ){
       
  1412     int next, size;
       
  1413     if( pc>usableSize-4 ){
       
  1414       /* Free block is off the page */
       
  1415       return SQLITE_CORRUPT_BKPT; 
       
  1416     }
       
  1417     next = get2byte(&data[pc]);
       
  1418     size = get2byte(&data[pc+2]);
       
  1419     if( next>0 && next<=pc+size+3 ){
       
  1420       /* Free blocks must be in accending order */
       
  1421       return SQLITE_CORRUPT_BKPT; 
       
  1422     }
       
  1423     nFree += size;
       
  1424     pc = next;
       
  1425   }
       
  1426   pPage->nFree = nFree;
       
  1427   if( nFree>=usableSize ){
       
  1428     /* Free space cannot exceed total page size */
       
  1429     return SQLITE_CORRUPT_BKPT; 
       
  1430   }
       
  1431 
       
  1432   pPage->isInit = 1;
       
  1433   pageIntegrity(pPage);
       
  1434   return SQLITE_OK;
       
  1435 }
       
  1436 
       
  1437 /*
       
  1438 ** Set up a raw page so that it looks like a database page holding
       
  1439 ** no entries.
       
  1440 */
       
  1441 static void zeroPage(MemPage *pPage, int flags){
       
  1442   unsigned char *data = pPage->aData;
       
  1443   BtShared *pBt = pPage->pBt;
       
  1444   int hdr = pPage->hdrOffset;
       
  1445   int first;
       
  1446 
       
  1447   assert( sqlite3pager_pagenumber(data)==pPage->pgno );
       
  1448   assert( &data[pBt->pageSize] == (unsigned char*)pPage );
       
  1449   assert( sqlite3pager_iswriteable(data) );
       
  1450   memset(&data[hdr], 0, pBt->usableSize - hdr);
       
  1451   data[hdr] = flags;
       
  1452   first = hdr + 8 + 4*((flags&PTF_LEAF)==0);
       
  1453   memset(&data[hdr+1], 0, 4);
       
  1454   data[hdr+7] = 0;
       
  1455   put2byte(&data[hdr+5], pBt->usableSize);
       
  1456   pPage->nFree = pBt->usableSize - first;
       
  1457   decodeFlags(pPage, flags);
       
  1458   pPage->hdrOffset = hdr;
       
  1459   pPage->cellOffset = first;
       
  1460   pPage->nOverflow = 0;
       
  1461   pPage->idxShift = 0;
       
  1462   pPage->nCell = 0;
       
  1463   pPage->isInit = 1;
       
  1464   pageIntegrity(pPage);
       
  1465 }
       
  1466 
       
  1467 /*
       
  1468 ** Get a page from the pager.  Initialize the MemPage.pBt and
       
  1469 ** MemPage.aData elements if needed.
       
  1470 */
       
  1471 static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage){
       
  1472   int rc;
       
  1473   unsigned char *aData;
       
  1474   MemPage *pPage;
       
  1475   rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData);
       
  1476   if( rc ) return rc;
       
  1477   pPage = (MemPage*)&aData[pBt->pageSize];
       
  1478   pPage->aData = aData;
       
  1479   pPage->pBt = pBt;
       
  1480   pPage->pgno = pgno;
       
  1481   pPage->hdrOffset = pPage->pgno==1 ? 100 : 0;
       
  1482   *ppPage = pPage;
       
  1483   return SQLITE_OK;
       
  1484 }
       
  1485 
       
  1486 /*
       
  1487 ** Get a page from the pager and initialize it.  This routine
       
  1488 ** is just a convenience wrapper around separate calls to
       
  1489 ** getPage() and initPage().
       
  1490 */
       
  1491 static int getAndInitPage(
       
  1492   BtShared *pBt,          /* The database file */
       
  1493   Pgno pgno,           /* Number of the page to get */
       
  1494   MemPage **ppPage,    /* Write the page pointer here */
       
  1495   MemPage *pParent     /* Parent of the page */
       
  1496 ){
       
  1497   int rc;
       
  1498   if( pgno==0 ){
       
  1499     return SQLITE_CORRUPT_BKPT; 
       
  1500   }
       
  1501   rc = getPage(pBt, pgno, ppPage);
       
  1502   if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
       
  1503     rc = initPage(*ppPage, pParent);
       
  1504   }
       
  1505   return rc;
       
  1506 }
       
  1507 
       
  1508 /*
       
  1509 ** Release a MemPage.  This should be called once for each prior
       
  1510 ** call to getPage.
       
  1511 */
       
  1512 static void releasePage(MemPage *pPage){
       
  1513   if( pPage ){
       
  1514     assert( pPage->aData );
       
  1515     assert( pPage->pBt );
       
  1516     assert( &pPage->aData[pPage->pBt->pageSize]==(unsigned char*)pPage );
       
  1517     sqlite3pager_unref(pPage->aData);
       
  1518   }
       
  1519 }
       
  1520 
       
  1521 /*
       
  1522 ** This routine is called when the reference count for a page
       
  1523 ** reaches zero.  We need to unref the pParent pointer when that
       
  1524 ** happens.
       
  1525 */
       
  1526 static void pageDestructor(void *pData, int pageSize){
       
  1527   MemPage *pPage;
       
  1528   assert( (pageSize & 7)==0 );
       
  1529   pPage = (MemPage*)&((char*)pData)[pageSize];
       
  1530   if( pPage->pParent ){
       
  1531     MemPage *pParent = pPage->pParent;
       
  1532     pPage->pParent = 0;
       
  1533     releasePage(pParent);
       
  1534   }
       
  1535   pPage->isInit = 0;
       
  1536 }
       
  1537 
       
  1538 /*
       
  1539 ** During a rollback, when the pager reloads information into the cache
       
  1540 ** so that the cache is restored to its original state at the start of
       
  1541 ** the transaction, for each page restored this routine is called.
       
  1542 **
       
  1543 ** This routine needs to reset the extra data section at the end of the
       
  1544 ** page to agree with the restored data.
       
  1545 */
       
  1546 static void pageReinit(void *pData, int pageSize){
       
  1547   MemPage *pPage;
       
  1548   assert( (pageSize & 7)==0 );
       
  1549   pPage = (MemPage*)&((char*)pData)[pageSize];
       
  1550   if( pPage->isInit ){
       
  1551     pPage->isInit = 0;
       
  1552     initPage(pPage, pPage->pParent);
       
  1553   }
       
  1554 }
       
  1555 
       
  1556 /*
       
  1557 ** Open a database file.
       
  1558 ** 
       
  1559 ** zFilename is the name of the database file.  If zFilename is NULL
       
  1560 ** a new database with a random name is created.  This randomly named
       
  1561 ** database file will be deleted when sqlite3BtreeClose() is called.
       
  1562 */
       
  1563 int sqlite3BtreeOpen(
       
  1564   const char *zFilename,  /* Name of the file containing the BTree database */
       
  1565   sqlite3 *pSqlite,       /* Associated database handle */
       
  1566   Btree **ppBtree,        /* Pointer to new Btree object written here */
       
  1567   int flags               /* Options */
       
  1568 ){
       
  1569   BtShared *pBt;          /* Shared part of btree structure */
       
  1570   Btree *p;               /* Handle to return */
       
  1571   int rc;
       
  1572   int nReserve;
       
  1573   unsigned char zDbHeader[100];
       
  1574 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
       
  1575   const ThreadData *pTsdro;
       
  1576 #endif
       
  1577 
       
  1578   /* Set the variable isMemdb to true for an in-memory database, or 
       
  1579   ** false for a file-based database. This symbol is only required if
       
  1580   ** either of the shared-data or autovacuum features are compiled 
       
  1581   ** into the library.
       
  1582   */
       
  1583 #if !defined(SQLITE_OMIT_SHARED_CACHE) || !defined(SQLITE_OMIT_AUTOVACUUM)
       
  1584   #ifdef SQLITE_OMIT_MEMORYDB
       
  1585     const int isMemdb = 0;
       
  1586   #else
       
  1587     const int isMemdb = zFilename && !strcmp(zFilename, ":memory:");
       
  1588   #endif
       
  1589 #endif
       
  1590 
       
  1591   p = sqliteMalloc(sizeof(Btree));
       
  1592   if( !p ){
       
  1593     return SQLITE_NOMEM;
       
  1594   }
       
  1595   p->inTrans = TRANS_NONE;
       
  1596   p->pSqlite = pSqlite;
       
  1597 
       
  1598   /* Try to find an existing Btree structure opened on zFilename. */
       
  1599 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
       
  1600   pTsdro = sqlite3ThreadDataReadOnly();
       
  1601   if( pTsdro->useSharedData && zFilename && !isMemdb ){
       
  1602     char *zFullPathname = sqlite3OsFullPathname(zFilename);
       
  1603     if( !zFullPathname ){
       
  1604       sqliteFree(p);
       
  1605       return SQLITE_NOMEM;
       
  1606     }
       
  1607     for(pBt=pTsdro->pBtree; pBt; pBt=pBt->pNext){
       
  1608       assert( pBt->nRef>0 );
       
  1609       if( 0==strcmp(zFullPathname, sqlite3pager_filename(pBt->pPager)) ){
       
  1610         p->pBt = pBt;
       
  1611         *ppBtree = p;
       
  1612         pBt->nRef++;
       
  1613         sqliteFree(zFullPathname);
       
  1614         return SQLITE_OK;
       
  1615       }
       
  1616     }
       
  1617     sqliteFree(zFullPathname);
       
  1618   }
       
  1619 #endif
       
  1620 
       
  1621   /*
       
  1622   ** The following asserts make sure that structures used by the btree are
       
  1623   ** the right size.  This is to guard against size changes that result
       
  1624   ** when compiling on a different architecture.
       
  1625   */
       
  1626   assert( sizeof(i64)==8 || sizeof(i64)==4 );
       
  1627   assert( sizeof(u64)==8 || sizeof(u64)==4 );
       
  1628   assert( sizeof(u32)==4 );
       
  1629   assert( sizeof(u16)==2 );
       
  1630   assert( sizeof(Pgno)==4 );
       
  1631 
       
  1632   pBt = sqliteMalloc( sizeof(*pBt) );
       
  1633   if( pBt==0 ){
       
  1634     *ppBtree = 0;
       
  1635     sqliteFree(p);
       
  1636     return SQLITE_NOMEM;
       
  1637   }
       
  1638   rc = sqlite3pager_open(&pBt->pPager, zFilename, EXTRA_SIZE, flags);
       
  1639   if( rc!=SQLITE_OK ){
       
  1640     if( pBt->pPager ) sqlite3pager_close(pBt->pPager);
       
  1641     sqliteFree(pBt);
       
  1642     sqliteFree(p);
       
  1643     *ppBtree = 0;
       
  1644     return rc;
       
  1645   }
       
  1646   p->pBt = pBt;
       
  1647 
       
  1648   sqlite3pager_set_destructor(pBt->pPager, pageDestructor);
       
  1649   sqlite3pager_set_reiniter(pBt->pPager, pageReinit);
       
  1650   pBt->pCursor = 0;
       
  1651   pBt->pPage1 = 0;
       
  1652   pBt->readOnly = sqlite3pager_isreadonly(pBt->pPager);
       
  1653   sqlite3pager_read_fileheader(pBt->pPager, sizeof(zDbHeader), zDbHeader);
       
  1654   pBt->pageSize = get2byte(&zDbHeader[16]);
       
  1655   if( pBt->pageSize<512 || pBt->pageSize>SQLITE_MAX_PAGE_SIZE
       
  1656        || ((pBt->pageSize-1)&pBt->pageSize)!=0 ){
       
  1657     pBt->pageSize = SQLITE_DEFAULT_PAGE_SIZE;
       
  1658     pBt->maxEmbedFrac = 64;   /* 25% */
       
  1659     pBt->minEmbedFrac = 32;   /* 12.5% */
       
  1660     pBt->minLeafFrac = 32;    /* 12.5% */
       
  1661 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  1662     /* If the magic name ":memory:" will create an in-memory database, then
       
  1663     ** do not set the auto-vacuum flag, even if SQLITE_DEFAULT_AUTOVACUUM
       
  1664     ** is true. On the other hand, if SQLITE_OMIT_MEMORYDB has been defined,
       
  1665     ** then ":memory:" is just a regular file-name. Respect the auto-vacuum
       
  1666     ** default in this case.
       
  1667     */
       
  1668     if( zFilename && !isMemdb ){
       
  1669       pBt->autoVacuum = SQLITE_DEFAULT_AUTOVACUUM;
       
  1670     }
       
  1671 #endif
       
  1672     nReserve = 0;
       
  1673   }else{
       
  1674     nReserve = zDbHeader[20];
       
  1675     pBt->maxEmbedFrac = zDbHeader[21];
       
  1676     pBt->minEmbedFrac = zDbHeader[22];
       
  1677     pBt->minLeafFrac = zDbHeader[23];
       
  1678     pBt->pageSizeFixed = 1;
       
  1679 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  1680     pBt->autoVacuum = (get4byte(&zDbHeader[36 + 4*4])?1:0);
       
  1681 #endif
       
  1682   }
       
  1683   pBt->usableSize = pBt->pageSize - nReserve;
       
  1684   assert( (pBt->pageSize & 7)==0 );  /* 8-byte alignment of pageSize */
       
  1685   sqlite3pager_set_pagesize(pBt->pPager, pBt->pageSize);
       
  1686 
       
  1687 #if !defined(SQLITE_OMIT_SHARED_CACHE) && !defined(SQLITE_OMIT_DISKIO)
       
  1688   /* Add the new btree to the linked list starting at ThreadData.pBtree.
       
  1689   ** There is no chance that a malloc() may fail inside of the 
       
  1690   ** sqlite3ThreadData() call, as the ThreadData structure must have already
       
  1691   ** been allocated for pTsdro->useSharedData to be non-zero.
       
  1692   */
       
  1693   if( pTsdro->useSharedData && zFilename && !isMemdb ){
       
  1694     pBt->pNext = pTsdro->pBtree;
       
  1695     sqlite3ThreadData()->pBtree = pBt;
       
  1696   }
       
  1697 #endif
       
  1698   pBt->nRef = 1;
       
  1699   *ppBtree = p;
       
  1700   return SQLITE_OK;
       
  1701 }
       
  1702 
       
  1703 /*
       
  1704 ** Close an open database and invalidate all cursors.
       
  1705 */
       
  1706 int sqlite3BtreeClose(Btree *p){
       
  1707   BtShared *pBt = p->pBt;
       
  1708   BtCursor *pCur;
       
  1709 
       
  1710 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  1711   ThreadData *pTsd;
       
  1712 #endif
       
  1713 
       
  1714   /* Close all cursors opened via this handle.  */
       
  1715   pCur = pBt->pCursor;
       
  1716   while( pCur ){
       
  1717     BtCursor *pTmp = pCur;
       
  1718     pCur = pCur->pNext;
       
  1719     if( pTmp->pBtree==p ){
       
  1720       sqlite3BtreeCloseCursor(pTmp);
       
  1721     }
       
  1722   }
       
  1723 
       
  1724   /* Rollback any active transaction and free the handle structure.
       
  1725   ** The call to sqlite3BtreeRollback() drops any table-locks held by
       
  1726   ** this handle.
       
  1727   */
       
  1728   sqlite3BtreeRollback(p);
       
  1729   sqliteFree(p);
       
  1730 
       
  1731 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  1732   /* If there are still other outstanding references to the shared-btree
       
  1733   ** structure, return now. The remainder of this procedure cleans 
       
  1734   ** up the shared-btree.
       
  1735   */
       
  1736   assert( pBt->nRef>0 );
       
  1737   pBt->nRef--;
       
  1738   if( pBt->nRef ){
       
  1739     return SQLITE_OK;
       
  1740   }
       
  1741 
       
  1742   /* Remove the shared-btree from the thread wide list. Call 
       
  1743   ** ThreadDataReadOnly() and then cast away the const property of the 
       
  1744   ** pointer to avoid allocating thread data if it is not really required.
       
  1745   */
       
  1746   pTsd = (ThreadData *)sqlite3ThreadDataReadOnly();
       
  1747   if( pTsd->pBtree==pBt ){
       
  1748     assert( pTsd==sqlite3ThreadData() );
       
  1749     pTsd->pBtree = pBt->pNext;
       
  1750   }else{
       
  1751     BtShared *pPrev;
       
  1752     for(pPrev=pTsd->pBtree; pPrev && pPrev->pNext!=pBt; pPrev=pPrev->pNext){}
       
  1753     if( pPrev ){
       
  1754       assert( pTsd==sqlite3ThreadData() );
       
  1755       pPrev->pNext = pBt->pNext;
       
  1756     }
       
  1757   }
       
  1758 #endif
       
  1759 
       
  1760   /* Close the pager and free the shared-btree structure */
       
  1761   assert( !pBt->pCursor );
       
  1762   sqlite3pager_close(pBt->pPager);
       
  1763   if( pBt->xFreeSchema && pBt->pSchema ){
       
  1764     pBt->xFreeSchema(pBt->pSchema);
       
  1765   }
       
  1766   sqliteFree(pBt->pSchema);
       
  1767   sqliteFree(pBt);
       
  1768   return SQLITE_OK;
       
  1769 }
       
  1770 
       
  1771 /*
       
  1772 ** Change the busy handler callback function.
       
  1773 */
       
  1774 int sqlite3BtreeSetBusyHandler(Btree *p, BusyHandler *pHandler){
       
  1775   BtShared *pBt = p->pBt;
       
  1776   pBt->pBusyHandler = pHandler;
       
  1777   sqlite3pager_set_busyhandler(pBt->pPager, pHandler);
       
  1778   return SQLITE_OK;
       
  1779 }
       
  1780 
       
  1781 /*
       
  1782 ** Change the limit on the number of pages allowed in the cache.
       
  1783 **
       
  1784 ** The maximum number of cache pages is set to the absolute
       
  1785 ** value of mxPage.  If mxPage is negative, the pager will
       
  1786 ** operate asynchronously - it will not stop to do fsync()s
       
  1787 ** to insure data is written to the disk surface before
       
  1788 ** continuing.  Transactions still work if synchronous is off,
       
  1789 ** and the database cannot be corrupted if this program
       
  1790 ** crashes.  But if the operating system crashes or there is
       
  1791 ** an abrupt power failure when synchronous is off, the database
       
  1792 ** could be left in an inconsistent and unrecoverable state.
       
  1793 ** Synchronous is on by default so database corruption is not
       
  1794 ** normally a worry.
       
  1795 */
       
  1796 int sqlite3BtreeSetCacheSize(Btree *p, int mxPage){
       
  1797   BtShared *pBt = p->pBt;
       
  1798   sqlite3pager_set_cachesize(pBt->pPager, mxPage);
       
  1799   return SQLITE_OK;
       
  1800 }
       
  1801 
       
  1802 /*
       
  1803 ** Change the way data is synced to disk in order to increase or decrease
       
  1804 ** how well the database resists damage due to OS crashes and power
       
  1805 ** failures.  Level 1 is the same as asynchronous (no syncs() occur and
       
  1806 ** there is a high probability of damage)  Level 2 is the default.  There
       
  1807 ** is a very low but non-zero probability of damage.  Level 3 reduces the
       
  1808 ** probability of damage to near zero but with a write performance reduction.
       
  1809 */
       
  1810 #ifndef SQLITE_OMIT_PAGER_PRAGMAS
       
  1811 int sqlite3BtreeSetSafetyLevel(Btree *p, int level, int fullSync){
       
  1812   BtShared *pBt = p->pBt;
       
  1813   sqlite3pager_set_safety_level(pBt->pPager, level, fullSync);
       
  1814   return SQLITE_OK;
       
  1815 }
       
  1816 #endif
       
  1817 
       
  1818 /*
       
  1819 ** Return TRUE if the given btree is set to safety level 1.  In other
       
  1820 ** words, return TRUE if no sync() occurs on the disk files.
       
  1821 */
       
  1822 int sqlite3BtreeSyncDisabled(Btree *p){
       
  1823   BtShared *pBt = p->pBt;
       
  1824   assert( pBt && pBt->pPager );
       
  1825   return sqlite3pager_nosync(pBt->pPager);
       
  1826 }
       
  1827 
       
  1828 #if !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM)
       
  1829 /*
       
  1830 ** Change the default pages size and the number of reserved bytes per page.
       
  1831 **
       
  1832 ** The page size must be a power of 2 between 512 and 65536.  If the page
       
  1833 ** size supplied does not meet this constraint then the page size is not
       
  1834 ** changed.
       
  1835 **
       
  1836 ** Page sizes are constrained to be a power of two so that the region
       
  1837 ** of the database file used for locking (beginning at PENDING_BYTE,
       
  1838 ** the first byte past the 1GB boundary, 0x40000000) needs to occur
       
  1839 ** at the beginning of a page.
       
  1840 **
       
  1841 ** If parameter nReserve is less than zero, then the number of reserved
       
  1842 ** bytes per page is left unchanged.
       
  1843 */
       
  1844 int sqlite3BtreeSetPageSize(Btree *p, int pageSize, int nReserve){
       
  1845   BtShared *pBt = p->pBt;
       
  1846   if( pBt->pageSizeFixed ){
       
  1847     return SQLITE_READONLY;
       
  1848   }
       
  1849   if( nReserve<0 ){
       
  1850     nReserve = pBt->pageSize - pBt->usableSize;
       
  1851   }
       
  1852   if( pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE &&
       
  1853         ((pageSize-1)&pageSize)==0 ){
       
  1854     assert( (pageSize & 7)==0 );
       
  1855     assert( !pBt->pPage1 && !pBt->pCursor );
       
  1856     pBt->pageSize = sqlite3pager_set_pagesize(pBt->pPager, pageSize);
       
  1857   }
       
  1858   pBt->usableSize = pBt->pageSize - nReserve;
       
  1859   return SQLITE_OK;
       
  1860 }
       
  1861 
       
  1862 /*
       
  1863 ** Return the currently defined page size
       
  1864 */
       
  1865 int sqlite3BtreeGetPageSize(Btree *p){
       
  1866   return p->pBt->pageSize;
       
  1867 }
       
  1868 int sqlite3BtreeGetReserve(Btree *p){
       
  1869   return p->pBt->pageSize - p->pBt->usableSize;
       
  1870 }
       
  1871 #endif /* !defined(SQLITE_OMIT_PAGER_PRAGMAS) || !defined(SQLITE_OMIT_VACUUM) */
       
  1872 
       
  1873 /*
       
  1874 ** Change the 'auto-vacuum' property of the database. If the 'autoVacuum'
       
  1875 ** parameter is non-zero, then auto-vacuum mode is enabled. If zero, it
       
  1876 ** is disabled. The default value for the auto-vacuum property is 
       
  1877 ** determined by the SQLITE_DEFAULT_AUTOVACUUM macro.
       
  1878 */
       
  1879 int sqlite3BtreeSetAutoVacuum(Btree *p, int autoVacuum){
       
  1880   BtShared *pBt = p->pBt;;
       
  1881 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  1882   return SQLITE_READONLY;
       
  1883 #else
       
  1884   if( pBt->pageSizeFixed ){
       
  1885     return SQLITE_READONLY;
       
  1886   }
       
  1887   pBt->autoVacuum = (autoVacuum?1:0);
       
  1888   return SQLITE_OK;
       
  1889 #endif
       
  1890 }
       
  1891 
       
  1892 /*
       
  1893 ** Return the value of the 'auto-vacuum' property. If auto-vacuum is 
       
  1894 ** enabled 1 is returned. Otherwise 0.
       
  1895 */
       
  1896 int sqlite3BtreeGetAutoVacuum(Btree *p){
       
  1897 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  1898   return 0;
       
  1899 #else
       
  1900   return p->pBt->autoVacuum;
       
  1901 #endif
       
  1902 }
       
  1903 
       
  1904 
       
  1905 /*
       
  1906 ** Get a reference to pPage1 of the database file.  This will
       
  1907 ** also acquire a readlock on that file.
       
  1908 **
       
  1909 ** SQLITE_OK is returned on success.  If the file is not a
       
  1910 ** well-formed database file, then SQLITE_CORRUPT is returned.
       
  1911 ** SQLITE_BUSY is returned if the database is locked.  SQLITE_NOMEM
       
  1912 ** is returned if we run out of memory.  SQLITE_PROTOCOL is returned
       
  1913 ** if there is a locking protocol violation.
       
  1914 */
       
  1915 static int lockBtree(BtShared *pBt){
       
  1916   int rc, pageSize;
       
  1917   MemPage *pPage1;
       
  1918   if( pBt->pPage1 ) return SQLITE_OK;
       
  1919   rc = getPage(pBt, 1, &pPage1);
       
  1920   if( rc!=SQLITE_OK ) return rc;
       
  1921   
       
  1922 
       
  1923   /* Do some checking to help insure the file we opened really is
       
  1924   ** a valid database file. 
       
  1925   */
       
  1926   rc = SQLITE_NOTADB;
       
  1927   if( sqlite3pager_pagecount(pBt->pPager)>0 ){
       
  1928     u8 *page1 = pPage1->aData;
       
  1929     if( memcmp(page1, zMagicHeader, 16)!=0 ){
       
  1930       goto page1_init_failed;
       
  1931     }
       
  1932     if( page1[18]>1 || page1[19]>1 ){
       
  1933       goto page1_init_failed;
       
  1934     }
       
  1935     pageSize = get2byte(&page1[16]);
       
  1936     if( ((pageSize-1)&pageSize)!=0 ){
       
  1937       goto page1_init_failed;
       
  1938     }
       
  1939     assert( (pageSize & 7)==0 );
       
  1940     pBt->pageSize = pageSize;
       
  1941     pBt->usableSize = pageSize - page1[20];
       
  1942     if( pBt->usableSize<500 ){
       
  1943       goto page1_init_failed;
       
  1944     }
       
  1945     pBt->maxEmbedFrac = page1[21];
       
  1946     pBt->minEmbedFrac = page1[22];
       
  1947     pBt->minLeafFrac = page1[23];
       
  1948 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  1949     pBt->autoVacuum = (get4byte(&page1[36 + 4*4])?1:0);
       
  1950 #endif
       
  1951   }
       
  1952 
       
  1953   /* maxLocal is the maximum amount of payload to store locally for
       
  1954   ** a cell.  Make sure it is small enough so that at least minFanout
       
  1955   ** cells can will fit on one page.  We assume a 10-byte page header.
       
  1956   ** Besides the payload, the cell must store:
       
  1957   **     2-byte pointer to the cell
       
  1958   **     4-byte child pointer
       
  1959   **     9-byte nKey value
       
  1960   **     4-byte nData value
       
  1961   **     4-byte overflow page pointer
       
  1962   ** So a cell consists of a 2-byte poiner, a header which is as much as
       
  1963   ** 17 bytes long, 0 to N bytes of payload, and an optional 4 byte overflow
       
  1964   ** page pointer.
       
  1965   */
       
  1966   pBt->maxLocal = (pBt->usableSize-12)*pBt->maxEmbedFrac/255 - 23;
       
  1967   pBt->minLocal = (pBt->usableSize-12)*pBt->minEmbedFrac/255 - 23;
       
  1968   pBt->maxLeaf = pBt->usableSize - 35;
       
  1969   pBt->minLeaf = (pBt->usableSize-12)*pBt->minLeafFrac/255 - 23;
       
  1970   if( pBt->minLocal>pBt->maxLocal || pBt->maxLocal<0 ){
       
  1971     goto page1_init_failed;
       
  1972   }
       
  1973   assert( pBt->maxLeaf + 23 <= MX_CELL_SIZE(pBt) );
       
  1974   pBt->pPage1 = pPage1;
       
  1975   return SQLITE_OK;
       
  1976 
       
  1977 page1_init_failed:
       
  1978   releasePage(pPage1);
       
  1979   pBt->pPage1 = 0;
       
  1980   return rc;
       
  1981 }
       
  1982 
       
  1983 /*
       
  1984 ** This routine works like lockBtree() except that it also invokes the
       
  1985 ** busy callback if there is lock contention.
       
  1986 */
       
  1987 static int lockBtreeWithRetry(Btree *pRef){
       
  1988   int rc = SQLITE_OK;
       
  1989   if( pRef->inTrans==TRANS_NONE ){
       
  1990     u8 inTransaction = pRef->pBt->inTransaction;
       
  1991     btreeIntegrity(pRef);
       
  1992     rc = sqlite3BtreeBeginTrans(pRef, 0);
       
  1993     pRef->pBt->inTransaction = inTransaction;
       
  1994     pRef->inTrans = TRANS_NONE;
       
  1995     if( rc==SQLITE_OK ){
       
  1996       pRef->pBt->nTransaction--;
       
  1997     }
       
  1998     btreeIntegrity(pRef);
       
  1999   }
       
  2000   return rc;
       
  2001 }
       
  2002        
       
  2003 
       
  2004 /*
       
  2005 ** If there are no outstanding cursors and we are not in the middle
       
  2006 ** of a transaction but there is a read lock on the database, then
       
  2007 ** this routine unrefs the first page of the database file which 
       
  2008 ** has the effect of releasing the read lock.
       
  2009 **
       
  2010 ** If there are any outstanding cursors, this routine is a no-op.
       
  2011 **
       
  2012 ** If there is a transaction in progress, this routine is a no-op.
       
  2013 */
       
  2014 static void unlockBtreeIfUnused(BtShared *pBt){
       
  2015   if( pBt->inTransaction==TRANS_NONE && pBt->pCursor==0 && pBt->pPage1!=0 ){
       
  2016     if( pBt->pPage1->aData==0 ){
       
  2017       MemPage *pPage = pBt->pPage1;
       
  2018       pPage->aData = &((u8*)pPage)[-pBt->pageSize];
       
  2019       pPage->pBt = pBt;
       
  2020       pPage->pgno = 1;
       
  2021     }
       
  2022     releasePage(pBt->pPage1);
       
  2023     pBt->pPage1 = 0;
       
  2024     pBt->inStmt = 0;
       
  2025   }
       
  2026 }
       
  2027 
       
  2028 /*
       
  2029 ** Create a new database by initializing the first page of the
       
  2030 ** file.
       
  2031 */
       
  2032 static int newDatabase(BtShared *pBt){
       
  2033   MemPage *pP1;
       
  2034   unsigned char *data;
       
  2035   int rc;
       
  2036   if( sqlite3pager_pagecount(pBt->pPager)>0 ) return SQLITE_OK;
       
  2037   pP1 = pBt->pPage1;
       
  2038   assert( pP1!=0 );
       
  2039   data = pP1->aData;
       
  2040   rc = sqlite3pager_write(data);
       
  2041   if( rc ) return rc;
       
  2042   memcpy(data, zMagicHeader, sizeof(zMagicHeader));
       
  2043   assert( sizeof(zMagicHeader)==16 );
       
  2044   put2byte(&data[16], pBt->pageSize);
       
  2045   data[18] = 1;
       
  2046   data[19] = 1;
       
  2047   data[20] = pBt->pageSize - pBt->usableSize;
       
  2048   data[21] = pBt->maxEmbedFrac;
       
  2049   data[22] = pBt->minEmbedFrac;
       
  2050   data[23] = pBt->minLeafFrac;
       
  2051   memset(&data[24], 0, 100-24);
       
  2052   zeroPage(pP1, PTF_INTKEY|PTF_LEAF|PTF_LEAFDATA );
       
  2053   pBt->pageSizeFixed = 1;
       
  2054 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  2055   if( pBt->autoVacuum ){
       
  2056     put4byte(&data[36 + 4*4], 1);
       
  2057   }
       
  2058 #endif
       
  2059   return SQLITE_OK;
       
  2060 }
       
  2061 
       
  2062 /*
       
  2063 ** Attempt to start a new transaction. A write-transaction
       
  2064 ** is started if the second argument is nonzero, otherwise a read-
       
  2065 ** transaction.  If the second argument is 2 or more and exclusive
       
  2066 ** transaction is started, meaning that no other process is allowed
       
  2067 ** to access the database.  A preexisting transaction may not be
       
  2068 ** upgraded to exclusive by calling this routine a second time - the
       
  2069 ** exclusivity flag only works for a new transaction.
       
  2070 **
       
  2071 ** A write-transaction must be started before attempting any 
       
  2072 ** changes to the database.  None of the following routines 
       
  2073 ** will work unless a transaction is started first:
       
  2074 **
       
  2075 **      sqlite3BtreeCreateTable()
       
  2076 **      sqlite3BtreeCreateIndex()
       
  2077 **      sqlite3BtreeClearTable()
       
  2078 **      sqlite3BtreeDropTable()
       
  2079 **      sqlite3BtreeInsert()
       
  2080 **      sqlite3BtreeDelete()
       
  2081 **      sqlite3BtreeUpdateMeta()
       
  2082 **
       
  2083 ** If an initial attempt to acquire the lock fails because of lock contention
       
  2084 ** and the database was previously unlocked, then invoke the busy handler
       
  2085 ** if there is one.  But if there was previously a read-lock, do not
       
  2086 ** invoke the busy handler - just return SQLITE_BUSY.  SQLITE_BUSY is 
       
  2087 ** returned when there is already a read-lock in order to avoid a deadlock.
       
  2088 **
       
  2089 ** Suppose there are two processes A and B.  A has a read lock and B has
       
  2090 ** a reserved lock.  B tries to promote to exclusive but is blocked because
       
  2091 ** of A's read lock.  A tries to promote to reserved but is blocked by B.
       
  2092 ** One or the other of the two processes must give way or there can be
       
  2093 ** no progress.  By returning SQLITE_BUSY and not invoking the busy callback
       
  2094 ** when A already has a read lock, we encourage A to give up and let B
       
  2095 ** proceed.
       
  2096 */
       
  2097 int sqlite3BtreeBeginTrans(Btree *p, int wrflag){
       
  2098   BtShared *pBt = p->pBt;
       
  2099   int rc = SQLITE_OK;
       
  2100 
       
  2101   btreeIntegrity(p);
       
  2102 
       
  2103   /* If the btree is already in a write-transaction, or it
       
  2104   ** is already in a read-transaction and a read-transaction
       
  2105   ** is requested, this is a no-op.
       
  2106   */
       
  2107   if( p->inTrans==TRANS_WRITE || (p->inTrans==TRANS_READ && !wrflag) ){
       
  2108     return SQLITE_OK;
       
  2109   }
       
  2110 
       
  2111   /* Write transactions are not possible on a read-only database */
       
  2112   if( pBt->readOnly && wrflag ){
       
  2113     return SQLITE_READONLY;
       
  2114   }
       
  2115 
       
  2116   /* If another database handle has already opened a write transaction 
       
  2117   ** on this shared-btree structure and a second write transaction is
       
  2118   ** requested, return SQLITE_BUSY.
       
  2119   */
       
  2120   if( pBt->inTransaction==TRANS_WRITE && wrflag ){
       
  2121     return SQLITE_BUSY;
       
  2122   }
       
  2123 
       
  2124   do {
       
  2125     if( pBt->pPage1==0 ){
       
  2126       rc = lockBtree(pBt);
       
  2127     }
       
  2128   
       
  2129     if( rc==SQLITE_OK && wrflag ){
       
  2130       rc = sqlite3pager_begin(pBt->pPage1->aData, wrflag>1);
       
  2131       if( rc==SQLITE_OK ){
       
  2132         rc = newDatabase(pBt);
       
  2133       }
       
  2134     }
       
  2135   
       
  2136     if( rc==SQLITE_OK ){
       
  2137       if( wrflag ) pBt->inStmt = 0;
       
  2138     }else{
       
  2139       unlockBtreeIfUnused(pBt);
       
  2140     }
       
  2141   }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE &&
       
  2142           sqlite3InvokeBusyHandler(pBt->pBusyHandler) );
       
  2143 
       
  2144   if( rc==SQLITE_OK ){
       
  2145     if( p->inTrans==TRANS_NONE ){
       
  2146       pBt->nTransaction++;
       
  2147     }
       
  2148     p->inTrans = (wrflag?TRANS_WRITE:TRANS_READ);
       
  2149     if( p->inTrans>pBt->inTransaction ){
       
  2150       pBt->inTransaction = p->inTrans;
       
  2151     }
       
  2152   }
       
  2153 
       
  2154   btreeIntegrity(p);
       
  2155   return rc;
       
  2156 }
       
  2157 
       
  2158 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  2159 
       
  2160 /*
       
  2161 ** Set the pointer-map entries for all children of page pPage. Also, if
       
  2162 ** pPage contains cells that point to overflow pages, set the pointer
       
  2163 ** map entries for the overflow pages as well.
       
  2164 */
       
  2165 static int setChildPtrmaps(MemPage *pPage){
       
  2166   int i;                             /* Counter variable */
       
  2167   int nCell;                         /* Number of cells in page pPage */
       
  2168   int rc = SQLITE_OK;                /* Return code */
       
  2169   BtShared *pBt = pPage->pBt;
       
  2170   int isInitOrig = pPage->isInit;
       
  2171   Pgno pgno = pPage->pgno;
       
  2172 
       
  2173   initPage(pPage, 0);
       
  2174   nCell = pPage->nCell;
       
  2175 
       
  2176   for(i=0; i<nCell; i++){
       
  2177     u8 *pCell = findCell(pPage, i);
       
  2178 
       
  2179     rc = ptrmapPutOvflPtr(pPage, pCell);
       
  2180     if( rc!=SQLITE_OK ){
       
  2181       goto set_child_ptrmaps_out;
       
  2182     }
       
  2183 
       
  2184     if( !pPage->leaf ){
       
  2185       Pgno childPgno = get4byte(pCell);
       
  2186       rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
       
  2187       if( rc!=SQLITE_OK ) goto set_child_ptrmaps_out;
       
  2188     }
       
  2189   }
       
  2190 
       
  2191   if( !pPage->leaf ){
       
  2192     Pgno childPgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
       
  2193     rc = ptrmapPut(pBt, childPgno, PTRMAP_BTREE, pgno);
       
  2194   }
       
  2195 
       
  2196 set_child_ptrmaps_out:
       
  2197   pPage->isInit = isInitOrig;
       
  2198   return rc;
       
  2199 }
       
  2200 
       
  2201 /*
       
  2202 ** Somewhere on pPage, which is guarenteed to be a btree page, not an overflow
       
  2203 ** page, is a pointer to page iFrom. Modify this pointer so that it points to
       
  2204 ** iTo. Parameter eType describes the type of pointer to be modified, as 
       
  2205 ** follows:
       
  2206 **
       
  2207 ** PTRMAP_BTREE:     pPage is a btree-page. The pointer points at a child 
       
  2208 **                   page of pPage.
       
  2209 **
       
  2210 ** PTRMAP_OVERFLOW1: pPage is a btree-page. The pointer points at an overflow
       
  2211 **                   page pointed to by one of the cells on pPage.
       
  2212 **
       
  2213 ** PTRMAP_OVERFLOW2: pPage is an overflow-page. The pointer points at the next
       
  2214 **                   overflow page in the list.
       
  2215 */
       
  2216 static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
       
  2217   if( eType==PTRMAP_OVERFLOW2 ){
       
  2218     /* The pointer is always the first 4 bytes of the page in this case.  */
       
  2219     if( get4byte(pPage->aData)!=iFrom ){
       
  2220       return SQLITE_CORRUPT_BKPT;
       
  2221     }
       
  2222     put4byte(pPage->aData, iTo);
       
  2223   }else{
       
  2224     int isInitOrig = pPage->isInit;
       
  2225     int i;
       
  2226     int nCell;
       
  2227 
       
  2228     initPage(pPage, 0);
       
  2229     nCell = pPage->nCell;
       
  2230 
       
  2231     for(i=0; i<nCell; i++){
       
  2232       u8 *pCell = findCell(pPage, i);
       
  2233       if( eType==PTRMAP_OVERFLOW1 ){
       
  2234         CellInfo info;
       
  2235         parseCellPtr(pPage, pCell, &info);
       
  2236         if( info.iOverflow ){
       
  2237           if( iFrom==get4byte(&pCell[info.iOverflow]) ){
       
  2238             put4byte(&pCell[info.iOverflow], iTo);
       
  2239             break;
       
  2240           }
       
  2241         }
       
  2242       }else{
       
  2243         if( get4byte(pCell)==iFrom ){
       
  2244           put4byte(pCell, iTo);
       
  2245           break;
       
  2246         }
       
  2247       }
       
  2248     }
       
  2249   
       
  2250     if( i==nCell ){
       
  2251       if( eType!=PTRMAP_BTREE || 
       
  2252           get4byte(&pPage->aData[pPage->hdrOffset+8])!=iFrom ){
       
  2253         return SQLITE_CORRUPT_BKPT;
       
  2254       }
       
  2255       put4byte(&pPage->aData[pPage->hdrOffset+8], iTo);
       
  2256     }
       
  2257 
       
  2258     pPage->isInit = isInitOrig;
       
  2259   }
       
  2260   return SQLITE_OK;
       
  2261 }
       
  2262 
       
  2263 
       
  2264 /*
       
  2265 ** Move the open database page pDbPage to location iFreePage in the 
       
  2266 ** database. The pDbPage reference remains valid.
       
  2267 */
       
  2268 static int relocatePage(
       
  2269   BtShared *pBt,           /* Btree */
       
  2270   MemPage *pDbPage,        /* Open page to move */
       
  2271   u8 eType,                /* Pointer map 'type' entry for pDbPage */
       
  2272   Pgno iPtrPage,           /* Pointer map 'page-no' entry for pDbPage */
       
  2273   Pgno iFreePage           /* The location to move pDbPage to */
       
  2274 ){
       
  2275   MemPage *pPtrPage;   /* The page that contains a pointer to pDbPage */
       
  2276   Pgno iDbPage = pDbPage->pgno;
       
  2277   Pager *pPager = pBt->pPager;
       
  2278   int rc;
       
  2279 
       
  2280   assert( eType==PTRMAP_OVERFLOW2 || eType==PTRMAP_OVERFLOW1 || 
       
  2281       eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE );
       
  2282 
       
  2283   /* Move page iDbPage from it's current location to page number iFreePage */
       
  2284   TRACE(("AUTOVACUUM: Moving %d to free page %d (ptr page %d type %d)\n", 
       
  2285       iDbPage, iFreePage, iPtrPage, eType));
       
  2286   rc = sqlite3pager_movepage(pPager, pDbPage->aData, iFreePage);
       
  2287   if( rc!=SQLITE_OK ){
       
  2288     return rc;
       
  2289   }
       
  2290   pDbPage->pgno = iFreePage;
       
  2291 
       
  2292   /* If pDbPage was a btree-page, then it may have child pages and/or cells
       
  2293   ** that point to overflow pages. The pointer map entries for all these
       
  2294   ** pages need to be changed.
       
  2295   **
       
  2296   ** If pDbPage is an overflow page, then the first 4 bytes may store a
       
  2297   ** pointer to a subsequent overflow page. If this is the case, then
       
  2298   ** the pointer map needs to be updated for the subsequent overflow page.
       
  2299   */
       
  2300   if( eType==PTRMAP_BTREE || eType==PTRMAP_ROOTPAGE ){
       
  2301     rc = setChildPtrmaps(pDbPage);
       
  2302     if( rc!=SQLITE_OK ){
       
  2303       return rc;
       
  2304     }
       
  2305   }else{
       
  2306     Pgno nextOvfl = get4byte(pDbPage->aData);
       
  2307     if( nextOvfl!=0 ){
       
  2308       rc = ptrmapPut(pBt, nextOvfl, PTRMAP_OVERFLOW2, iFreePage);
       
  2309       if( rc!=SQLITE_OK ){
       
  2310         return rc;
       
  2311       }
       
  2312     }
       
  2313   }
       
  2314 
       
  2315   /* Fix the database pointer on page iPtrPage that pointed at iDbPage so
       
  2316   ** that it points at iFreePage. Also fix the pointer map entry for
       
  2317   ** iPtrPage.
       
  2318   */
       
  2319   if( eType!=PTRMAP_ROOTPAGE ){
       
  2320     rc = getPage(pBt, iPtrPage, &pPtrPage);
       
  2321     if( rc!=SQLITE_OK ){
       
  2322       return rc;
       
  2323     }
       
  2324     rc = sqlite3pager_write(pPtrPage->aData);
       
  2325     if( rc!=SQLITE_OK ){
       
  2326       releasePage(pPtrPage);
       
  2327       return rc;
       
  2328     }
       
  2329     rc = modifyPagePointer(pPtrPage, iDbPage, iFreePage, eType);
       
  2330     releasePage(pPtrPage);
       
  2331     if( rc==SQLITE_OK ){
       
  2332       rc = ptrmapPut(pBt, iFreePage, eType, iPtrPage);
       
  2333     }
       
  2334   }
       
  2335   return rc;
       
  2336 }
       
  2337 
       
  2338 /* Forward declaration required by autoVacuumCommit(). */
       
  2339 static int allocatePage(BtShared *, MemPage **, Pgno *, Pgno, u8);
       
  2340 
       
  2341 /*
       
  2342 ** This routine is called prior to sqlite3pager_commit when a transaction
       
  2343 ** is commited for an auto-vacuum database.
       
  2344 */
       
  2345 static int autoVacuumCommit(BtShared *pBt, Pgno *nTrunc){
       
  2346   Pager *pPager = pBt->pPager;
       
  2347   Pgno nFreeList;            /* Number of pages remaining on the free-list. */
       
  2348   int nPtrMap;               /* Number of pointer-map pages deallocated */
       
  2349   Pgno origSize;             /* Pages in the database file */
       
  2350   Pgno finSize;              /* Pages in the database file after truncation */
       
  2351   int rc;                    /* Return code */
       
  2352   u8 eType;
       
  2353   int pgsz = pBt->pageSize;  /* Page size for this database */
       
  2354   Pgno iDbPage;              /* The database page to move */
       
  2355   MemPage *pDbMemPage = 0;   /* "" */
       
  2356   Pgno iPtrPage;             /* The page that contains a pointer to iDbPage */
       
  2357   Pgno iFreePage;            /* The free-list page to move iDbPage to */
       
  2358   MemPage *pFreeMemPage = 0; /* "" */
       
  2359 
       
  2360 #ifndef NDEBUG
       
  2361   int nRef = sqlite3pager_refcount(pPager);
       
  2362 #endif
       
  2363 
       
  2364   assert( pBt->autoVacuum );
       
  2365   if( PTRMAP_ISPAGE(pBt, sqlite3pager_pagecount(pPager)) ){
       
  2366     return SQLITE_CORRUPT_BKPT;
       
  2367   }
       
  2368 
       
  2369   /* Figure out how many free-pages are in the database. If there are no
       
  2370   ** free pages, then auto-vacuum is a no-op.
       
  2371   */
       
  2372   nFreeList = get4byte(&pBt->pPage1->aData[36]);
       
  2373   if( nFreeList==0 ){
       
  2374     *nTrunc = 0;
       
  2375     return SQLITE_OK;
       
  2376   }
       
  2377 
       
  2378   /* This block figures out how many pages there are in the database
       
  2379   ** now (variable origSize), and how many there will be after the
       
  2380   ** truncation (variable finSize).
       
  2381   **
       
  2382   ** The final size is the original size, less the number of free pages
       
  2383   ** in the database, less any pointer-map pages that will no longer
       
  2384   ** be required, less 1 if the pending-byte page was part of the database
       
  2385   ** but is not after the truncation.
       
  2386   **/
       
  2387   origSize = sqlite3pager_pagecount(pPager);
       
  2388   if( origSize==PENDING_BYTE_PAGE(pBt) ){
       
  2389     origSize--;
       
  2390   }
       
  2391   nPtrMap = (nFreeList-origSize+PTRMAP_PAGENO(pBt, origSize)+pgsz/5)/(pgsz/5);
       
  2392   finSize = origSize - nFreeList - nPtrMap;
       
  2393   if( origSize>PENDING_BYTE_PAGE(pBt) && finSize<=PENDING_BYTE_PAGE(pBt) ){
       
  2394     finSize--;
       
  2395   }
       
  2396   while( PTRMAP_ISPAGE(pBt, finSize) || finSize==PENDING_BYTE_PAGE(pBt) ){
       
  2397     finSize--;
       
  2398   }
       
  2399   TRACE(("AUTOVACUUM: Begin (db size %d->%d)\n", origSize, finSize));
       
  2400 
       
  2401   /* Variable 'finSize' will be the size of the file in pages after
       
  2402   ** the auto-vacuum has completed (the current file size minus the number
       
  2403   ** of pages on the free list). Loop through the pages that lie beyond
       
  2404   ** this mark, and if they are not already on the free list, move them
       
  2405   ** to a free page earlier in the file (somewhere before finSize).
       
  2406   */
       
  2407   for( iDbPage=finSize+1; iDbPage<=origSize; iDbPage++ ){
       
  2408     /* If iDbPage is a pointer map page, or the pending-byte page, skip it. */
       
  2409     if( PTRMAP_ISPAGE(pBt, iDbPage) || iDbPage==PENDING_BYTE_PAGE(pBt) ){
       
  2410       continue;
       
  2411     }
       
  2412 
       
  2413     rc = ptrmapGet(pBt, iDbPage, &eType, &iPtrPage);
       
  2414     if( rc!=SQLITE_OK ) goto autovacuum_out;
       
  2415     if( eType==PTRMAP_ROOTPAGE ){
       
  2416       rc = SQLITE_CORRUPT_BKPT;
       
  2417       goto autovacuum_out;
       
  2418     }
       
  2419 
       
  2420     /* If iDbPage is free, do not swap it.  */
       
  2421     if( eType==PTRMAP_FREEPAGE ){
       
  2422       continue;
       
  2423     }
       
  2424     rc = getPage(pBt, iDbPage, &pDbMemPage);
       
  2425     if( rc!=SQLITE_OK ) goto autovacuum_out;
       
  2426 
       
  2427     /* Find the next page in the free-list that is not already at the end 
       
  2428     ** of the file. A page can be pulled off the free list using the 
       
  2429     ** allocatePage() routine.
       
  2430     */
       
  2431     do{
       
  2432       if( pFreeMemPage ){
       
  2433         releasePage(pFreeMemPage);
       
  2434         pFreeMemPage = 0;
       
  2435       }
       
  2436       rc = allocatePage(pBt, &pFreeMemPage, &iFreePage, 0, 0);
       
  2437       if( rc!=SQLITE_OK ){
       
  2438         releasePage(pDbMemPage);
       
  2439         goto autovacuum_out;
       
  2440       }
       
  2441       assert( iFreePage<=origSize );
       
  2442     }while( iFreePage>finSize );
       
  2443     releasePage(pFreeMemPage);
       
  2444     pFreeMemPage = 0;
       
  2445 
       
  2446     /* Relocate the page into the body of the file. Note that although the 
       
  2447     ** page has moved within the database file, the pDbMemPage pointer 
       
  2448     ** remains valid. This means that this function can run without
       
  2449     ** invalidating cursors open on the btree. This is important in 
       
  2450     ** shared-cache mode.
       
  2451     */
       
  2452     rc = relocatePage(pBt, pDbMemPage, eType, iPtrPage, iFreePage);
       
  2453     releasePage(pDbMemPage);
       
  2454     if( rc!=SQLITE_OK ) goto autovacuum_out;
       
  2455   }
       
  2456 
       
  2457   /* The entire free-list has been swapped to the end of the file. So
       
  2458   ** truncate the database file to finSize pages and consider the
       
  2459   ** free-list empty.
       
  2460   */
       
  2461   rc = sqlite3pager_write(pBt->pPage1->aData);
       
  2462   if( rc!=SQLITE_OK ) goto autovacuum_out;
       
  2463   put4byte(&pBt->pPage1->aData[32], 0);
       
  2464   put4byte(&pBt->pPage1->aData[36], 0);
       
  2465   *nTrunc = finSize;
       
  2466   assert( finSize!=PENDING_BYTE_PAGE(pBt) );
       
  2467 
       
  2468 autovacuum_out:
       
  2469   assert( nRef==sqlite3pager_refcount(pPager) );
       
  2470   if( rc!=SQLITE_OK ){
       
  2471     sqlite3pager_rollback(pPager);
       
  2472   }
       
  2473   return rc;
       
  2474 }
       
  2475 #endif
       
  2476 
       
  2477 /*
       
  2478 ** Commit the transaction currently in progress.
       
  2479 **
       
  2480 ** This will release the write lock on the database file.  If there
       
  2481 ** are no active cursors, it also releases the read lock.
       
  2482 */
       
  2483 int sqlite3BtreeCommit(Btree *p){
       
  2484   BtShared *pBt = p->pBt;
       
  2485 
       
  2486   btreeIntegrity(p);
       
  2487 
       
  2488   /* If the handle has a write-transaction open, commit the shared-btrees 
       
  2489   ** transaction and set the shared state to TRANS_READ.
       
  2490   */
       
  2491   if( p->inTrans==TRANS_WRITE ){
       
  2492     int rc;
       
  2493     assert( pBt->inTransaction==TRANS_WRITE );
       
  2494     assert( pBt->nTransaction>0 );
       
  2495     rc = sqlite3pager_commit(pBt->pPager);
       
  2496     if( rc!=SQLITE_OK ){
       
  2497       return rc;
       
  2498     }
       
  2499     pBt->inTransaction = TRANS_READ;
       
  2500     pBt->inStmt = 0;
       
  2501   }
       
  2502   unlockAllTables(p);
       
  2503 
       
  2504   /* If the handle has any kind of transaction open, decrement the transaction
       
  2505   ** count of the shared btree. If the transaction count reaches 0, set
       
  2506   ** the shared state to TRANS_NONE. The unlockBtreeIfUnused() call below
       
  2507   ** will unlock the pager.
       
  2508   */
       
  2509   if( p->inTrans!=TRANS_NONE ){
       
  2510     pBt->nTransaction--;
       
  2511     if( 0==pBt->nTransaction ){
       
  2512       pBt->inTransaction = TRANS_NONE;
       
  2513     }
       
  2514   }
       
  2515 
       
  2516   /* Set the handles current transaction state to TRANS_NONE and unlock
       
  2517   ** the pager if this call closed the only read or write transaction.
       
  2518   */
       
  2519   p->inTrans = TRANS_NONE;
       
  2520   unlockBtreeIfUnused(pBt);
       
  2521 
       
  2522   btreeIntegrity(p);
       
  2523   return SQLITE_OK;
       
  2524 }
       
  2525 
       
  2526 #ifndef NDEBUG
       
  2527 /*
       
  2528 ** Return the number of write-cursors open on this handle. This is for use
       
  2529 ** in assert() expressions, so it is only compiled if NDEBUG is not
       
  2530 ** defined.
       
  2531 */
       
  2532 static int countWriteCursors(BtShared *pBt){
       
  2533   BtCursor *pCur;
       
  2534   int r = 0;
       
  2535   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
       
  2536     if( pCur->wrFlag ) r++; 
       
  2537   }
       
  2538   return r;
       
  2539 }
       
  2540 #endif
       
  2541 
       
  2542 #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
       
  2543 /*
       
  2544 ** Print debugging information about all cursors to standard output.
       
  2545 */
       
  2546 void sqlite3BtreeCursorList(Btree *p){
       
  2547   BtCursor *pCur;
       
  2548   BtShared *pBt = p->pBt;
       
  2549   for(pCur=pBt->pCursor; pCur; pCur=pCur->pNext){
       
  2550     MemPage *pPage = pCur->pPage;
       
  2551     char *zMode = pCur->wrFlag ? "rw" : "ro";
       
  2552     sqlite3DebugPrintf("CURSOR %p rooted at %4d(%s) currently at %d.%d%s\n",
       
  2553        pCur, pCur->pgnoRoot, zMode,
       
  2554        pPage ? pPage->pgno : 0, pCur->idx,
       
  2555        (pCur->eState==CURSOR_VALID) ? "" : " eof"
       
  2556     );
       
  2557   }
       
  2558 }
       
  2559 #endif
       
  2560 
       
  2561 /*
       
  2562 ** Rollback the transaction in progress.  All cursors will be
       
  2563 ** invalided by this operation.  Any attempt to use a cursor
       
  2564 ** that was open at the beginning of this operation will result
       
  2565 ** in an error.
       
  2566 **
       
  2567 ** This will release the write lock on the database file.  If there
       
  2568 ** are no active cursors, it also releases the read lock.
       
  2569 */
       
  2570 int sqlite3BtreeRollback(Btree *p){
       
  2571   int rc;
       
  2572   BtShared *pBt = p->pBt;
       
  2573   MemPage *pPage1;
       
  2574 
       
  2575   rc = saveAllCursors(pBt, 0, 0);
       
  2576 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  2577   if( rc!=SQLITE_OK ){
       
  2578     /* This is a horrible situation. An IO or malloc() error occured whilst
       
  2579     ** trying to save cursor positions. If this is an automatic rollback (as
       
  2580     ** the result of a constraint, malloc() failure or IO error) then 
       
  2581     ** the cache may be internally inconsistent (not contain valid trees) so
       
  2582     ** we cannot simply return the error to the caller. Instead, abort 
       
  2583     ** all queries that may be using any of the cursors that failed to save.
       
  2584     */
       
  2585     while( pBt->pCursor ){
       
  2586       sqlite3 *db = pBt->pCursor->pBtree->pSqlite;
       
  2587       if( db ){
       
  2588         sqlite3AbortOtherActiveVdbes(db, 0);
       
  2589       }
       
  2590     }
       
  2591   }
       
  2592 #endif
       
  2593   btreeIntegrity(p);
       
  2594   unlockAllTables(p);
       
  2595 
       
  2596   if( p->inTrans==TRANS_WRITE ){
       
  2597     int rc2;
       
  2598 
       
  2599     assert( TRANS_WRITE==pBt->inTransaction );
       
  2600     rc2 = sqlite3pager_rollback(pBt->pPager);
       
  2601     if( rc2!=SQLITE_OK ){
       
  2602       rc = rc2;
       
  2603     }
       
  2604 
       
  2605     /* The rollback may have destroyed the pPage1->aData value.  So
       
  2606     ** call getPage() on page 1 again to make sure pPage1->aData is
       
  2607     ** set correctly. */
       
  2608     if( getPage(pBt, 1, &pPage1)==SQLITE_OK ){
       
  2609       releasePage(pPage1);
       
  2610     }
       
  2611     assert( countWriteCursors(pBt)==0 );
       
  2612     pBt->inTransaction = TRANS_READ;
       
  2613   }
       
  2614 
       
  2615   if( p->inTrans!=TRANS_NONE ){
       
  2616     assert( pBt->nTransaction>0 );
       
  2617     pBt->nTransaction--;
       
  2618     if( 0==pBt->nTransaction ){
       
  2619       pBt->inTransaction = TRANS_NONE;
       
  2620     }
       
  2621   }
       
  2622 
       
  2623   p->inTrans = TRANS_NONE;
       
  2624   pBt->inStmt = 0;
       
  2625   unlockBtreeIfUnused(pBt);
       
  2626 
       
  2627   btreeIntegrity(p);
       
  2628   return rc;
       
  2629 }
       
  2630 
       
  2631 /*
       
  2632 ** Start a statement subtransaction.  The subtransaction can
       
  2633 ** can be rolled back independently of the main transaction.
       
  2634 ** You must start a transaction before starting a subtransaction.
       
  2635 ** The subtransaction is ended automatically if the main transaction
       
  2636 ** commits or rolls back.
       
  2637 **
       
  2638 ** Only one subtransaction may be active at a time.  It is an error to try
       
  2639 ** to start a new subtransaction if another subtransaction is already active.
       
  2640 **
       
  2641 ** Statement subtransactions are used around individual SQL statements
       
  2642 ** that are contained within a BEGIN...COMMIT block.  If a constraint
       
  2643 ** error occurs within the statement, the effect of that one statement
       
  2644 ** can be rolled back without having to rollback the entire transaction.
       
  2645 */
       
  2646 int sqlite3BtreeBeginStmt(Btree *p){
       
  2647   int rc;
       
  2648   BtShared *pBt = p->pBt;
       
  2649   if( (p->inTrans!=TRANS_WRITE) || pBt->inStmt ){
       
  2650     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  2651   }
       
  2652   assert( pBt->inTransaction==TRANS_WRITE );
       
  2653   rc = pBt->readOnly ? SQLITE_OK : sqlite3pager_stmt_begin(pBt->pPager);
       
  2654   pBt->inStmt = 1;
       
  2655   return rc;
       
  2656 }
       
  2657 
       
  2658 
       
  2659 /*
       
  2660 ** Commit the statment subtransaction currently in progress.  If no
       
  2661 ** subtransaction is active, this is a no-op.
       
  2662 */
       
  2663 int sqlite3BtreeCommitStmt(Btree *p){
       
  2664   int rc;
       
  2665   BtShared *pBt = p->pBt;
       
  2666   if( pBt->inStmt && !pBt->readOnly ){
       
  2667     rc = sqlite3pager_stmt_commit(pBt->pPager);
       
  2668   }else{
       
  2669     rc = SQLITE_OK;
       
  2670   }
       
  2671   pBt->inStmt = 0;
       
  2672   return rc;
       
  2673 }
       
  2674 
       
  2675 /*
       
  2676 ** Rollback the active statement subtransaction.  If no subtransaction
       
  2677 ** is active this routine is a no-op.
       
  2678 **
       
  2679 ** All cursors will be invalidated by this operation.  Any attempt
       
  2680 ** to use a cursor that was open at the beginning of this operation
       
  2681 ** will result in an error.
       
  2682 */
       
  2683 int sqlite3BtreeRollbackStmt(Btree *p){
       
  2684   int rc = SQLITE_OK;
       
  2685   BtShared *pBt = p->pBt;
       
  2686   sqlite3MallocDisallow();
       
  2687   if( pBt->inStmt && !pBt->readOnly ){
       
  2688     rc = sqlite3pager_stmt_rollback(pBt->pPager);
       
  2689     assert( countWriteCursors(pBt)==0 );
       
  2690     pBt->inStmt = 0;
       
  2691   }
       
  2692   sqlite3MallocAllow();
       
  2693   return rc;
       
  2694 }
       
  2695 
       
  2696 /*
       
  2697 ** Default key comparison function to be used if no comparison function
       
  2698 ** is specified on the sqlite3BtreeCursor() call.
       
  2699 */
       
  2700 static int dfltCompare(
       
  2701   void *NotUsed,             /* User data is not used */
       
  2702   int n1, const void *p1,    /* First key to compare */
       
  2703   int n2, const void *p2     /* Second key to compare */
       
  2704 ){
       
  2705   int c;
       
  2706   c = memcmp(p1, p2, n1<n2 ? n1 : n2);
       
  2707   if( c==0 ){
       
  2708     c = n1 - n2;
       
  2709   }
       
  2710   return c;
       
  2711 }
       
  2712 
       
  2713 /*
       
  2714 ** Create a new cursor for the BTree whose root is on the page
       
  2715 ** iTable.  The act of acquiring a cursor gets a read lock on 
       
  2716 ** the database file.
       
  2717 **
       
  2718 ** If wrFlag==0, then the cursor can only be used for reading.
       
  2719 ** If wrFlag==1, then the cursor can be used for reading or for
       
  2720 ** writing if other conditions for writing are also met.  These
       
  2721 ** are the conditions that must be met in order for writing to
       
  2722 ** be allowed:
       
  2723 **
       
  2724 ** 1:  The cursor must have been opened with wrFlag==1
       
  2725 **
       
  2726 ** 2:  No other cursors may be open with wrFlag==0 on the same table
       
  2727 **
       
  2728 ** 3:  The database must be writable (not on read-only media)
       
  2729 **
       
  2730 ** 4:  There must be an active transaction.
       
  2731 **
       
  2732 ** Condition 2 warrants further discussion.  If any cursor is opened
       
  2733 ** on a table with wrFlag==0, that prevents all other cursors from
       
  2734 ** writing to that table.  This is a kind of "read-lock".  When a cursor
       
  2735 ** is opened with wrFlag==0 it is guaranteed that the table will not
       
  2736 ** change as long as the cursor is open.  This allows the cursor to
       
  2737 ** do a sequential scan of the table without having to worry about
       
  2738 ** entries being inserted or deleted during the scan.  Cursors should
       
  2739 ** be opened with wrFlag==0 only if this read-lock property is needed.
       
  2740 ** That is to say, cursors should be opened with wrFlag==0 only if they
       
  2741 ** intend to use the sqlite3BtreeNext() system call.  All other cursors
       
  2742 ** should be opened with wrFlag==1 even if they never really intend
       
  2743 ** to write.
       
  2744 ** 
       
  2745 ** No checking is done to make sure that page iTable really is the
       
  2746 ** root page of a b-tree.  If it is not, then the cursor acquired
       
  2747 ** will not work correctly.
       
  2748 **
       
  2749 ** The comparison function must be logically the same for every cursor
       
  2750 ** on a particular table.  Changing the comparison function will result
       
  2751 ** in incorrect operations.  If the comparison function is NULL, a
       
  2752 ** default comparison function is used.  The comparison function is
       
  2753 ** always ignored for INTKEY tables.
       
  2754 */
       
  2755 int sqlite3BtreeCursor(
       
  2756   Btree *p,                                   /* The btree */
       
  2757   int iTable,                                 /* Root page of table to open */
       
  2758   int wrFlag,                                 /* 1 to write. 0 read-only */
       
  2759   int (*xCmp)(void*,int,const void*,int,const void*), /* Key Comparison func */
       
  2760   void *pArg,                                 /* First arg to xCompare() */
       
  2761   BtCursor **ppCur                            /* Write new cursor here */
       
  2762 ){
       
  2763   int rc;
       
  2764   BtCursor *pCur;
       
  2765   BtShared *pBt = p->pBt;
       
  2766 
       
  2767   *ppCur = 0;
       
  2768   if( wrFlag ){
       
  2769     if( pBt->readOnly ){
       
  2770       return SQLITE_READONLY;
       
  2771     }
       
  2772     if( checkReadLocks(p, iTable, 0) ){
       
  2773       return SQLITE_LOCKED;
       
  2774     }
       
  2775   }
       
  2776 
       
  2777   if( pBt->pPage1==0 ){
       
  2778     rc = lockBtreeWithRetry(p);
       
  2779     if( rc!=SQLITE_OK ){
       
  2780       return rc;
       
  2781     }
       
  2782   }
       
  2783   pCur = sqliteMalloc( sizeof(*pCur) );
       
  2784   if( pCur==0 ){
       
  2785     rc = SQLITE_NOMEM;
       
  2786     goto create_cursor_exception;
       
  2787   }
       
  2788   pCur->pgnoRoot = (Pgno)iTable;
       
  2789   if( iTable==1 && sqlite3pager_pagecount(pBt->pPager)==0 ){
       
  2790     rc = SQLITE_EMPTY;
       
  2791     goto create_cursor_exception;
       
  2792   }
       
  2793   rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->pPage, 0);
       
  2794   if( rc!=SQLITE_OK ){
       
  2795     goto create_cursor_exception;
       
  2796   }
       
  2797 
       
  2798   /* Now that no other errors can occur, finish filling in the BtCursor
       
  2799   ** variables, link the cursor into the BtShared list and set *ppCur (the
       
  2800   ** output argument to this function).
       
  2801   */
       
  2802   pCur->xCompare = xCmp ? xCmp : dfltCompare;
       
  2803   pCur->pArg = pArg;
       
  2804   pCur->pBtree = p;
       
  2805   pCur->wrFlag = wrFlag;
       
  2806   pCur->pNext = pBt->pCursor;
       
  2807   if( pCur->pNext ){
       
  2808     pCur->pNext->pPrev = pCur;
       
  2809   }
       
  2810   pBt->pCursor = pCur;
       
  2811   pCur->eState = CURSOR_INVALID;
       
  2812   *ppCur = pCur;
       
  2813 
       
  2814   return SQLITE_OK;
       
  2815 create_cursor_exception:
       
  2816   if( pCur ){
       
  2817     releasePage(pCur->pPage);
       
  2818     sqliteFree(pCur);
       
  2819   }
       
  2820   unlockBtreeIfUnused(pBt);
       
  2821   return rc;
       
  2822 }
       
  2823 
       
  2824 #if 0  /* Not Used */
       
  2825 /*
       
  2826 ** Change the value of the comparison function used by a cursor.
       
  2827 */
       
  2828 void sqlite3BtreeSetCompare(
       
  2829   BtCursor *pCur,     /* The cursor to whose comparison function is changed */
       
  2830   int(*xCmp)(void*,int,const void*,int,const void*), /* New comparison func */
       
  2831   void *pArg          /* First argument to xCmp() */
       
  2832 ){
       
  2833   pCur->xCompare = xCmp ? xCmp : dfltCompare;
       
  2834   pCur->pArg = pArg;
       
  2835 }
       
  2836 #endif
       
  2837 
       
  2838 /*
       
  2839 ** Close a cursor.  The read lock on the database file is released
       
  2840 ** when the last cursor is closed.
       
  2841 */
       
  2842 int sqlite3BtreeCloseCursor(BtCursor *pCur){
       
  2843   BtShared *pBt = pCur->pBtree->pBt;
       
  2844   restoreOrClearCursorPosition(pCur, 0);
       
  2845   if( pCur->pPrev ){
       
  2846     pCur->pPrev->pNext = pCur->pNext;
       
  2847   }else{
       
  2848     pBt->pCursor = pCur->pNext;
       
  2849   }
       
  2850   if( pCur->pNext ){
       
  2851     pCur->pNext->pPrev = pCur->pPrev;
       
  2852   }
       
  2853   releasePage(pCur->pPage);
       
  2854   unlockBtreeIfUnused(pBt);
       
  2855   sqliteFree(pCur);
       
  2856   return SQLITE_OK;
       
  2857 }
       
  2858 
       
  2859 /*
       
  2860 ** Make a temporary cursor by filling in the fields of pTempCur.
       
  2861 ** The temporary cursor is not on the cursor list for the Btree.
       
  2862 */
       
  2863 static void getTempCursor(BtCursor *pCur, BtCursor *pTempCur){
       
  2864   memcpy(pTempCur, pCur, sizeof(*pCur));
       
  2865   pTempCur->pNext = 0;
       
  2866   pTempCur->pPrev = 0;
       
  2867   if( pTempCur->pPage ){
       
  2868     sqlite3pager_ref(pTempCur->pPage->aData);
       
  2869   }
       
  2870 }
       
  2871 
       
  2872 /*
       
  2873 ** Delete a temporary cursor such as was made by the CreateTemporaryCursor()
       
  2874 ** function above.
       
  2875 */
       
  2876 static void releaseTempCursor(BtCursor *pCur){
       
  2877   if( pCur->pPage ){
       
  2878     sqlite3pager_unref(pCur->pPage->aData);
       
  2879   }
       
  2880 }
       
  2881 
       
  2882 /*
       
  2883 ** Make sure the BtCursor.info field of the given cursor is valid.
       
  2884 ** If it is not already valid, call parseCell() to fill it in.
       
  2885 **
       
  2886 ** BtCursor.info is a cache of the information in the current cell.
       
  2887 ** Using this cache reduces the number of calls to parseCell().
       
  2888 */
       
  2889 static void getCellInfo(BtCursor *pCur){
       
  2890   if( pCur->info.nSize==0 ){
       
  2891     parseCell(pCur->pPage, pCur->idx, &pCur->info);
       
  2892   }else{
       
  2893 #ifndef NDEBUG
       
  2894     CellInfo info;
       
  2895     memset(&info, 0, sizeof(info));
       
  2896     parseCell(pCur->pPage, pCur->idx, &info);
       
  2897     assert( memcmp(&info, &pCur->info, sizeof(info))==0 );
       
  2898 #endif
       
  2899   }
       
  2900 }
       
  2901 
       
  2902 /*
       
  2903 ** Set *pSize to the size of the buffer needed to hold the value of
       
  2904 ** the key for the current entry.  If the cursor is not pointing
       
  2905 ** to a valid entry, *pSize is set to 0. 
       
  2906 **
       
  2907 ** For a table with the INTKEY flag set, this routine returns the key
       
  2908 ** itself, not the number of bytes in the key.
       
  2909 */
       
  2910 int sqlite3BtreeKeySize(BtCursor *pCur, i64 *pSize){
       
  2911   int rc = restoreOrClearCursorPosition(pCur, 1);
       
  2912   if( rc==SQLITE_OK ){
       
  2913     assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
       
  2914     if( pCur->eState==CURSOR_INVALID ){
       
  2915       *pSize = 0;
       
  2916     }else{
       
  2917       getCellInfo(pCur);
       
  2918       *pSize = pCur->info.nKey;
       
  2919     }
       
  2920   }
       
  2921   return rc;
       
  2922 }
       
  2923 
       
  2924 /*
       
  2925 ** Set *pSize to the number of bytes of data in the entry the
       
  2926 ** cursor currently points to.  Always return SQLITE_OK.
       
  2927 ** Failure is not possible.  If the cursor is not currently
       
  2928 ** pointing to an entry (which can happen, for example, if
       
  2929 ** the database is empty) then *pSize is set to 0.
       
  2930 */
       
  2931 int sqlite3BtreeDataSize(BtCursor *pCur, u32 *pSize){
       
  2932   int rc = restoreOrClearCursorPosition(pCur, 1);
       
  2933   if( rc==SQLITE_OK ){
       
  2934     assert( pCur->eState==CURSOR_INVALID || pCur->eState==CURSOR_VALID );
       
  2935     if( pCur->eState==CURSOR_INVALID ){
       
  2936       /* Not pointing at a valid entry - set *pSize to 0. */
       
  2937       *pSize = 0;
       
  2938     }else{
       
  2939       getCellInfo(pCur);
       
  2940       *pSize = pCur->info.nData;
       
  2941     }
       
  2942   }
       
  2943   return rc;
       
  2944 }
       
  2945 
       
  2946 /*
       
  2947 ** Read payload information from the entry that the pCur cursor is
       
  2948 ** pointing to.  Begin reading the payload at "offset" and read
       
  2949 ** a total of "amt" bytes.  Put the result in zBuf.
       
  2950 **
       
  2951 ** This routine does not make a distinction between key and data.
       
  2952 ** It just reads bytes from the payload area.  Data might appear
       
  2953 ** on the main page or be scattered out on multiple overflow pages.
       
  2954 */
       
  2955 static int getPayload(
       
  2956   BtCursor *pCur,      /* Cursor pointing to entry to read from */
       
  2957   int offset,          /* Begin reading this far into payload */
       
  2958   int amt,             /* Read this many bytes */
       
  2959   unsigned char *pBuf, /* Write the bytes into this buffer */ 
       
  2960   int skipKey          /* offset begins at data if this is true */
       
  2961 ){
       
  2962   unsigned char *aPayload;
       
  2963   Pgno nextPage;
       
  2964   int rc;
       
  2965   MemPage *pPage;
       
  2966   BtShared *pBt;
       
  2967   int ovflSize;
       
  2968   u32 nKey;
       
  2969 
       
  2970   assert( pCur!=0 && pCur->pPage!=0 );
       
  2971   assert( pCur->eState==CURSOR_VALID );
       
  2972   pBt = pCur->pBtree->pBt;
       
  2973   pPage = pCur->pPage;
       
  2974   pageIntegrity(pPage);
       
  2975   assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
       
  2976   getCellInfo(pCur);
       
  2977   aPayload = pCur->info.pCell + pCur->info.nHeader;
       
  2978   if( pPage->intKey ){
       
  2979     nKey = 0;
       
  2980   }else{
       
  2981     nKey = pCur->info.nKey;
       
  2982   }
       
  2983   assert( offset>=0 );
       
  2984   if( skipKey ){
       
  2985     offset += nKey;
       
  2986   }
       
  2987   if( offset+amt > nKey+pCur->info.nData ){
       
  2988     return SQLITE_ERROR;
       
  2989   }
       
  2990   if( offset<pCur->info.nLocal ){
       
  2991     int a = amt;
       
  2992     if( a+offset>pCur->info.nLocal ){
       
  2993       a = pCur->info.nLocal - offset;
       
  2994     }
       
  2995     memcpy(pBuf, &aPayload[offset], a);
       
  2996     if( a==amt ){
       
  2997       return SQLITE_OK;
       
  2998     }
       
  2999     offset = 0;
       
  3000     pBuf += a;
       
  3001     amt -= a;
       
  3002   }else{
       
  3003     offset -= pCur->info.nLocal;
       
  3004   }
       
  3005   ovflSize = pBt->usableSize - 4;
       
  3006   if( amt>0 ){
       
  3007     nextPage = get4byte(&aPayload[pCur->info.nLocal]);
       
  3008     while( amt>0 && nextPage ){
       
  3009       rc = sqlite3pager_get(pBt->pPager, nextPage, (void**)&aPayload);
       
  3010       if( rc!=0 ){
       
  3011         return rc;
       
  3012       }
       
  3013       nextPage = get4byte(aPayload);
       
  3014       if( offset<ovflSize ){
       
  3015         int a = amt;
       
  3016         if( a + offset > ovflSize ){
       
  3017           a = ovflSize - offset;
       
  3018         }
       
  3019         memcpy(pBuf, &aPayload[offset+4], a);
       
  3020         offset = 0;
       
  3021         amt -= a;
       
  3022         pBuf += a;
       
  3023       }else{
       
  3024         offset -= ovflSize;
       
  3025       }
       
  3026       sqlite3pager_unref(aPayload);
       
  3027     }
       
  3028   }
       
  3029 
       
  3030   if( amt>0 ){
       
  3031     return SQLITE_CORRUPT_BKPT;
       
  3032   }
       
  3033   return SQLITE_OK;
       
  3034 }
       
  3035 
       
  3036 /*
       
  3037 ** Read part of the key associated with cursor pCur.  Exactly
       
  3038 ** "amt" bytes will be transfered into pBuf[].  The transfer
       
  3039 ** begins at "offset".
       
  3040 **
       
  3041 ** Return SQLITE_OK on success or an error code if anything goes
       
  3042 ** wrong.  An error is returned if "offset+amt" is larger than
       
  3043 ** the available payload.
       
  3044 */
       
  3045 int sqlite3BtreeKey(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
       
  3046   int rc = restoreOrClearCursorPosition(pCur, 1);
       
  3047   if( rc==SQLITE_OK ){
       
  3048     assert( pCur->eState==CURSOR_VALID );
       
  3049     assert( pCur->pPage!=0 );
       
  3050     if( pCur->pPage->intKey ){
       
  3051       return SQLITE_CORRUPT_BKPT;
       
  3052     }
       
  3053     assert( pCur->pPage->intKey==0 );
       
  3054     assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
       
  3055     rc = getPayload(pCur, offset, amt, (unsigned char*)pBuf, 0);
       
  3056   }
       
  3057   return rc;
       
  3058 }
       
  3059 
       
  3060 /*
       
  3061 ** Read part of the data associated with cursor pCur.  Exactly
       
  3062 ** "amt" bytes will be transfered into pBuf[].  The transfer
       
  3063 ** begins at "offset".
       
  3064 **
       
  3065 ** Return SQLITE_OK on success or an error code if anything goes
       
  3066 ** wrong.  An error is returned if "offset+amt" is larger than
       
  3067 ** the available payload.
       
  3068 */
       
  3069 int sqlite3BtreeData(BtCursor *pCur, u32 offset, u32 amt, void *pBuf){
       
  3070   int rc = restoreOrClearCursorPosition(pCur, 1);
       
  3071   if( rc==SQLITE_OK ){
       
  3072     assert( pCur->eState==CURSOR_VALID );
       
  3073     assert( pCur->pPage!=0 );
       
  3074     assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
       
  3075     rc = getPayload(pCur, offset, amt, pBuf, 1);
       
  3076   }
       
  3077   return rc;
       
  3078 }
       
  3079 
       
  3080 /*
       
  3081 ** Return a pointer to payload information from the entry that the 
       
  3082 ** pCur cursor is pointing to.  The pointer is to the beginning of
       
  3083 ** the key if skipKey==0 and it points to the beginning of data if
       
  3084 ** skipKey==1.  The number of bytes of available key/data is written
       
  3085 ** into *pAmt.  If *pAmt==0, then the value returned will not be
       
  3086 ** a valid pointer.
       
  3087 **
       
  3088 ** This routine is an optimization.  It is common for the entire key
       
  3089 ** and data to fit on the local page and for there to be no overflow
       
  3090 ** pages.  When that is so, this routine can be used to access the
       
  3091 ** key and data without making a copy.  If the key and/or data spills
       
  3092 ** onto overflow pages, then getPayload() must be used to reassembly
       
  3093 ** the key/data and copy it into a preallocated buffer.
       
  3094 **
       
  3095 ** The pointer returned by this routine looks directly into the cached
       
  3096 ** page of the database.  The data might change or move the next time
       
  3097 ** any btree routine is called.
       
  3098 */
       
  3099 static const unsigned char *fetchPayload(
       
  3100   BtCursor *pCur,      /* Cursor pointing to entry to read from */
       
  3101   int *pAmt,           /* Write the number of available bytes here */
       
  3102   int skipKey          /* read beginning at data if this is true */
       
  3103 ){
       
  3104   unsigned char *aPayload;
       
  3105   MemPage *pPage;
       
  3106   u32 nKey;
       
  3107   int nLocal;
       
  3108 
       
  3109   assert( pCur!=0 && pCur->pPage!=0 );
       
  3110   assert( pCur->eState==CURSOR_VALID );
       
  3111   pPage = pCur->pPage;
       
  3112   pageIntegrity(pPage);
       
  3113   assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
       
  3114   getCellInfo(pCur);
       
  3115   aPayload = pCur->info.pCell;
       
  3116   aPayload += pCur->info.nHeader;
       
  3117   if( pPage->intKey ){
       
  3118     nKey = 0;
       
  3119   }else{
       
  3120     nKey = pCur->info.nKey;
       
  3121   }
       
  3122   if( skipKey ){
       
  3123     aPayload += nKey;
       
  3124     nLocal = pCur->info.nLocal - nKey;
       
  3125   }else{
       
  3126     nLocal = pCur->info.nLocal;
       
  3127     if( nLocal>nKey ){
       
  3128       nLocal = nKey;
       
  3129     }
       
  3130   }
       
  3131   *pAmt = nLocal;
       
  3132   return aPayload;
       
  3133 }
       
  3134 
       
  3135 
       
  3136 /*
       
  3137 ** For the entry that cursor pCur is point to, return as
       
  3138 ** many bytes of the key or data as are available on the local
       
  3139 ** b-tree page.  Write the number of available bytes into *pAmt.
       
  3140 **
       
  3141 ** The pointer returned is ephemeral.  The key/data may move
       
  3142 ** or be destroyed on the next call to any Btree routine.
       
  3143 **
       
  3144 ** These routines is used to get quick access to key and data
       
  3145 ** in the common case where no overflow pages are used.
       
  3146 */
       
  3147 const void *sqlite3BtreeKeyFetch(BtCursor *pCur, int *pAmt){
       
  3148   if( pCur->eState==CURSOR_VALID ){
       
  3149     return (const void*)fetchPayload(pCur, pAmt, 0);
       
  3150   }
       
  3151   return 0;
       
  3152 }
       
  3153 const void *sqlite3BtreeDataFetch(BtCursor *pCur, int *pAmt){
       
  3154   if( pCur->eState==CURSOR_VALID ){
       
  3155     return (const void*)fetchPayload(pCur, pAmt, 1);
       
  3156   }
       
  3157   return 0;
       
  3158 }
       
  3159 
       
  3160 
       
  3161 /*
       
  3162 ** Move the cursor down to a new child page.  The newPgno argument is the
       
  3163 ** page number of the child page to move to.
       
  3164 */
       
  3165 static int moveToChild(BtCursor *pCur, u32 newPgno){
       
  3166   int rc;
       
  3167   MemPage *pNewPage;
       
  3168   MemPage *pOldPage;
       
  3169   BtShared *pBt = pCur->pBtree->pBt;
       
  3170 
       
  3171   assert( pCur->eState==CURSOR_VALID );
       
  3172   rc = getAndInitPage(pBt, newPgno, &pNewPage, pCur->pPage);
       
  3173   if( rc ) return rc;
       
  3174   pageIntegrity(pNewPage);
       
  3175   pNewPage->idxParent = pCur->idx;
       
  3176   pOldPage = pCur->pPage;
       
  3177   pOldPage->idxShift = 0;
       
  3178   releasePage(pOldPage);
       
  3179   pCur->pPage = pNewPage;
       
  3180   pCur->idx = 0;
       
  3181   pCur->info.nSize = 0;
       
  3182   if( pNewPage->nCell<1 ){
       
  3183     return SQLITE_CORRUPT_BKPT;
       
  3184   }
       
  3185   return SQLITE_OK;
       
  3186 }
       
  3187 
       
  3188 /*
       
  3189 ** Return true if the page is the virtual root of its table.
       
  3190 **
       
  3191 ** The virtual root page is the root page for most tables.  But
       
  3192 ** for the table rooted on page 1, sometime the real root page
       
  3193 ** is empty except for the right-pointer.  In such cases the
       
  3194 ** virtual root page is the page that the right-pointer of page
       
  3195 ** 1 is pointing to.
       
  3196 */
       
  3197 static int isRootPage(MemPage *pPage){
       
  3198   MemPage *pParent = pPage->pParent;
       
  3199   if( pParent==0 ) return 1;
       
  3200   if( pParent->pgno>1 ) return 0;
       
  3201   if( get2byte(&pParent->aData[pParent->hdrOffset+3])==0 ) return 1;
       
  3202   return 0;
       
  3203 }
       
  3204 
       
  3205 /*
       
  3206 ** Move the cursor up to the parent page.
       
  3207 **
       
  3208 ** pCur->idx is set to the cell index that contains the pointer
       
  3209 ** to the page we are coming from.  If we are coming from the
       
  3210 ** right-most child page then pCur->idx is set to one more than
       
  3211 ** the largest cell index.
       
  3212 */
       
  3213 static void moveToParent(BtCursor *pCur){
       
  3214   MemPage *pParent;
       
  3215   MemPage *pPage;
       
  3216   int idxParent;
       
  3217 
       
  3218   assert( pCur->eState==CURSOR_VALID );
       
  3219   pPage = pCur->pPage;
       
  3220   assert( pPage!=0 );
       
  3221   assert( !isRootPage(pPage) );
       
  3222   pageIntegrity(pPage);
       
  3223   pParent = pPage->pParent;
       
  3224   assert( pParent!=0 );
       
  3225   pageIntegrity(pParent);
       
  3226   idxParent = pPage->idxParent;
       
  3227   sqlite3pager_ref(pParent->aData);
       
  3228   releasePage(pPage);
       
  3229   pCur->pPage = pParent;
       
  3230   pCur->info.nSize = 0;
       
  3231   assert( pParent->idxShift==0 );
       
  3232   pCur->idx = idxParent;
       
  3233 }
       
  3234 
       
  3235 /*
       
  3236 ** Move the cursor to the root page
       
  3237 */
       
  3238 static int moveToRoot(BtCursor *pCur){
       
  3239   MemPage *pRoot;
       
  3240   int rc = SQLITE_OK;
       
  3241   BtShared *pBt = pCur->pBtree->pBt;
       
  3242 
       
  3243   restoreOrClearCursorPosition(pCur, 0);
       
  3244   pRoot = pCur->pPage;
       
  3245   if( pRoot && pRoot->pgno==pCur->pgnoRoot ){
       
  3246     assert( pRoot->isInit );
       
  3247   }else{
       
  3248     if( 
       
  3249       SQLITE_OK!=(rc = getAndInitPage(pBt, pCur->pgnoRoot, &pRoot, 0))
       
  3250     ){
       
  3251       pCur->eState = CURSOR_INVALID;
       
  3252       return rc;
       
  3253     }
       
  3254     releasePage(pCur->pPage);
       
  3255     pageIntegrity(pRoot);
       
  3256     pCur->pPage = pRoot;
       
  3257   }
       
  3258   pCur->idx = 0;
       
  3259   pCur->info.nSize = 0;
       
  3260   if( pRoot->nCell==0 && !pRoot->leaf ){
       
  3261     Pgno subpage;
       
  3262     assert( pRoot->pgno==1 );
       
  3263     subpage = get4byte(&pRoot->aData[pRoot->hdrOffset+8]);
       
  3264     assert( subpage>0 );
       
  3265     pCur->eState = CURSOR_VALID;
       
  3266     rc = moveToChild(pCur, subpage);
       
  3267   }
       
  3268   pCur->eState = ((pCur->pPage->nCell>0)?CURSOR_VALID:CURSOR_INVALID);
       
  3269   return rc;
       
  3270 }
       
  3271 
       
  3272 /*
       
  3273 ** Move the cursor down to the left-most leaf entry beneath the
       
  3274 ** entry to which it is currently pointing.
       
  3275 **
       
  3276 ** The left-most leaf is the one with the smallest key - the first
       
  3277 ** in ascending order.
       
  3278 */
       
  3279 static int moveToLeftmost(BtCursor *pCur){
       
  3280   Pgno pgno;
       
  3281   int rc;
       
  3282   MemPage *pPage;
       
  3283 
       
  3284   assert( pCur->eState==CURSOR_VALID );
       
  3285   while( !(pPage = pCur->pPage)->leaf ){
       
  3286     assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
       
  3287     pgno = get4byte(findCell(pPage, pCur->idx));
       
  3288     rc = moveToChild(pCur, pgno);
       
  3289     if( rc ) return rc;
       
  3290   }
       
  3291   return SQLITE_OK;
       
  3292 }
       
  3293 
       
  3294 /*
       
  3295 ** Move the cursor down to the right-most leaf entry beneath the
       
  3296 ** page to which it is currently pointing.  Notice the difference
       
  3297 ** between moveToLeftmost() and moveToRightmost().  moveToLeftmost()
       
  3298 ** finds the left-most entry beneath the *entry* whereas moveToRightmost()
       
  3299 ** finds the right-most entry beneath the *page*.
       
  3300 **
       
  3301 ** The right-most entry is the one with the largest key - the last
       
  3302 ** key in ascending order.
       
  3303 */
       
  3304 static int moveToRightmost(BtCursor *pCur){
       
  3305   Pgno pgno;
       
  3306   int rc;
       
  3307   MemPage *pPage;
       
  3308 
       
  3309   assert( pCur->eState==CURSOR_VALID );
       
  3310   while( !(pPage = pCur->pPage)->leaf ){
       
  3311     pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
       
  3312     pCur->idx = pPage->nCell;
       
  3313     rc = moveToChild(pCur, pgno);
       
  3314     if( rc ) return rc;
       
  3315   }
       
  3316   pCur->idx = pPage->nCell - 1;
       
  3317   pCur->info.nSize = 0;
       
  3318   return SQLITE_OK;
       
  3319 }
       
  3320 
       
  3321 /* Move the cursor to the first entry in the table.  Return SQLITE_OK
       
  3322 ** on success.  Set *pRes to 0 if the cursor actually points to something
       
  3323 ** or set *pRes to 1 if the table is empty.
       
  3324 */
       
  3325 int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
       
  3326   int rc;
       
  3327   rc = moveToRoot(pCur);
       
  3328   if( rc ) return rc;
       
  3329   if( pCur->eState==CURSOR_INVALID ){
       
  3330     assert( pCur->pPage->nCell==0 );
       
  3331     *pRes = 1;
       
  3332     return SQLITE_OK;
       
  3333   }
       
  3334   assert( pCur->pPage->nCell>0 );
       
  3335   *pRes = 0;
       
  3336   rc = moveToLeftmost(pCur);
       
  3337   return rc;
       
  3338 }
       
  3339 
       
  3340 /* Move the cursor to the last entry in the table.  Return SQLITE_OK
       
  3341 ** on success.  Set *pRes to 0 if the cursor actually points to something
       
  3342 ** or set *pRes to 1 if the table is empty.
       
  3343 */
       
  3344 int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
       
  3345   int rc;
       
  3346   rc = moveToRoot(pCur);
       
  3347   if( rc ) return rc;
       
  3348   if( CURSOR_INVALID==pCur->eState ){
       
  3349     assert( pCur->pPage->nCell==0 );
       
  3350     *pRes = 1;
       
  3351     return SQLITE_OK;
       
  3352   }
       
  3353   assert( pCur->eState==CURSOR_VALID );
       
  3354   *pRes = 0;
       
  3355   rc = moveToRightmost(pCur);
       
  3356   return rc;
       
  3357 }
       
  3358 
       
  3359 /* Move the cursor so that it points to an entry near pKey/nKey.
       
  3360 ** Return a success code.
       
  3361 **
       
  3362 ** For INTKEY tables, only the nKey parameter is used.  pKey is
       
  3363 ** ignored.  For other tables, nKey is the number of bytes of data
       
  3364 ** in pKey.  The comparison function specified when the cursor was
       
  3365 ** created is used to compare keys.
       
  3366 **
       
  3367 ** If an exact match is not found, then the cursor is always
       
  3368 ** left pointing at a leaf page which would hold the entry if it
       
  3369 ** were present.  The cursor might point to an entry that comes
       
  3370 ** before or after the key.
       
  3371 **
       
  3372 ** The result of comparing the key with the entry to which the
       
  3373 ** cursor is written to *pRes if pRes!=NULL.  The meaning of
       
  3374 ** this value is as follows:
       
  3375 **
       
  3376 **     *pRes<0      The cursor is left pointing at an entry that
       
  3377 **                  is smaller than pKey or if the table is empty
       
  3378 **                  and the cursor is therefore left point to nothing.
       
  3379 **
       
  3380 **     *pRes==0     The cursor is left pointing at an entry that
       
  3381 **                  exactly matches pKey.
       
  3382 **
       
  3383 **     *pRes>0      The cursor is left pointing at an entry that
       
  3384 **                  is larger than pKey.
       
  3385 */
       
  3386 int sqlite3BtreeMoveto(BtCursor *pCur, const void *pKey, i64 nKey, int *pRes){
       
  3387   int rc;
       
  3388   int tryRightmost;
       
  3389   rc = moveToRoot(pCur);
       
  3390   if( rc ) return rc;
       
  3391   assert( pCur->pPage );
       
  3392   assert( pCur->pPage->isInit );
       
  3393   tryRightmost = pCur->pPage->intKey;
       
  3394   if( pCur->eState==CURSOR_INVALID ){
       
  3395     *pRes = -1;
       
  3396     assert( pCur->pPage->nCell==0 );
       
  3397     return SQLITE_OK;
       
  3398   }
       
  3399    for(;;){
       
  3400     int lwr, upr;
       
  3401     Pgno chldPg;
       
  3402     MemPage *pPage = pCur->pPage;
       
  3403     int c = -1;  /* pRes return if table is empty must be -1 */
       
  3404     lwr = 0;
       
  3405     upr = pPage->nCell-1;
       
  3406     if( !pPage->intKey && pKey==0 ){
       
  3407       return SQLITE_CORRUPT_BKPT;
       
  3408     }
       
  3409     pageIntegrity(pPage);
       
  3410     while( lwr<=upr ){
       
  3411       void *pCellKey;
       
  3412       i64 nCellKey;
       
  3413       pCur->idx = (lwr+upr)/2;
       
  3414       pCur->info.nSize = 0;
       
  3415       if( pPage->intKey ){
       
  3416         u8 *pCell;
       
  3417         if( tryRightmost ){
       
  3418           pCur->idx = upr;
       
  3419         }
       
  3420         pCell = findCell(pPage, pCur->idx) + pPage->childPtrSize;
       
  3421         if( pPage->hasData ){
       
  3422           u32 dummy;
       
  3423           pCell += getVarint32(pCell, &dummy);
       
  3424         }
       
  3425         getVarint(pCell, (u64 *)&nCellKey);
       
  3426         if( nCellKey<nKey ){
       
  3427           c = -1;
       
  3428         }else if( nCellKey>nKey ){
       
  3429           c = +1;
       
  3430           tryRightmost = 0;
       
  3431         }else{
       
  3432           c = 0;
       
  3433         }
       
  3434       }else{
       
  3435         int available;
       
  3436         pCellKey = (void *)fetchPayload(pCur, &available, 0);
       
  3437         nCellKey = pCur->info.nKey;
       
  3438         if( available>=nCellKey ){
       
  3439           c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
       
  3440         }else{
       
  3441           pCellKey = sqliteMallocRaw( nCellKey );
       
  3442           if( pCellKey==0 ) return SQLITE_NOMEM;
       
  3443           rc = sqlite3BtreeKey(pCur, 0, nCellKey, (void *)pCellKey);
       
  3444           c = pCur->xCompare(pCur->pArg, nCellKey, pCellKey, nKey, pKey);
       
  3445           sqliteFree(pCellKey);
       
  3446           if( rc ) return rc;
       
  3447         }
       
  3448       }
       
  3449       if( c==0 ){
       
  3450         if( pPage->leafData && !pPage->leaf ){
       
  3451           lwr = pCur->idx;
       
  3452           upr = lwr - 1;
       
  3453           break;
       
  3454         }else{
       
  3455           if( pRes ) *pRes = 0;
       
  3456           return SQLITE_OK;
       
  3457         }
       
  3458       }
       
  3459       if( c<0 ){
       
  3460         lwr = pCur->idx+1;
       
  3461       }else{
       
  3462         upr = pCur->idx-1;
       
  3463       }
       
  3464     }
       
  3465     assert( lwr==upr+1 );
       
  3466     assert( pPage->isInit );
       
  3467     if( pPage->leaf ){
       
  3468       chldPg = 0;
       
  3469     }else if( lwr>=pPage->nCell ){
       
  3470       chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
       
  3471     }else{
       
  3472       chldPg = get4byte(findCell(pPage, lwr));
       
  3473     }
       
  3474     if( chldPg==0 ){
       
  3475       assert( pCur->idx>=0 && pCur->idx<pCur->pPage->nCell );
       
  3476       if( pRes ) *pRes = c;
       
  3477       return SQLITE_OK;
       
  3478     }
       
  3479     pCur->idx = lwr;
       
  3480     pCur->info.nSize = 0;
       
  3481     rc = moveToChild(pCur, chldPg);
       
  3482     if( rc ){
       
  3483       return rc;
       
  3484     }
       
  3485   }
       
  3486   /* NOT REACHED */
       
  3487 }
       
  3488 
       
  3489 /*
       
  3490 ** Return TRUE if the cursor is not pointing at an entry of the table.
       
  3491 **
       
  3492 ** TRUE will be returned after a call to sqlite3BtreeNext() moves
       
  3493 ** past the last entry in the table or sqlite3BtreePrev() moves past
       
  3494 ** the first entry.  TRUE is also returned if the table is empty.
       
  3495 */
       
  3496 int sqlite3BtreeEof(BtCursor *pCur){
       
  3497   /* TODO: What if the cursor is in CURSOR_REQUIRESEEK but all table entries
       
  3498   ** have been deleted? This API will need to change to return an error code
       
  3499   ** as well as the boolean result value.
       
  3500   */
       
  3501   return (CURSOR_VALID!=pCur->eState);
       
  3502 }
       
  3503 
       
  3504 /*
       
  3505 ** Advance the cursor to the next entry in the database.  If
       
  3506 ** successful then set *pRes=0.  If the cursor
       
  3507 ** was already pointing to the last entry in the database before
       
  3508 ** this routine was called, then set *pRes=1.
       
  3509 */
       
  3510 int sqlite3BtreeNext(BtCursor *pCur, int *pRes){
       
  3511   int rc;
       
  3512   MemPage *pPage;
       
  3513 
       
  3514 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  3515   rc = restoreOrClearCursorPosition(pCur, 1);
       
  3516   if( rc!=SQLITE_OK ){
       
  3517     return rc;
       
  3518   }
       
  3519   if( pCur->skip>0 ){
       
  3520     pCur->skip = 0;
       
  3521     *pRes = 0;
       
  3522     return SQLITE_OK;
       
  3523   }
       
  3524   pCur->skip = 0;
       
  3525 #endif 
       
  3526 
       
  3527   assert( pRes!=0 );
       
  3528   pPage = pCur->pPage;
       
  3529   if( CURSOR_INVALID==pCur->eState ){
       
  3530     *pRes = 1;
       
  3531     return SQLITE_OK;
       
  3532   }
       
  3533   assert( pPage->isInit );
       
  3534   assert( pCur->idx<pPage->nCell );
       
  3535 
       
  3536   pCur->idx++;
       
  3537   pCur->info.nSize = 0;
       
  3538   if( pCur->idx>=pPage->nCell ){
       
  3539     if( !pPage->leaf ){
       
  3540       rc = moveToChild(pCur, get4byte(&pPage->aData[pPage->hdrOffset+8]));
       
  3541       if( rc ) return rc;
       
  3542       rc = moveToLeftmost(pCur);
       
  3543       *pRes = 0;
       
  3544       return rc;
       
  3545     }
       
  3546     do{
       
  3547       if( isRootPage(pPage) ){
       
  3548         *pRes = 1;
       
  3549         pCur->eState = CURSOR_INVALID;
       
  3550         return SQLITE_OK;
       
  3551       }
       
  3552       moveToParent(pCur);
       
  3553       pPage = pCur->pPage;
       
  3554     }while( pCur->idx>=pPage->nCell );
       
  3555     *pRes = 0;
       
  3556     if( pPage->leafData ){
       
  3557       rc = sqlite3BtreeNext(pCur, pRes);
       
  3558     }else{
       
  3559       rc = SQLITE_OK;
       
  3560     }
       
  3561     return rc;
       
  3562   }
       
  3563   *pRes = 0;
       
  3564   if( pPage->leaf ){
       
  3565     return SQLITE_OK;
       
  3566   }
       
  3567   rc = moveToLeftmost(pCur);
       
  3568   return rc;
       
  3569 }
       
  3570 
       
  3571 /*
       
  3572 ** Step the cursor to the back to the previous entry in the database.  If
       
  3573 ** successful then set *pRes=0.  If the cursor
       
  3574 ** was already pointing to the first entry in the database before
       
  3575 ** this routine was called, then set *pRes=1.
       
  3576 */
       
  3577 int sqlite3BtreePrevious(BtCursor *pCur, int *pRes){
       
  3578   int rc;
       
  3579   Pgno pgno;
       
  3580   MemPage *pPage;
       
  3581 
       
  3582 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  3583   rc = restoreOrClearCursorPosition(pCur, 1);
       
  3584   if( rc!=SQLITE_OK ){
       
  3585     return rc;
       
  3586   }
       
  3587   if( pCur->skip<0 ){
       
  3588     pCur->skip = 0;
       
  3589     *pRes = 0;
       
  3590     return SQLITE_OK;
       
  3591   }
       
  3592   pCur->skip = 0;
       
  3593 #endif
       
  3594 
       
  3595   if( CURSOR_INVALID==pCur->eState ){
       
  3596     *pRes = 1;
       
  3597     return SQLITE_OK;
       
  3598   }
       
  3599 
       
  3600   pPage = pCur->pPage;
       
  3601   assert( pPage->isInit );
       
  3602   assert( pCur->idx>=0 );
       
  3603   if( !pPage->leaf ){
       
  3604     pgno = get4byte( findCell(pPage, pCur->idx) );
       
  3605     rc = moveToChild(pCur, pgno);
       
  3606     if( rc ) return rc;
       
  3607     rc = moveToRightmost(pCur);
       
  3608   }else{
       
  3609     while( pCur->idx==0 ){
       
  3610       if( isRootPage(pPage) ){
       
  3611         pCur->eState = CURSOR_INVALID;
       
  3612         *pRes = 1;
       
  3613         return SQLITE_OK;
       
  3614       }
       
  3615       moveToParent(pCur);
       
  3616       pPage = pCur->pPage;
       
  3617     }
       
  3618     pCur->idx--;
       
  3619     pCur->info.nSize = 0;
       
  3620     if( pPage->leafData && !pPage->leaf ){
       
  3621       rc = sqlite3BtreePrevious(pCur, pRes);
       
  3622     }else{
       
  3623       rc = SQLITE_OK;
       
  3624     }
       
  3625   }
       
  3626   *pRes = 0;
       
  3627   return rc;
       
  3628 }
       
  3629 
       
  3630 /*
       
  3631 ** Allocate a new page from the database file.
       
  3632 **
       
  3633 ** The new page is marked as dirty.  (In other words, sqlite3pager_write()
       
  3634 ** has already been called on the new page.)  The new page has also
       
  3635 ** been referenced and the calling routine is responsible for calling
       
  3636 ** sqlite3pager_unref() on the new page when it is done.
       
  3637 **
       
  3638 ** SQLITE_OK is returned on success.  Any other return value indicates
       
  3639 ** an error.  *ppPage and *pPgno are undefined in the event of an error.
       
  3640 ** Do not invoke sqlite3pager_unref() on *ppPage if an error is returned.
       
  3641 **
       
  3642 ** If the "nearby" parameter is not 0, then a (feeble) effort is made to 
       
  3643 ** locate a page close to the page number "nearby".  This can be used in an
       
  3644 ** attempt to keep related pages close to each other in the database file,
       
  3645 ** which in turn can make database access faster.
       
  3646 **
       
  3647 ** If the "exact" parameter is not 0, and the page-number nearby exists 
       
  3648 ** anywhere on the free-list, then it is guarenteed to be returned. This
       
  3649 ** is only used by auto-vacuum databases when allocating a new table.
       
  3650 */
       
  3651 static int allocatePage(
       
  3652   BtShared *pBt, 
       
  3653   MemPage **ppPage, 
       
  3654   Pgno *pPgno, 
       
  3655   Pgno nearby,
       
  3656   u8 exact
       
  3657 ){
       
  3658   MemPage *pPage1;
       
  3659   int rc;
       
  3660   int n;     /* Number of pages on the freelist */
       
  3661   int k;     /* Number of leaves on the trunk of the freelist */
       
  3662 
       
  3663   pPage1 = pBt->pPage1;
       
  3664   n = get4byte(&pPage1->aData[36]);
       
  3665   if( n>0 ){
       
  3666     /* There are pages on the freelist.  Reuse one of those pages. */
       
  3667     MemPage *pTrunk = 0;
       
  3668     Pgno iTrunk;
       
  3669     MemPage *pPrevTrunk = 0;
       
  3670     u8 searchList = 0; /* If the free-list must be searched for 'nearby' */
       
  3671     
       
  3672     /* If the 'exact' parameter was true and a query of the pointer-map
       
  3673     ** shows that the page 'nearby' is somewhere on the free-list, then
       
  3674     ** the entire-list will be searched for that page.
       
  3675     */
       
  3676 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3677     if( exact ){
       
  3678       u8 eType;
       
  3679       assert( nearby>0 );
       
  3680       assert( pBt->autoVacuum );
       
  3681       rc = ptrmapGet(pBt, nearby, &eType, 0);
       
  3682       if( rc ) return rc;
       
  3683       if( eType==PTRMAP_FREEPAGE ){
       
  3684         searchList = 1;
       
  3685       }
       
  3686       *pPgno = nearby;
       
  3687     }
       
  3688 #endif
       
  3689 
       
  3690     /* Decrement the free-list count by 1. Set iTrunk to the index of the
       
  3691     ** first free-list trunk page. iPrevTrunk is initially 1.
       
  3692     */
       
  3693     rc = sqlite3pager_write(pPage1->aData);
       
  3694     if( rc ) return rc;
       
  3695     put4byte(&pPage1->aData[36], n-1);
       
  3696 
       
  3697     /* The code within this loop is run only once if the 'searchList' variable
       
  3698     ** is not true. Otherwise, it runs once for each trunk-page on the
       
  3699     ** free-list until the page 'nearby' is located.
       
  3700     */
       
  3701     do {
       
  3702       pPrevTrunk = pTrunk;
       
  3703       if( pPrevTrunk ){
       
  3704         iTrunk = get4byte(&pPrevTrunk->aData[0]);
       
  3705       }else{
       
  3706         iTrunk = get4byte(&pPage1->aData[32]);
       
  3707       }
       
  3708       rc = getPage(pBt, iTrunk, &pTrunk);
       
  3709       if( rc ){
       
  3710         releasePage(pPrevTrunk);
       
  3711         return rc;
       
  3712       }
       
  3713 
       
  3714       /* TODO: This should move to after the loop? */
       
  3715       rc = sqlite3pager_write(pTrunk->aData);
       
  3716       if( rc ){
       
  3717         releasePage(pTrunk);
       
  3718         releasePage(pPrevTrunk);
       
  3719         return rc;
       
  3720       }
       
  3721 
       
  3722       k = get4byte(&pTrunk->aData[4]);
       
  3723       if( k==0 && !searchList ){
       
  3724         /* The trunk has no leaves and the list is not being searched. 
       
  3725         ** So extract the trunk page itself and use it as the newly 
       
  3726         ** allocated page */
       
  3727         assert( pPrevTrunk==0 );
       
  3728         *pPgno = iTrunk;
       
  3729         memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
       
  3730         *ppPage = pTrunk;
       
  3731         pTrunk = 0;
       
  3732         TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
       
  3733       }else if( k>pBt->usableSize/4 - 8 ){
       
  3734         /* Value of k is out of range.  Database corruption */
       
  3735         return SQLITE_CORRUPT_BKPT;
       
  3736 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3737       }else if( searchList && nearby==iTrunk ){
       
  3738         /* The list is being searched and this trunk page is the page
       
  3739         ** to allocate, regardless of whether it has leaves.
       
  3740         */
       
  3741         assert( *pPgno==iTrunk );
       
  3742         *ppPage = pTrunk;
       
  3743         searchList = 0;
       
  3744         if( k==0 ){
       
  3745           if( !pPrevTrunk ){
       
  3746             memcpy(&pPage1->aData[32], &pTrunk->aData[0], 4);
       
  3747           }else{
       
  3748             memcpy(&pPrevTrunk->aData[0], &pTrunk->aData[0], 4);
       
  3749           }
       
  3750         }else{
       
  3751           /* The trunk page is required by the caller but it contains 
       
  3752           ** pointers to free-list leaves. The first leaf becomes a trunk
       
  3753           ** page in this case.
       
  3754           */
       
  3755           MemPage *pNewTrunk;
       
  3756           Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
       
  3757           rc = getPage(pBt, iNewTrunk, &pNewTrunk);
       
  3758           if( rc!=SQLITE_OK ){
       
  3759             releasePage(pTrunk);
       
  3760             releasePage(pPrevTrunk);
       
  3761             return rc;
       
  3762           }
       
  3763           rc = sqlite3pager_write(pNewTrunk->aData);
       
  3764           if( rc!=SQLITE_OK ){
       
  3765             releasePage(pNewTrunk);
       
  3766             releasePage(pTrunk);
       
  3767             releasePage(pPrevTrunk);
       
  3768             return rc;
       
  3769           }
       
  3770           memcpy(&pNewTrunk->aData[0], &pTrunk->aData[0], 4);
       
  3771           put4byte(&pNewTrunk->aData[4], k-1);
       
  3772           memcpy(&pNewTrunk->aData[8], &pTrunk->aData[12], (k-1)*4);
       
  3773           if( !pPrevTrunk ){
       
  3774             put4byte(&pPage1->aData[32], iNewTrunk);
       
  3775           }else{
       
  3776             put4byte(&pPrevTrunk->aData[0], iNewTrunk);
       
  3777           }
       
  3778           releasePage(pNewTrunk);
       
  3779         }
       
  3780         pTrunk = 0;
       
  3781         TRACE(("ALLOCATE: %d trunk - %d free pages left\n", *pPgno, n-1));
       
  3782 #endif
       
  3783       }else{
       
  3784         /* Extract a leaf from the trunk */
       
  3785         int closest;
       
  3786         Pgno iPage;
       
  3787         unsigned char *aData = pTrunk->aData;
       
  3788         if( nearby>0 ){
       
  3789           int i, dist;
       
  3790           closest = 0;
       
  3791           dist = get4byte(&aData[8]) - nearby;
       
  3792           if( dist<0 ) dist = -dist;
       
  3793           for(i=1; i<k; i++){
       
  3794             int d2 = get4byte(&aData[8+i*4]) - nearby;
       
  3795             if( d2<0 ) d2 = -d2;
       
  3796             if( d2<dist ){
       
  3797               closest = i;
       
  3798               dist = d2;
       
  3799             }
       
  3800           }
       
  3801         }else{
       
  3802           closest = 0;
       
  3803         }
       
  3804 
       
  3805         iPage = get4byte(&aData[8+closest*4]);
       
  3806         if( !searchList || iPage==nearby ){
       
  3807           *pPgno = iPage;
       
  3808           if( *pPgno>sqlite3pager_pagecount(pBt->pPager) ){
       
  3809             /* Free page off the end of the file */
       
  3810             return SQLITE_CORRUPT_BKPT;
       
  3811           }
       
  3812           TRACE(("ALLOCATE: %d was leaf %d of %d on trunk %d"
       
  3813                  ": %d more free pages\n",
       
  3814                  *pPgno, closest+1, k, pTrunk->pgno, n-1));
       
  3815           if( closest<k-1 ){
       
  3816             memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
       
  3817           }
       
  3818           put4byte(&aData[4], k-1);
       
  3819           rc = getPage(pBt, *pPgno, ppPage);
       
  3820           if( rc==SQLITE_OK ){
       
  3821             sqlite3pager_dont_rollback((*ppPage)->aData);
       
  3822             rc = sqlite3pager_write((*ppPage)->aData);
       
  3823             if( rc!=SQLITE_OK ){
       
  3824               releasePage(*ppPage);
       
  3825             }
       
  3826           }
       
  3827           searchList = 0;
       
  3828         }
       
  3829       }
       
  3830       releasePage(pPrevTrunk);
       
  3831     }while( searchList );
       
  3832     releasePage(pTrunk);
       
  3833   }else{
       
  3834     /* There are no pages on the freelist, so create a new page at the
       
  3835     ** end of the file */
       
  3836     *pPgno = sqlite3pager_pagecount(pBt->pPager) + 1;
       
  3837 
       
  3838 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3839     if( pBt->autoVacuum && PTRMAP_ISPAGE(pBt, *pPgno) ){
       
  3840       /* If *pPgno refers to a pointer-map page, allocate two new pages
       
  3841       ** at the end of the file instead of one. The first allocated page
       
  3842       ** becomes a new pointer-map page, the second is used by the caller.
       
  3843       */
       
  3844       TRACE(("ALLOCATE: %d from end of file (pointer-map page)\n", *pPgno));
       
  3845       assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
       
  3846       (*pPgno)++;
       
  3847     }
       
  3848 #endif
       
  3849 
       
  3850     assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
       
  3851     rc = getPage(pBt, *pPgno, ppPage);
       
  3852     if( rc ) return rc;
       
  3853     rc = sqlite3pager_write((*ppPage)->aData);
       
  3854     if( rc!=SQLITE_OK ){
       
  3855       releasePage(*ppPage);
       
  3856     }
       
  3857     TRACE(("ALLOCATE: %d from end of file\n", *pPgno));
       
  3858   }
       
  3859 
       
  3860   assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
       
  3861   return rc;
       
  3862 }
       
  3863 
       
  3864 /*
       
  3865 ** Add a page of the database file to the freelist.
       
  3866 **
       
  3867 ** sqlite3pager_unref() is NOT called for pPage.
       
  3868 */
       
  3869 static int freePage(MemPage *pPage){
       
  3870   BtShared *pBt = pPage->pBt;
       
  3871   MemPage *pPage1 = pBt->pPage1;
       
  3872   int rc, n, k;
       
  3873 
       
  3874   /* Prepare the page for freeing */
       
  3875   assert( pPage->pgno>1 );
       
  3876   pPage->isInit = 0;
       
  3877   releasePage(pPage->pParent);
       
  3878   pPage->pParent = 0;
       
  3879 
       
  3880   /* Increment the free page count on pPage1 */
       
  3881   rc = sqlite3pager_write(pPage1->aData);
       
  3882   if( rc ) return rc;
       
  3883   n = get4byte(&pPage1->aData[36]);
       
  3884   put4byte(&pPage1->aData[36], n+1);
       
  3885 
       
  3886 #ifdef SQLITE_SECURE_DELETE
       
  3887   /* If the SQLITE_SECURE_DELETE compile-time option is enabled, then
       
  3888   ** always fully overwrite deleted information with zeros.
       
  3889   */
       
  3890   rc = sqlite3pager_write(pPage->aData);
       
  3891   if( rc ) return rc;
       
  3892   memset(pPage->aData, 0, pPage->pBt->pageSize);
       
  3893 #endif
       
  3894 
       
  3895 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  3896   /* If the database supports auto-vacuum, write an entry in the pointer-map
       
  3897   ** to indicate that the page is free.
       
  3898   */
       
  3899   if( pBt->autoVacuum ){
       
  3900     rc = ptrmapPut(pBt, pPage->pgno, PTRMAP_FREEPAGE, 0);
       
  3901     if( rc ) return rc;
       
  3902   }
       
  3903 #endif
       
  3904 
       
  3905   if( n==0 ){
       
  3906     /* This is the first free page */
       
  3907     rc = sqlite3pager_write(pPage->aData);
       
  3908     if( rc ) return rc;
       
  3909     memset(pPage->aData, 0, 8);
       
  3910     put4byte(&pPage1->aData[32], pPage->pgno);
       
  3911     TRACE(("FREE-PAGE: %d first\n", pPage->pgno));
       
  3912   }else{
       
  3913     /* Other free pages already exist.  Retrive the first trunk page
       
  3914     ** of the freelist and find out how many leaves it has. */
       
  3915     MemPage *pTrunk;
       
  3916     rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
       
  3917     if( rc ) return rc;
       
  3918     k = get4byte(&pTrunk->aData[4]);
       
  3919     if( k>=pBt->usableSize/4 - 8 ){
       
  3920       /* The trunk is full.  Turn the page being freed into a new
       
  3921       ** trunk page with no leaves. */
       
  3922       rc = sqlite3pager_write(pPage->aData);
       
  3923       if( rc ) return rc;
       
  3924       put4byte(pPage->aData, pTrunk->pgno);
       
  3925       put4byte(&pPage->aData[4], 0);
       
  3926       put4byte(&pPage1->aData[32], pPage->pgno);
       
  3927       TRACE(("FREE-PAGE: %d new trunk page replacing %d\n",
       
  3928               pPage->pgno, pTrunk->pgno));
       
  3929     }else{
       
  3930       /* Add the newly freed page as a leaf on the current trunk */
       
  3931       rc = sqlite3pager_write(pTrunk->aData);
       
  3932       if( rc ) return rc;
       
  3933       put4byte(&pTrunk->aData[4], k+1);
       
  3934       put4byte(&pTrunk->aData[8+k*4], pPage->pgno);
       
  3935 #ifndef SQLITE_SECURE_DELETE
       
  3936       sqlite3pager_dont_write(pBt->pPager, pPage->pgno);
       
  3937 #endif
       
  3938       TRACE(("FREE-PAGE: %d leaf on trunk page %d\n",pPage->pgno,pTrunk->pgno));
       
  3939     }
       
  3940     releasePage(pTrunk);
       
  3941   }
       
  3942   return rc;
       
  3943 }
       
  3944 
       
  3945 /*
       
  3946 ** Free any overflow pages associated with the given Cell.
       
  3947 */
       
  3948 static int clearCell(MemPage *pPage, unsigned char *pCell){
       
  3949   BtShared *pBt = pPage->pBt;
       
  3950   CellInfo info;
       
  3951   Pgno ovflPgno;
       
  3952   int rc;
       
  3953 
       
  3954   parseCellPtr(pPage, pCell, &info);
       
  3955   if( info.iOverflow==0 ){
       
  3956     return SQLITE_OK;  /* No overflow pages. Return without doing anything */
       
  3957   }
       
  3958   ovflPgno = get4byte(&pCell[info.iOverflow]);
       
  3959   while( ovflPgno!=0 ){
       
  3960     MemPage *pOvfl;
       
  3961     if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
       
  3962       return SQLITE_CORRUPT_BKPT;
       
  3963     }
       
  3964     rc = getPage(pBt, ovflPgno, &pOvfl);
       
  3965     if( rc ) return rc;
       
  3966     ovflPgno = get4byte(pOvfl->aData);
       
  3967     rc = freePage(pOvfl);
       
  3968     sqlite3pager_unref(pOvfl->aData);
       
  3969     if( rc ) return rc;
       
  3970   }
       
  3971   return SQLITE_OK;
       
  3972 }
       
  3973 
       
  3974 /*
       
  3975 ** Create the byte sequence used to represent a cell on page pPage
       
  3976 ** and write that byte sequence into pCell[].  Overflow pages are
       
  3977 ** allocated and filled in as necessary.  The calling procedure
       
  3978 ** is responsible for making sure sufficient space has been allocated
       
  3979 ** for pCell[].
       
  3980 **
       
  3981 ** Note that pCell does not necessary need to point to the pPage->aData
       
  3982 ** area.  pCell might point to some temporary storage.  The cell will
       
  3983 ** be constructed in this temporary area then copied into pPage->aData
       
  3984 ** later.
       
  3985 */
       
  3986 static int fillInCell(
       
  3987   MemPage *pPage,                /* The page that contains the cell */
       
  3988   unsigned char *pCell,          /* Complete text of the cell */
       
  3989   const void *pKey, i64 nKey,    /* The key */
       
  3990   const void *pData,int nData,   /* The data */
       
  3991   int *pnSize                    /* Write cell size here */
       
  3992 ){
       
  3993   int nPayload;
       
  3994   const u8 *pSrc;
       
  3995   int nSrc, n, rc;
       
  3996   int spaceLeft;
       
  3997   MemPage *pOvfl = 0;
       
  3998   MemPage *pToRelease = 0;
       
  3999   unsigned char *pPrior;
       
  4000   unsigned char *pPayload;
       
  4001   BtShared *pBt = pPage->pBt;
       
  4002   Pgno pgnoOvfl = 0;
       
  4003   int nHeader;
       
  4004   CellInfo info;
       
  4005 
       
  4006   /* Fill in the header. */
       
  4007   nHeader = 0;
       
  4008   if( !pPage->leaf ){
       
  4009     nHeader += 4;
       
  4010   }
       
  4011   if( pPage->hasData ){
       
  4012     nHeader += putVarint(&pCell[nHeader], nData);
       
  4013   }else{
       
  4014     nData = 0;
       
  4015   }
       
  4016   nHeader += putVarint(&pCell[nHeader], *(u64*)&nKey);
       
  4017   parseCellPtr(pPage, pCell, &info);
       
  4018   assert( info.nHeader==nHeader );
       
  4019   assert( info.nKey==nKey );
       
  4020   assert( info.nData==nData );
       
  4021   
       
  4022   /* Fill in the payload */
       
  4023   nPayload = nData;
       
  4024   if( pPage->intKey ){
       
  4025     pSrc = pData;
       
  4026     nSrc = nData;
       
  4027     nData = 0;
       
  4028   }else{
       
  4029     nPayload += nKey;
       
  4030     pSrc = pKey;
       
  4031     nSrc = nKey;
       
  4032   }
       
  4033   *pnSize = info.nSize;
       
  4034   spaceLeft = info.nLocal;
       
  4035   pPayload = &pCell[nHeader];
       
  4036   pPrior = &pCell[info.iOverflow];
       
  4037 
       
  4038   while( nPayload>0 ){
       
  4039     if( spaceLeft==0 ){
       
  4040 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4041       Pgno pgnoPtrmap = pgnoOvfl; /* Overflow page pointer-map entry page */
       
  4042 #endif
       
  4043       rc = allocatePage(pBt, &pOvfl, &pgnoOvfl, pgnoOvfl, 0);
       
  4044 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4045       /* If the database supports auto-vacuum, and the second or subsequent
       
  4046       ** overflow page is being allocated, add an entry to the pointer-map
       
  4047       ** for that page now. The entry for the first overflow page will be
       
  4048       ** added later, by the insertCell() routine.
       
  4049       */
       
  4050       if( pBt->autoVacuum && pgnoPtrmap!=0 && rc==SQLITE_OK ){
       
  4051         rc = ptrmapPut(pBt, pgnoOvfl, PTRMAP_OVERFLOW2, pgnoPtrmap);
       
  4052       }
       
  4053 #endif
       
  4054       if( rc ){
       
  4055         releasePage(pToRelease);
       
  4056         /* clearCell(pPage, pCell); */
       
  4057         return rc;
       
  4058       }
       
  4059       put4byte(pPrior, pgnoOvfl);
       
  4060       releasePage(pToRelease);
       
  4061       pToRelease = pOvfl;
       
  4062       pPrior = pOvfl->aData;
       
  4063       put4byte(pPrior, 0);
       
  4064       pPayload = &pOvfl->aData[4];
       
  4065       spaceLeft = pBt->usableSize - 4;
       
  4066     }
       
  4067     n = nPayload;
       
  4068     if( n>spaceLeft ) n = spaceLeft;
       
  4069     if( n>nSrc ) n = nSrc;
       
  4070     assert( pSrc );
       
  4071     memcpy(pPayload, pSrc, n);
       
  4072     nPayload -= n;
       
  4073     pPayload += n;
       
  4074     pSrc += n;
       
  4075     nSrc -= n;
       
  4076     spaceLeft -= n;
       
  4077     if( nSrc==0 ){
       
  4078       nSrc = nData;
       
  4079       pSrc = pData;
       
  4080     }
       
  4081   }
       
  4082   releasePage(pToRelease);
       
  4083   return SQLITE_OK;
       
  4084 }
       
  4085 
       
  4086 /*
       
  4087 ** Change the MemPage.pParent pointer on the page whose number is
       
  4088 ** given in the second argument so that MemPage.pParent holds the
       
  4089 ** pointer in the third argument.
       
  4090 */
       
  4091 static int reparentPage(BtShared *pBt, Pgno pgno, MemPage *pNewParent, int idx){
       
  4092   MemPage *pThis;
       
  4093   unsigned char *aData;
       
  4094 
       
  4095   assert( pNewParent!=0 );
       
  4096   if( pgno==0 ) return SQLITE_OK;
       
  4097   assert( pBt->pPager!=0 );
       
  4098   aData = sqlite3pager_lookup(pBt->pPager, pgno);
       
  4099   if( aData ){
       
  4100     pThis = (MemPage*)&aData[pBt->pageSize];
       
  4101     assert( pThis->aData==aData );
       
  4102     if( pThis->isInit ){
       
  4103       if( pThis->pParent!=pNewParent ){
       
  4104         if( pThis->pParent ) sqlite3pager_unref(pThis->pParent->aData);
       
  4105         pThis->pParent = pNewParent;
       
  4106         sqlite3pager_ref(pNewParent->aData);
       
  4107       }
       
  4108       pThis->idxParent = idx;
       
  4109     }
       
  4110     sqlite3pager_unref(aData);
       
  4111   }
       
  4112 
       
  4113 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4114   if( pBt->autoVacuum ){
       
  4115     return ptrmapPut(pBt, pgno, PTRMAP_BTREE, pNewParent->pgno);
       
  4116   }
       
  4117 #endif
       
  4118   return SQLITE_OK;
       
  4119 }
       
  4120 
       
  4121 
       
  4122 
       
  4123 /*
       
  4124 ** Change the pParent pointer of all children of pPage to point back
       
  4125 ** to pPage.
       
  4126 **
       
  4127 ** In other words, for every child of pPage, invoke reparentPage()
       
  4128 ** to make sure that each child knows that pPage is its parent.
       
  4129 **
       
  4130 ** This routine gets called after you memcpy() one page into
       
  4131 ** another.
       
  4132 */
       
  4133 static int reparentChildPages(MemPage *pPage){
       
  4134   int i;
       
  4135   BtShared *pBt = pPage->pBt;
       
  4136   int rc = SQLITE_OK;
       
  4137 
       
  4138   if( pPage->leaf ) return SQLITE_OK;
       
  4139 
       
  4140   for(i=0; i<pPage->nCell; i++){
       
  4141     u8 *pCell = findCell(pPage, i);
       
  4142     if( !pPage->leaf ){
       
  4143       rc = reparentPage(pBt, get4byte(pCell), pPage, i);
       
  4144       if( rc!=SQLITE_OK ) return rc;
       
  4145     }
       
  4146   }
       
  4147   if( !pPage->leaf ){
       
  4148     rc = reparentPage(pBt, get4byte(&pPage->aData[pPage->hdrOffset+8]), 
       
  4149        pPage, i);
       
  4150     pPage->idxShift = 0;
       
  4151   }
       
  4152   return rc;
       
  4153 }
       
  4154 
       
  4155 /*
       
  4156 ** Remove the i-th cell from pPage.  This routine effects pPage only.
       
  4157 ** The cell content is not freed or deallocated.  It is assumed that
       
  4158 ** the cell content has been copied someplace else.  This routine just
       
  4159 ** removes the reference to the cell from pPage.
       
  4160 **
       
  4161 ** "sz" must be the number of bytes in the cell.
       
  4162 */
       
  4163 static void dropCell(MemPage *pPage, int idx, int sz){
       
  4164   int i;          /* Loop counter */
       
  4165   int pc;         /* Offset to cell content of cell being deleted */
       
  4166   u8 *data;       /* pPage->aData */
       
  4167   u8 *ptr;        /* Used to move bytes around within data[] */
       
  4168 
       
  4169   assert( idx>=0 && idx<pPage->nCell );
       
  4170   assert( sz==cellSize(pPage, idx) );
       
  4171   assert( sqlite3pager_iswriteable(pPage->aData) );
       
  4172   data = pPage->aData;
       
  4173   ptr = &data[pPage->cellOffset + 2*idx];
       
  4174   pc = get2byte(ptr);
       
  4175   assert( pc>10 && pc+sz<=pPage->pBt->usableSize );
       
  4176   freeSpace(pPage, pc, sz);
       
  4177   for(i=idx+1; i<pPage->nCell; i++, ptr+=2){
       
  4178     ptr[0] = ptr[2];
       
  4179     ptr[1] = ptr[3];
       
  4180   }
       
  4181   pPage->nCell--;
       
  4182   put2byte(&data[pPage->hdrOffset+3], pPage->nCell);
       
  4183   pPage->nFree += 2;
       
  4184   pPage->idxShift = 1;
       
  4185 }
       
  4186 
       
  4187 /*
       
  4188 ** Insert a new cell on pPage at cell index "i".  pCell points to the
       
  4189 ** content of the cell.
       
  4190 **
       
  4191 ** If the cell content will fit on the page, then put it there.  If it
       
  4192 ** will not fit, then make a copy of the cell content into pTemp if
       
  4193 ** pTemp is not null.  Regardless of pTemp, allocate a new entry
       
  4194 ** in pPage->aOvfl[] and make it point to the cell content (either
       
  4195 ** in pTemp or the original pCell) and also record its index. 
       
  4196 ** Allocating a new entry in pPage->aCell[] implies that 
       
  4197 ** pPage->nOverflow is incremented.
       
  4198 **
       
  4199 ** If nSkip is non-zero, then do not copy the first nSkip bytes of the
       
  4200 ** cell. The caller will overwrite them after this function returns. If
       
  4201 ** nSkip is non-zero, then pCell may not point to an invalid memory location 
       
  4202 ** (but pCell+nSkip is always valid).
       
  4203 */
       
  4204 static int insertCell(
       
  4205   MemPage *pPage,   /* Page into which we are copying */
       
  4206   int i,            /* New cell becomes the i-th cell of the page */
       
  4207   u8 *pCell,        /* Content of the new cell */
       
  4208   int sz,           /* Bytes of content in pCell */
       
  4209   u8 *pTemp,        /* Temp storage space for pCell, if needed */
       
  4210   u8 nSkip          /* Do not write the first nSkip bytes of the cell */
       
  4211 ){
       
  4212   int idx;          /* Where to write new cell content in data[] */
       
  4213   int j;            /* Loop counter */
       
  4214   int top;          /* First byte of content for any cell in data[] */
       
  4215   int end;          /* First byte past the last cell pointer in data[] */
       
  4216   int ins;          /* Index in data[] where new cell pointer is inserted */
       
  4217   int hdr;          /* Offset into data[] of the page header */
       
  4218   int cellOffset;   /* Address of first cell pointer in data[] */
       
  4219   u8 *data;         /* The content of the whole page */
       
  4220   u8 *ptr;          /* Used for moving information around in data[] */
       
  4221 
       
  4222   assert( i>=0 && i<=pPage->nCell+pPage->nOverflow );
       
  4223   assert( sz==cellSizePtr(pPage, pCell) );
       
  4224   assert( sqlite3pager_iswriteable(pPage->aData) );
       
  4225   if( pPage->nOverflow || sz+2>pPage->nFree ){
       
  4226     if( pTemp ){
       
  4227       memcpy(pTemp+nSkip, pCell+nSkip, sz-nSkip);
       
  4228       pCell = pTemp;
       
  4229     }
       
  4230     j = pPage->nOverflow++;
       
  4231     assert( j<sizeof(pPage->aOvfl)/sizeof(pPage->aOvfl[0]) );
       
  4232     pPage->aOvfl[j].pCell = pCell;
       
  4233     pPage->aOvfl[j].idx = i;
       
  4234     pPage->nFree = 0;
       
  4235   }else{
       
  4236     data = pPage->aData;
       
  4237     hdr = pPage->hdrOffset;
       
  4238     top = get2byte(&data[hdr+5]);
       
  4239     cellOffset = pPage->cellOffset;
       
  4240     end = cellOffset + 2*pPage->nCell + 2;
       
  4241     ins = cellOffset + 2*i;
       
  4242     if( end > top - sz ){
       
  4243       int rc = defragmentPage(pPage);
       
  4244       if( rc!=SQLITE_OK ) return rc;
       
  4245       top = get2byte(&data[hdr+5]);
       
  4246       assert( end + sz <= top );
       
  4247     }
       
  4248     idx = allocateSpace(pPage, sz);
       
  4249     assert( idx>0 );
       
  4250     assert( end <= get2byte(&data[hdr+5]) );
       
  4251     pPage->nCell++;
       
  4252     pPage->nFree -= 2;
       
  4253     memcpy(&data[idx+nSkip], pCell+nSkip, sz-nSkip);
       
  4254     for(j=end-2, ptr=&data[j]; j>ins; j-=2, ptr-=2){
       
  4255       ptr[0] = ptr[-2];
       
  4256       ptr[1] = ptr[-1];
       
  4257     }
       
  4258     put2byte(&data[ins], idx);
       
  4259     put2byte(&data[hdr+3], pPage->nCell);
       
  4260     pPage->idxShift = 1;
       
  4261     pageIntegrity(pPage);
       
  4262 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4263     if( pPage->pBt->autoVacuum ){
       
  4264       /* The cell may contain a pointer to an overflow page. If so, write
       
  4265       ** the entry for the overflow page into the pointer map.
       
  4266       */
       
  4267       CellInfo info;
       
  4268       parseCellPtr(pPage, pCell, &info);
       
  4269       if( (info.nData+(pPage->intKey?0:info.nKey))>info.nLocal ){
       
  4270         Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
       
  4271         int rc = ptrmapPut(pPage->pBt, pgnoOvfl, PTRMAP_OVERFLOW1, pPage->pgno);
       
  4272         if( rc!=SQLITE_OK ) return rc;
       
  4273       }
       
  4274     }
       
  4275 #endif
       
  4276   }
       
  4277 
       
  4278   return SQLITE_OK;
       
  4279 }
       
  4280 
       
  4281 /*
       
  4282 ** Add a list of cells to a page.  The page should be initially empty.
       
  4283 ** The cells are guaranteed to fit on the page.
       
  4284 */
       
  4285 static void assemblePage(
       
  4286   MemPage *pPage,   /* The page to be assemblied */
       
  4287   int nCell,        /* The number of cells to add to this page */
       
  4288   u8 **apCell,      /* Pointers to cell bodies */
       
  4289   int *aSize        /* Sizes of the cells */
       
  4290 ){
       
  4291   int i;            /* Loop counter */
       
  4292   int totalSize;    /* Total size of all cells */
       
  4293   int hdr;          /* Index of page header */
       
  4294   int cellptr;      /* Address of next cell pointer */
       
  4295   int cellbody;     /* Address of next cell body */
       
  4296   u8 *data;         /* Data for the page */
       
  4297 
       
  4298   assert( pPage->nOverflow==0 );
       
  4299   totalSize = 0;
       
  4300   for(i=0; i<nCell; i++){
       
  4301     totalSize += aSize[i];
       
  4302   }
       
  4303   assert( totalSize+2*nCell<=pPage->nFree );
       
  4304   assert( pPage->nCell==0 );
       
  4305   cellptr = pPage->cellOffset;
       
  4306   data = pPage->aData;
       
  4307   hdr = pPage->hdrOffset;
       
  4308   put2byte(&data[hdr+3], nCell);
       
  4309   if( nCell ){
       
  4310     cellbody = allocateSpace(pPage, totalSize);
       
  4311     assert( cellbody>0 );
       
  4312     assert( pPage->nFree >= 2*nCell );
       
  4313     pPage->nFree -= 2*nCell;
       
  4314     for(i=0; i<nCell; i++){
       
  4315       put2byte(&data[cellptr], cellbody);
       
  4316       memcpy(&data[cellbody], apCell[i], aSize[i]);
       
  4317       cellptr += 2;
       
  4318       cellbody += aSize[i];
       
  4319     }
       
  4320     assert( cellbody==pPage->pBt->usableSize );
       
  4321   }
       
  4322   pPage->nCell = nCell;
       
  4323 }
       
  4324 
       
  4325 /*
       
  4326 ** The following parameters determine how many adjacent pages get involved
       
  4327 ** in a balancing operation.  NN is the number of neighbors on either side
       
  4328 ** of the page that participate in the balancing operation.  NB is the
       
  4329 ** total number of pages that participate, including the target page and
       
  4330 ** NN neighbors on either side.
       
  4331 **
       
  4332 ** The minimum value of NN is 1 (of course).  Increasing NN above 1
       
  4333 ** (to 2 or 3) gives a modest improvement in SELECT and DELETE performance
       
  4334 ** in exchange for a larger degradation in INSERT and UPDATE performance.
       
  4335 ** A value of 1 for NN appears to give the best results overall.
       
  4336 */
       
  4337 #define NN 1             /* Number of neighbors on either side of pPage */
       
  4338 #define NB (NN*2+1)      /* Total pages involved in the balance */
       
  4339 
       
  4340 /* Forward reference: balance() is called by balance_quick() below */
       
  4341 static int balance(MemPage*, int);
       
  4342 
       
  4343 #ifndef SQLITE_OMIT_QUICKBALANCE
       
  4344 /*
       
  4345 ** This version of balance() handles the common special case where
       
  4346 ** a new entry is being inserted on the extreme right-end of the
       
  4347 ** tree, in other words, when the new entry will become the largest
       
  4348 ** entry in the tree.
       
  4349 **
       
  4350 ** Instead of trying balance the 3 right-most leaf pages, just add
       
  4351 ** a new page to the right-hand side and put the one new entry in
       
  4352 ** that page.  This leaves the right side of the tree somewhat
       
  4353 ** unbalanced.  But odds are that we will be inserting new entries
       
  4354 ** at the end soon afterwards so the nearly empty page will quickly
       
  4355 ** fill up.  On average.
       
  4356 **
       
  4357 ** pPage is the leaf page which is the right-most page in the tree.
       
  4358 ** pParent is its parent.  pPage must have a single overflow entry
       
  4359 ** which is also the right-most entry on the page.
       
  4360 */
       
  4361 static int balance_quick(MemPage *pPage, MemPage *pParent){
       
  4362   int rc;
       
  4363   MemPage *pNew;
       
  4364   Pgno pgnoNew;
       
  4365   u8 *pCell;
       
  4366   int szCell;
       
  4367   CellInfo info;
       
  4368   BtShared *pBt = pPage->pBt;
       
  4369   int parentIdx = pParent->nCell;   /* pParent new divider cell index */
       
  4370   int parentSize;                   /* Size of new divider cell */
       
  4371   u8 parentCell[64];                /* Space for the new divider cell */
       
  4372 
       
  4373   /* Allocate a new page. Insert the overflow cell from pPage
       
  4374   ** into it. Then remove the overflow cell from pPage.
       
  4375   */
       
  4376   rc = allocatePage(pBt, &pNew, &pgnoNew, 0, 0);
       
  4377   if( rc!=SQLITE_OK ){
       
  4378     return rc;
       
  4379   }
       
  4380   pCell = pPage->aOvfl[0].pCell;
       
  4381   szCell = cellSizePtr(pPage, pCell);
       
  4382   zeroPage(pNew, pPage->aData[0]);
       
  4383   assemblePage(pNew, 1, &pCell, &szCell);
       
  4384   pPage->nOverflow = 0;
       
  4385 
       
  4386   /* Set the parent of the newly allocated page to pParent. */
       
  4387   pNew->pParent = pParent;
       
  4388   sqlite3pager_ref(pParent->aData);
       
  4389 
       
  4390   /* pPage is currently the right-child of pParent. Change this
       
  4391   ** so that the right-child is the new page allocated above and
       
  4392   ** pPage is the next-to-right child. 
       
  4393   */
       
  4394   assert( pPage->nCell>0 );
       
  4395   parseCellPtr(pPage, findCell(pPage, pPage->nCell-1), &info);
       
  4396   rc = fillInCell(pParent, parentCell, 0, info.nKey, 0, 0, &parentSize);
       
  4397   if( rc!=SQLITE_OK ){
       
  4398     return rc;
       
  4399   }
       
  4400   assert( parentSize<64 );
       
  4401   rc = insertCell(pParent, parentIdx, parentCell, parentSize, 0, 4);
       
  4402   if( rc!=SQLITE_OK ){
       
  4403     return rc;
       
  4404   }
       
  4405   put4byte(findOverflowCell(pParent,parentIdx), pPage->pgno);
       
  4406   put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew);
       
  4407 
       
  4408 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4409   /* If this is an auto-vacuum database, update the pointer map
       
  4410   ** with entries for the new page, and any pointer from the 
       
  4411   ** cell on the page to an overflow page.
       
  4412   */
       
  4413   if( pBt->autoVacuum ){
       
  4414     rc = ptrmapPut(pBt, pgnoNew, PTRMAP_BTREE, pParent->pgno);
       
  4415     if( rc!=SQLITE_OK ){
       
  4416       return rc;
       
  4417     }
       
  4418     rc = ptrmapPutOvfl(pNew, 0);
       
  4419     if( rc!=SQLITE_OK ){
       
  4420       return rc;
       
  4421     }
       
  4422   }
       
  4423 #endif
       
  4424 
       
  4425   /* Release the reference to the new page and balance the parent page,
       
  4426   ** in case the divider cell inserted caused it to become overfull.
       
  4427   */
       
  4428   releasePage(pNew);
       
  4429   return balance(pParent, 0);
       
  4430 }
       
  4431 #endif /* SQLITE_OMIT_QUICKBALANCE */
       
  4432 
       
  4433 /*
       
  4434 ** The ISAUTOVACUUM macro is used within balance_nonroot() to determine
       
  4435 ** whether the database supports auto-vacuum. Because it is used
       
  4436 ** within an expression that is an argument to another macro 
       
  4437 ** (sqliteMallocRaw), it is not possible to use conditional compilation
       
  4438 ** there. So, this macro is defined instead.
       
  4439 */
       
  4440 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4441 #define ISAUTOVACUUM (pBt->autoVacuum)  /* expects a variable named pBt at the point of use */
       
  4442 #else
       
  4443 #define ISAUTOVACUUM 0  /* auto-vacuum support compiled out */
       
  4444 #endif
       
  4445 
       
  4446 /*
       
  4447 ** This routine redistributes Cells on pPage and up to NN*2 siblings
       
  4448 ** of pPage so that all pages have about the same amount of free space.
       
  4449 ** Usually NN siblings on either side of pPage is used in the balancing,
       
  4450 ** though more siblings might come from one side if pPage is the first
       
  4451 ** or last child of its parent.  If pPage has fewer than 2*NN siblings
       
  4452 ** (something which can only happen if pPage is the root page or a 
       
  4453 ** child of root) then all available siblings participate in the balancing.
       
  4454 **
       
  4455 ** The number of siblings of pPage might be increased or decreased by one or
       
  4456 ** two in an effort to keep pages nearly full but not over full. The root page
       
  4457 ** is special and is allowed to be nearly empty. If pPage is 
       
  4458 ** the root page, then the depth of the tree might be increased
       
  4459 ** or decreased by one, as necessary, to keep the root page from being
       
  4460 ** overfull or completely empty.
       
  4461 **
       
  4462 ** Note that when this routine is called, some of the Cells on pPage
       
  4463 ** might not actually be stored in pPage->aData[].  This can happen
       
  4464 ** if the page is overfull.  Part of the job of this routine is to
       
  4465 ** make sure all Cells for pPage once again fit in pPage->aData[].
       
  4466 **
       
  4467 ** In the course of balancing the siblings of pPage, the parent of pPage
       
  4468 ** might become overfull or underfull.  If that happens, then this routine
       
  4469 ** is called recursively on the parent.
       
  4470 **
       
  4471 ** If this routine fails for any reason, it might leave the database
       
  4472 ** in a corrupted state.  So if this routine fails, the database should
       
  4473 ** be rolled back.
       
  4474 */
       
  4475 static int balance_nonroot(MemPage *pPage){
       
  4476   MemPage *pParent;            /* The parent of pPage */
       
  4477   BtShared *pBt;                  /* The whole database */
       
  4478   int nCell = 0;               /* Number of cells in apCell[] */
       
  4479   int nMaxCells = 0;           /* Allocated size of apCell, szCell, aFrom. */
       
  4480   int nOld;                    /* Number of pages in apOld[] */
       
  4481   int nNew;                    /* Number of pages in apNew[] */
       
  4482   int nDiv;                    /* Number of cells in apDiv[] */
       
  4483   int i, j, k;                 /* Loop counters */
       
  4484   int idx;                     /* Index of pPage in pParent->aCell[] */
       
  4485   int nxDiv;                   /* Next divider slot in pParent->aCell[] */
       
  4486   int rc;                      /* The return code */
       
  4487   int leafCorrection;          /* 4 if pPage is a leaf.  0 if not */
       
  4488   int leafData;                /* True if pPage is a leaf of a LEAFDATA tree */
       
  4489   int usableSpace;             /* Bytes in pPage beyond the header */
       
  4490   int pageFlags;               /* Value of pPage->aData[0] */
       
  4491   int subtotal;                /* Subtotal of bytes in cells on one page */
       
  4492   int iSpace = 0;              /* First unused byte of aSpace[] */
       
  4493   MemPage *apOld[NB];          /* pPage and up to two siblings */
       
  4494   Pgno pgnoOld[NB];            /* Page numbers for each page in apOld[] */
       
  4495   MemPage *apCopy[NB];         /* Private copies of apOld[] pages */
       
  4496   MemPage *apNew[NB+2];        /* pPage and up to NB siblings after balancing */
       
  4497   Pgno pgnoNew[NB+2];          /* Page numbers for each page in apNew[] */
       
  4498   u8 *apDiv[NB];               /* Divider cells in pParent */
       
  4499   int cntNew[NB+2];            /* Index in aCell[] of cell after i-th page */
       
  4500   int szNew[NB+2];             /* Combined size of cells place on i-th page */
       
  4501   u8 **apCell = 0;             /* All cells begin balanced */
       
  4502   int *szCell;                 /* Local size of all cells in apCell[] */
       
  4503   u8 *aCopy[NB];               /* Space for holding data of apCopy[] */
       
  4504   u8 *aSpace;                  /* Space to hold copies of dividers cells */
       
  4505 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4506   u8 *aFrom = 0;
       
  4507 #endif
       
  4508 
       
  4509   /* 
       
  4510   ** Find the parent page.
       
  4511   */
       
  4512   assert( pPage->isInit );
       
  4513   assert( sqlite3pager_iswriteable(pPage->aData) );
       
  4514   pBt = pPage->pBt;
       
  4515   pParent = pPage->pParent;
       
  4516   assert( pParent );
       
  4517   if( SQLITE_OK!=(rc = sqlite3pager_write(pParent->aData)) ){
       
  4518     return rc;
       
  4519   }
       
  4520   TRACE(("BALANCE: begin page %d child of %d\n", pPage->pgno, pParent->pgno));
       
  4521 
       
  4522 #ifndef SQLITE_OMIT_QUICKBALANCE
       
  4523   /*
       
  4524   ** A special case:  If a new entry has just been inserted into a
       
  4525   ** table (that is, a btree with integer keys and all data at the leaves)
       
  4526   ** and the new entry is the right-most entry in the tree (it has the
       
  4527   ** largest key) then use the special balance_quick() routine for
       
  4528   ** balancing.  balance_quick() is much faster and results in a tighter
       
  4529   ** packing of data in the common case.
       
  4530   */
       
  4531   if( pPage->leaf &&
       
  4532       pPage->intKey &&
       
  4533       pPage->leafData &&
       
  4534       pPage->nOverflow==1 &&
       
  4535       pPage->aOvfl[0].idx==pPage->nCell &&
       
  4536       pPage->pParent->pgno!=1 &&
       
  4537       get4byte(&pParent->aData[pParent->hdrOffset+8])==pPage->pgno
       
  4538   ){
       
  4539     /*
       
  4540     ** TODO: Check the siblings to the left of pPage. It may be that
       
  4541     ** they are not full and no new page is required.
       
  4542     */
       
  4543     return balance_quick(pPage, pParent);
       
  4544   }
       
  4545 #endif
       
  4546 
       
  4547   /*
       
  4548   ** Find the cell in the parent page whose left child points back
       
  4549   ** to pPage.  The "idx" variable is the index of that cell.  If pPage
       
  4550   ** is the rightmost child of pParent then set idx to pParent->nCell 
       
  4551   */
       
  4552   if( pParent->idxShift ){
       
  4553     Pgno pgno;
       
  4554     pgno = pPage->pgno;
       
  4555     assert( pgno==sqlite3pager_pagenumber(pPage->aData) );
       
  4556     for(idx=0; idx<pParent->nCell; idx++){
       
  4557       if( get4byte(findCell(pParent, idx))==pgno ){
       
  4558         break;
       
  4559       }
       
  4560     }
       
  4561     assert( idx<pParent->nCell
       
  4562              || get4byte(&pParent->aData[pParent->hdrOffset+8])==pgno );
       
  4563   }else{
       
  4564     idx = pPage->idxParent;
       
  4565   }
       
  4566 
       
  4567   /*
       
  4568   ** Initialize variables so that it will be safe to jump
       
  4569   ** directly to balance_cleanup at any moment.
       
  4570   */
       
  4571   nOld = nNew = 0;
       
  4572   sqlite3pager_ref(pParent->aData);
       
  4573 
       
  4574   /*
       
  4575   ** Find sibling pages to pPage and the cells in pParent that divide
       
  4576   ** the siblings.  An attempt is made to find NN siblings on either
       
  4577   ** side of pPage.  More siblings are taken from one side, however, if
       
  4578   ** pPage there are fewer than NN siblings on the other side.  If pParent
       
  4579   ** has NB or fewer children then all children of pParent are taken.
       
  4580   */
       
  4581   nxDiv = idx - NN;
       
  4582   if( nxDiv + NB > pParent->nCell ){
       
  4583     nxDiv = pParent->nCell - NB + 1;
       
  4584   }
       
  4585   if( nxDiv<0 ){
       
  4586     nxDiv = 0;
       
  4587   }
       
  4588   nDiv = 0;
       
  4589   for(i=0, k=nxDiv; i<NB; i++, k++){
       
  4590     if( k<pParent->nCell ){
       
  4591       apDiv[i] = findCell(pParent, k);
       
  4592       nDiv++;
       
  4593       assert( !pParent->leaf );
       
  4594       pgnoOld[i] = get4byte(apDiv[i]);
       
  4595     }else if( k==pParent->nCell ){
       
  4596       pgnoOld[i] = get4byte(&pParent->aData[pParent->hdrOffset+8]);
       
  4597     }else{
       
  4598       break;
       
  4599     }
       
  4600     rc = getAndInitPage(pBt, pgnoOld[i], &apOld[i], pParent);
       
  4601     if( rc ) goto balance_cleanup;
       
  4602     apOld[i]->idxParent = k;
       
  4603     apCopy[i] = 0;
       
  4604     assert( i==nOld );
       
  4605     nOld++;
       
  4606     nMaxCells += 1+apOld[i]->nCell+apOld[i]->nOverflow;
       
  4607   }
       
  4608 
       
  4609   /* Make nMaxCells a multiple of 2 in order to preserve 8-byte
       
  4610   ** alignment */
       
  4611   nMaxCells = (nMaxCells + 1)&~1;
       
  4612 
       
  4613   /*
       
  4614   ** Allocate space for memory structures
       
  4615   */
       
  4616   apCell = sqliteMallocRaw( 
       
  4617        nMaxCells*sizeof(u8*)                           /* apCell */
       
  4618      + nMaxCells*sizeof(int)                           /* szCell */
       
  4619      + ROUND8(sizeof(MemPage))*NB                      /* aCopy */
       
  4620      + pBt->pageSize*(5+NB)                            /* aSpace */
       
  4621      + (ISAUTOVACUUM ? nMaxCells : 0)                  /* aFrom */
       
  4622   );
       
  4623   if( apCell==0 ){
       
  4624     rc = SQLITE_NOMEM;
       
  4625     goto balance_cleanup;
       
  4626   }
       
  4627   szCell = (int*)&apCell[nMaxCells];
       
  4628   aCopy[0] = (u8*)&szCell[nMaxCells];
       
  4629   assert( ((aCopy[0] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
       
  4630   for(i=1; i<NB; i++){
       
  4631     aCopy[i] = &aCopy[i-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
       
  4632     assert( ((aCopy[i] - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
       
  4633   }
       
  4634   aSpace = &aCopy[NB-1][pBt->pageSize+ROUND8(sizeof(MemPage))];
       
  4635   assert( ((aSpace - (u8*)apCell) & 7)==0 ); /* 8-byte alignment required */
       
  4636 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4637   if( pBt->autoVacuum ){
       
  4638     aFrom = &aSpace[5*pBt->pageSize];
       
  4639   }
       
  4640 #endif
       
  4641   
       
  4642   /*
       
  4643   ** Make copies of the content of pPage and its siblings into aOld[].
       
  4644   ** The rest of this function will use data from the copies rather
       
  4645 ** than the original pages since the original pages will be in the
       
  4646   ** process of being overwritten.
       
  4647   */
       
  4648   for(i=0; i<nOld; i++){
       
  4649     MemPage *p = apCopy[i] = (MemPage*)&aCopy[i][pBt->pageSize];
       
  4650     p->aData = &((u8*)p)[-pBt->pageSize];
       
  4651     memcpy(p->aData, apOld[i]->aData, pBt->pageSize + sizeof(MemPage));
       
  4652     /* The memcpy() above changes the value of p->aData so we have to
       
  4653     ** set it again. */
       
  4654     p->aData = &((u8*)p)[-pBt->pageSize];
       
  4655   }
       
  4656 
       
  4657   /*
       
  4658   ** Load pointers to all cells on sibling pages and the divider cells
       
  4659   ** into the local apCell[] array.  Make copies of the divider cells
       
  4660 ** into space obtained from aSpace[] and remove the divider Cells
       
  4661   ** from pParent.
       
  4662   **
       
  4663   ** If the siblings are on leaf pages, then the child pointers of the
       
  4664   ** divider cells are stripped from the cells before they are copied
       
  4665   ** into aSpace[].  In this way, all cells in apCell[] are without
       
  4666   ** child pointers.  If siblings are not leaves, then all cell in
       
  4667   ** apCell[] include child pointers.  Either way, all cells in apCell[]
       
  4668   ** are alike.
       
  4669   **
       
  4670   ** leafCorrection:  4 if pPage is a leaf.  0 if pPage is not a leaf.
       
  4671   **       leafData:  1 if pPage holds key+data and pParent holds only keys.
       
  4672   */
       
  4673   nCell = 0;
       
  4674   leafCorrection = pPage->leaf*4;
       
  4675   leafData = pPage->leafData && pPage->leaf;
       
  4676   for(i=0; i<nOld; i++){
       
  4677     MemPage *pOld = apCopy[i];
       
  4678     int limit = pOld->nCell+pOld->nOverflow;
       
  4679     for(j=0; j<limit; j++){
       
  4680       assert( nCell<nMaxCells );
       
  4681       apCell[nCell] = findOverflowCell(pOld, j);
       
  4682       szCell[nCell] = cellSizePtr(pOld, apCell[nCell]);
       
  4683 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4684       if( pBt->autoVacuum ){
       
  4685         int a;
       
  4686         aFrom[nCell] = i;
       
  4687         for(a=0; a<pOld->nOverflow; a++){
       
  4688           if( pOld->aOvfl[a].pCell==apCell[nCell] ){
       
  4689             aFrom[nCell] = 0xFF;
       
  4690             break;
       
  4691           }
       
  4692         }
       
  4693       }
       
  4694 #endif
       
  4695       nCell++;
       
  4696     }
       
  4697     if( i<nOld-1 ){
       
  4698       int sz = cellSizePtr(pParent, apDiv[i]);
       
  4699       if( leafData ){
       
  4700         /* With the LEAFDATA flag, pParent cells hold only INTKEYs that
       
  4701         ** are duplicates of keys on the child pages.  We need to remove
       
  4702         ** the divider cells from pParent, but the dividers cells are not
       
  4703         ** added to apCell[] because they are duplicates of child cells.
       
  4704         */
       
  4705         dropCell(pParent, nxDiv, sz);
       
  4706       }else{
       
  4707         u8 *pTemp;
       
  4708         assert( nCell<nMaxCells );
       
  4709         szCell[nCell] = sz;
       
  4710         pTemp = &aSpace[iSpace];
       
  4711         iSpace += sz;
       
  4712         assert( iSpace<=pBt->pageSize*5 );
       
  4713         memcpy(pTemp, apDiv[i], sz);
       
  4714         apCell[nCell] = pTemp+leafCorrection;
       
  4715 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4716         if( pBt->autoVacuum ){
       
  4717           aFrom[nCell] = 0xFF;
       
  4718         }
       
  4719 #endif
       
  4720         dropCell(pParent, nxDiv, sz);
       
  4721         szCell[nCell] -= leafCorrection;
       
  4722         assert( get4byte(pTemp)==pgnoOld[i] );
       
  4723         if( !pOld->leaf ){
       
  4724           assert( leafCorrection==0 );
       
  4725           /* The right pointer of the child page pOld becomes the left
       
  4726           ** pointer of the divider cell */
       
  4727           memcpy(apCell[nCell], &pOld->aData[pOld->hdrOffset+8], 4);
       
  4728         }else{
       
  4729           assert( leafCorrection==4 );
       
  4730         }
       
  4731         nCell++;
       
  4732       }
       
  4733     }
       
  4734   }
       
  4735 
       
  4736   /*
       
  4737   ** Figure out the number of pages needed to hold all nCell cells.
       
  4738   ** Store this number in "k".  Also compute szNew[] which is the total
       
  4739   ** size of all cells on the i-th page and cntNew[] which is the index
       
  4740   ** in apCell[] of the cell that divides page i from page i+1.  
       
  4741   ** cntNew[k] should equal nCell.
       
  4742   **
       
  4743   ** Values computed by this block:
       
  4744   **
       
  4745   **           k: The total number of sibling pages
       
  4746   **    szNew[i]: Space used on the i-th sibling page.
       
  4747   **   cntNew[i]: Index in apCell[] and szCell[] for the first cell to
       
  4748   **              the right of the i-th sibling page.
       
  4749   ** usableSpace: Number of bytes of space available on each sibling.
       
  4750   ** 
       
  4751   */
       
  4752   usableSpace = pBt->usableSize - 12 + leafCorrection;
       
  4753   for(subtotal=k=i=0; i<nCell; i++){
       
  4754     assert( i<nMaxCells );
       
  4755     subtotal += szCell[i] + 2;
       
  4756     if( subtotal > usableSpace ){
       
  4757       szNew[k] = subtotal - szCell[i];
       
  4758       cntNew[k] = i;
       
  4759       if( leafData ){ i--; }
       
  4760       subtotal = 0;
       
  4761       k++;
       
  4762     }
       
  4763   }
       
  4764   szNew[k] = subtotal;
       
  4765   cntNew[k] = nCell;
       
  4766   k++;
       
  4767 
       
  4768   /*
       
  4769   ** The packing computed by the previous block is biased toward the siblings
       
  4770   ** on the left side.  The left siblings are always nearly full, while the
       
  4771   ** right-most sibling might be nearly empty.  This block of code attempts
       
  4772   ** to adjust the packing of siblings to get a better balance.
       
  4773   **
       
  4774   ** This adjustment is more than an optimization.  The packing above might
       
  4775   ** be so out of balance as to be illegal.  For example, the right-most
       
  4776   ** sibling might be completely empty.  This adjustment is not optional.
       
  4777   */
       
  4778   for(i=k-1; i>0; i--){
       
  4779     int szRight = szNew[i];  /* Size of sibling on the right */
       
  4780     int szLeft = szNew[i-1]; /* Size of sibling on the left */
       
  4781     int r;              /* Index of right-most cell in left sibling */
       
  4782     int d;              /* Index of first cell to the left of right sibling */
       
  4783 
       
  4784     r = cntNew[i-1] - 1;
       
  4785     d = r + 1 - leafData;
       
  4786     assert( d<nMaxCells );
       
  4787     assert( r<nMaxCells );
       
  4788     while( szRight==0 || szRight+szCell[d]+2<=szLeft-(szCell[r]+2) ){
       
  4789       szRight += szCell[d] + 2;
       
  4790       szLeft -= szCell[r] + 2;
       
  4791       cntNew[i-1]--;
       
  4792       r = cntNew[i-1] - 1;
       
  4793       d = r + 1 - leafData;
       
  4794     }
       
  4795     szNew[i] = szRight;
       
  4796     szNew[i-1] = szLeft;
       
  4797   }
       
  4798 
       
  4799   /* Either we found one or more cells (cntNew[0]>0) or we are
       
  4800   ** a virtual root page.  A virtual root page is when the real root
       
  4801   ** page is page 1 and we are the only child of that page.
       
  4802   */
       
  4803   assert( cntNew[0]>0 || (pParent->pgno==1 && pParent->nCell==0) );
       
  4804 
       
  4805   /*
       
  4806   ** Allocate k new pages.  Reuse old pages where possible.
       
  4807   */
       
  4808   assert( pPage->pgno>1 );
       
  4809   pageFlags = pPage->aData[0];
       
  4810   for(i=0; i<k; i++){
       
  4811     MemPage *pNew;
       
  4812     if( i<nOld ){
       
  4813       pNew = apNew[i] = apOld[i];
       
  4814       pgnoNew[i] = pgnoOld[i];
       
  4815       apOld[i] = 0;
       
  4816       rc = sqlite3pager_write(pNew->aData);
       
  4817       if( rc ) goto balance_cleanup;
       
  4818     }else{
       
  4819       assert( i>0 );
       
  4820       rc = allocatePage(pBt, &pNew, &pgnoNew[i], pgnoNew[i-1], 0);
       
  4821       if( rc ) goto balance_cleanup;
       
  4822       apNew[i] = pNew;
       
  4823     }
       
  4824     nNew++;
       
  4825     zeroPage(pNew, pageFlags);
       
  4826   }
       
  4827 
       
  4828   /* Free any old pages that were not reused as new pages.
       
  4829   */
       
  4830   while( i<nOld ){
       
  4831     rc = freePage(apOld[i]);
       
  4832     if( rc ) goto balance_cleanup;
       
  4833     releasePage(apOld[i]);
       
  4834     apOld[i] = 0;
       
  4835     i++;
       
  4836   }
       
  4837 
       
  4838   /*
       
  4839   ** Put the new pages in ascending order.  This helps to
       
  4840   ** keep entries in the disk file in order so that a scan
       
  4841   ** of the table is a linear scan through the file.  That
       
  4842   ** in turn helps the operating system to deliver pages
       
  4843   ** from the disk more rapidly.
       
  4844   **
       
  4845   ** An O(n^2) insertion sort algorithm is used, but since
       
  4846   ** n is never more than NB (a small constant), that should
       
  4847   ** not be a problem.
       
  4848   **
       
  4849   ** When NB==3, this one optimization makes the database
       
  4850   ** about 25% faster for large insertions and deletions.
       
  4851   */
       
  4852   for(i=0; i<k-1; i++){
       
  4853     int minV = pgnoNew[i];
       
  4854     int minI = i;
       
  4855     for(j=i+1; j<k; j++){
       
  4856       if( pgnoNew[j]<(unsigned)minV ){
       
  4857         minI = j;
       
  4858         minV = pgnoNew[j];
       
  4859       }
       
  4860     }
       
  4861     if( minI>i ){
       
  4862       int t;
       
  4863       MemPage *pT;
       
  4864       t = pgnoNew[i];
       
  4865       pT = apNew[i];
       
  4866       pgnoNew[i] = pgnoNew[minI];
       
  4867       apNew[i] = apNew[minI];
       
  4868       pgnoNew[minI] = t;
       
  4869       apNew[minI] = pT;
       
  4870     }
       
  4871   }
       
  4872   TRACE(("BALANCE: old: %d %d %d  new: %d(%d) %d(%d) %d(%d) %d(%d) %d(%d)\n",
       
  4873     pgnoOld[0], 
       
  4874     nOld>=2 ? pgnoOld[1] : 0,
       
  4875     nOld>=3 ? pgnoOld[2] : 0,
       
  4876     pgnoNew[0], szNew[0],
       
  4877     nNew>=2 ? pgnoNew[1] : 0, nNew>=2 ? szNew[1] : 0,
       
  4878     nNew>=3 ? pgnoNew[2] : 0, nNew>=3 ? szNew[2] : 0,
       
  4879     nNew>=4 ? pgnoNew[3] : 0, nNew>=4 ? szNew[3] : 0,
       
  4880     nNew>=5 ? pgnoNew[4] : 0, nNew>=5 ? szNew[4] : 0));
       
  4881 
       
  4882   /*
       
  4883   ** Evenly distribute the data in apCell[] across the new pages.
       
  4884   ** Insert divider cells into pParent as necessary.
       
  4885   */
       
  4886   j = 0;
       
  4887   for(i=0; i<nNew; i++){
       
  4888     /* Assemble the new sibling page. */
       
  4889     MemPage *pNew = apNew[i];
       
  4890     assert( j<nMaxCells );
       
  4891     assert( pNew->pgno==pgnoNew[i] );
       
  4892     assemblePage(pNew, cntNew[i]-j, &apCell[j], &szCell[j]);
       
  4893     assert( pNew->nCell>0 || (nNew==1 && cntNew[0]==0) );
       
  4894     assert( pNew->nOverflow==0 );
       
  4895 
       
  4896 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4897     /* If this is an auto-vacuum database, update the pointer map entries
       
  4898     ** that point to the siblings that were rearranged. These can be: left
       
  4899     ** children of cells, the right-child of the page, or overflow pages
       
  4900     ** pointed to by cells.
       
  4901     */
       
  4902     if( pBt->autoVacuum ){
       
  4903       for(k=j; k<cntNew[i]; k++){
       
  4904         assert( k<nMaxCells );
       
  4905         if( aFrom[k]==0xFF || apCopy[aFrom[k]]->pgno!=pNew->pgno ){
       
  4906           rc = ptrmapPutOvfl(pNew, k-j);
       
  4907           if( rc!=SQLITE_OK ){
       
  4908             goto balance_cleanup;
       
  4909           }
       
  4910         }
       
  4911       }
       
  4912     }
       
  4913 #endif
       
  4914 
       
  4915     j = cntNew[i];
       
  4916 
       
  4917     /* If the sibling page assembled above was not the right-most sibling,
       
  4918     ** insert a divider cell into the parent page.
       
  4919     */
       
  4920     if( i<nNew-1 && j<nCell ){
       
  4921       u8 *pCell;
       
  4922       u8 *pTemp;
       
  4923       int sz;
       
  4924 
       
  4925       assert( j<nMaxCells );
       
  4926       pCell = apCell[j];
       
  4927       sz = szCell[j] + leafCorrection;
       
  4928       if( !pNew->leaf ){
       
  4929         memcpy(&pNew->aData[8], pCell, 4);
       
  4930         pTemp = 0;
       
  4931       }else if( leafData ){
       
  4932 	/* If the tree is a leaf-data tree, and the siblings are leaves, 
       
  4933         ** then there is no divider cell in apCell[]. Instead, the divider 
       
  4934         ** cell consists of the integer key for the right-most cell of 
       
  4935         ** the sibling-page assembled above only.
       
  4936         */
       
  4937         CellInfo info;
       
  4938         j--;
       
  4939         parseCellPtr(pNew, apCell[j], &info);
       
  4940         pCell = &aSpace[iSpace];
       
  4941         fillInCell(pParent, pCell, 0, info.nKey, 0, 0, &sz);
       
  4942         iSpace += sz;
       
  4943         assert( iSpace<=pBt->pageSize*5 );
       
  4944         pTemp = 0;
       
  4945       }else{
       
  4946         pCell -= 4;
       
  4947         pTemp = &aSpace[iSpace];
       
  4948         iSpace += sz;
       
  4949         assert( iSpace<=pBt->pageSize*5 );
       
  4950       }
       
  4951       rc = insertCell(pParent, nxDiv, pCell, sz, pTemp, 4);
       
  4952       if( rc!=SQLITE_OK ) goto balance_cleanup;
       
  4953       put4byte(findOverflowCell(pParent,nxDiv), pNew->pgno);
       
  4954 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  4955       /* If this is an auto-vacuum database, and not a leaf-data tree,
       
  4956       ** then update the pointer map with an entry for the overflow page
       
  4957       ** that the cell just inserted points to (if any).
       
  4958       */
       
  4959       if( pBt->autoVacuum && !leafData ){
       
  4960         rc = ptrmapPutOvfl(pParent, nxDiv);
       
  4961         if( rc!=SQLITE_OK ){
       
  4962           goto balance_cleanup;
       
  4963         }
       
  4964       }
       
  4965 #endif
       
  4966       j++;
       
  4967       nxDiv++;
       
  4968     }
       
  4969   }
       
  4970   assert( j==nCell );
       
  4971   assert( nOld>0 );
       
  4972   assert( nNew>0 );
       
  4973   if( (pageFlags & PTF_LEAF)==0 ){
       
  4974     memcpy(&apNew[nNew-1]->aData[8], &apCopy[nOld-1]->aData[8], 4);
       
  4975   }
       
  4976   if( nxDiv==pParent->nCell+pParent->nOverflow ){
       
  4977     /* Right-most sibling is the right-most child of pParent */
       
  4978     put4byte(&pParent->aData[pParent->hdrOffset+8], pgnoNew[nNew-1]);
       
  4979   }else{
       
  4980     /* Right-most sibling is the left child of the first entry in pParent
       
  4981     ** past the right-most divider entry */
       
  4982     put4byte(findOverflowCell(pParent, nxDiv), pgnoNew[nNew-1]);
       
  4983   }
       
  4984 
       
  4985   /*
       
  4986   ** Reparent children of all cells.
       
  4987   */
       
  4988   for(i=0; i<nNew; i++){
       
  4989     rc = reparentChildPages(apNew[i]);
       
  4990     if( rc!=SQLITE_OK ) goto balance_cleanup;
       
  4991   }
       
  4992   rc = reparentChildPages(pParent);
       
  4993   if( rc!=SQLITE_OK ) goto balance_cleanup;
       
  4994 
       
  4995   /*
       
  4996   ** Balance the parent page.  Note that the current page (pPage) might
       
  4997   ** have been added to the freelist so it might no longer be initialized.
       
  4998   ** But the parent page will always be initialized.
       
  4999   */
       
  5000   assert( pParent->isInit );
       
  5001   /* assert( pPage->isInit ); // No! pPage might have been added to freelist */
       
  5002   /* pageIntegrity(pPage);    // No! pPage might have been added to freelist */ 
       
  5003   rc = balance(pParent, 0);
       
  5004   
       
  5005   /*
       
  5006   ** Cleanup before returning.
       
  5007   */
       
  5008 balance_cleanup:
       
  5009   sqliteFree(apCell);
       
  5010   for(i=0; i<nOld; i++){
       
  5011     releasePage(apOld[i]);
       
  5012   }
       
  5013   for(i=0; i<nNew; i++){
       
  5014     releasePage(apNew[i]);
       
  5015   }
       
  5016   releasePage(pParent);
       
  5017   TRACE(("BALANCE: finished with %d: old=%d new=%d cells=%d\n",
       
  5018           pPage->pgno, nOld, nNew, nCell));
       
  5019   return rc;
       
  5020 }
       
  5021 
       
  5022 /*
       
  5023 ** This routine is called for the root page of a btree when the root
       
  5024 ** page contains no cells.  This is an opportunity to make the tree
       
  5025 ** shallower by one level.
       
  5026 */
       
  5027 static int balance_shallower(MemPage *pPage){
       
  5028   MemPage *pChild;             /* The only child page of pPage */
       
  5029   Pgno pgnoChild;              /* Page number for pChild */
       
  5030   int rc = SQLITE_OK;          /* Return code from subprocedures */
       
  5031   BtShared *pBt;                  /* The main BTree structure */
       
  5032   int mxCellPerPage;           /* Maximum number of cells per page */
       
  5033   u8 **apCell;                 /* All cells from pages being balanced */
       
  5034   int *szCell;                 /* Local size of all cells */
       
  5035 
       
  5036   assert( pPage->pParent==0 );
       
  5037   assert( pPage->nCell==0 );
       
  5038   pBt = pPage->pBt;
       
  5039   mxCellPerPage = MX_CELL(pBt);
       
  5040   apCell = sqliteMallocRaw( mxCellPerPage*(sizeof(u8*)+sizeof(int)) );
       
  5041   if( apCell==0 ) return SQLITE_NOMEM;
       
  5042   szCell = (int*)&apCell[mxCellPerPage];
       
  5043   if( pPage->leaf ){
       
  5044     /* The table is completely empty */
       
  5045     TRACE(("BALANCE: empty table %d\n", pPage->pgno));
       
  5046   }else{
       
  5047     /* The root page is empty but has one child.  Transfer the
       
  5048     ** information from that one child into the root page if it 
       
  5049     ** will fit.  This reduces the depth of the tree by one.
       
  5050     **
       
  5051     ** If the root page is page 1, it has less space available than
       
  5052     ** its child (due to the 100 byte header that occurs at the beginning
       
  5053     ** of the database fle), so it might not be able to hold all of the 
       
  5054     ** information currently contained in the child.  If this is the 
       
  5055     ** case, then do not do the transfer.  Leave page 1 empty except
       
  5056     ** for the right-pointer to the child page.  The child page becomes
       
  5057     ** the virtual root of the tree.
       
  5058     */
       
  5059     pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
       
  5060     assert( pgnoChild>0 );
       
  5061     assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
       
  5062     rc = getPage(pPage->pBt, pgnoChild, &pChild);
       
  5063     if( rc ) goto end_shallow_balance;
       
  5064     if( pPage->pgno==1 ){
       
  5065       rc = initPage(pChild, pPage);
       
  5066       if( rc ) goto end_shallow_balance;
       
  5067       assert( pChild->nOverflow==0 );
       
  5068       if( pChild->nFree>=100 ){
       
  5069         /* The child information will fit on the root page, so do the
       
  5070         ** copy */
       
  5071         int i;
       
  5072         zeroPage(pPage, pChild->aData[0]);
       
  5073         for(i=0; i<pChild->nCell; i++){
       
  5074           apCell[i] = findCell(pChild,i);
       
  5075           szCell[i] = cellSizePtr(pChild, apCell[i]);
       
  5076         }
       
  5077         assemblePage(pPage, pChild->nCell, apCell, szCell);
       
  5078         /* Copy the right-pointer of the child to the parent. */
       
  5079         put4byte(&pPage->aData[pPage->hdrOffset+8], 
       
  5080             get4byte(&pChild->aData[pChild->hdrOffset+8]));
       
  5081         freePage(pChild);
       
  5082         TRACE(("BALANCE: child %d transfer to page 1\n", pChild->pgno));
       
  5083       }else{
       
  5084         /* The child has more information that will fit on the root.
       
  5085         ** The tree is already balanced.  Do nothing. */
       
  5086         TRACE(("BALANCE: child %d will not fit on page 1\n", pChild->pgno));
       
  5087       }
       
  5088     }else{
       
  5089       memcpy(pPage->aData, pChild->aData, pPage->pBt->usableSize);
       
  5090       pPage->isInit = 0;
       
  5091       pPage->pParent = 0;
       
  5092       rc = initPage(pPage, 0);
       
  5093       assert( rc==SQLITE_OK );
       
  5094       freePage(pChild);
       
  5095       TRACE(("BALANCE: transfer child %d into root %d\n",
       
  5096               pChild->pgno, pPage->pgno));
       
  5097     }
       
  5098     rc = reparentChildPages(pPage);
       
  5099     assert( pPage->nOverflow==0 );
       
  5100 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  5101     if( pBt->autoVacuum ){
       
  5102       int i;
       
  5103       for(i=0; i<pPage->nCell; i++){ 
       
  5104         rc = ptrmapPutOvfl(pPage, i);
       
  5105         if( rc!=SQLITE_OK ){
       
  5106           goto end_shallow_balance;
       
  5107         }
       
  5108       }
       
  5109     }
       
  5110 #endif
       
  5111     if( rc!=SQLITE_OK ) goto end_shallow_balance;
       
  5112     releasePage(pChild);
       
  5113   }
       
  5114 end_shallow_balance:
       
  5115   sqliteFree(apCell);
       
  5116   return rc;
       
  5117 }
       
  5118 
       
  5119 
       
  5120 /*
       
  5121 ** The root page is overfull
       
  5122 **
       
  5123 ** When this happens, Create a new child page and copy the
       
  5124 ** contents of the root into the child.  Then make the root
       
  5125 ** page an empty page with rightChild pointing to the new
       
  5126 ** child.   Finally, call balance_internal() on the new child
       
  5127 ** to cause it to split.
       
  5128 */
       
  5129 static int balance_deeper(MemPage *pPage){
       
  5130   int rc;             /* Return value from subprocedures */
       
  5131   MemPage *pChild;    /* Pointer to a new child page */
       
  5132   Pgno pgnoChild;     /* Page number of the new child page */
       
  5133   BtShared *pBt;         /* The BTree */
       
  5134   int usableSize;     /* Total usable size of a page */
       
  5135   u8 *data;           /* Content of the parent page */
       
  5136   u8 *cdata;          /* Content of the child page */
       
  5137   int hdr;            /* Offset to page header in parent */
       
  5138   int brk;            /* Offset to content of first cell in parent */
       
  5139 
       
  5140   assert( pPage->pParent==0 );
       
  5141   assert( pPage->nOverflow>0 );
       
  5142   pBt = pPage->pBt;
       
  5143   rc = allocatePage(pBt, &pChild, &pgnoChild, pPage->pgno, 0);
       
  5144   if( rc ) return rc;
       
  5145   assert( sqlite3pager_iswriteable(pChild->aData) );
       
  5146   usableSize = pBt->usableSize;
       
  5147   data = pPage->aData;
       
  5148   hdr = pPage->hdrOffset;
       
  5149   brk = get2byte(&data[hdr+5]);
       
  5150   cdata = pChild->aData;
       
  5151   memcpy(cdata, &data[hdr], pPage->cellOffset+2*pPage->nCell-hdr);
       
  5152   memcpy(&cdata[brk], &data[brk], usableSize-brk);
       
  5153   assert( pChild->isInit==0 );
       
  5154   rc = initPage(pChild, pPage);
       
  5155   if( rc ) goto balancedeeper_out;
       
  5156   memcpy(pChild->aOvfl, pPage->aOvfl, pPage->nOverflow*sizeof(pPage->aOvfl[0]));
       
  5157   pChild->nOverflow = pPage->nOverflow;
       
  5158   if( pChild->nOverflow ){
       
  5159     pChild->nFree = 0;
       
  5160   }
       
  5161   assert( pChild->nCell==pPage->nCell );
       
  5162   zeroPage(pPage, pChild->aData[0] & ~PTF_LEAF);
       
  5163   put4byte(&pPage->aData[pPage->hdrOffset+8], pgnoChild);
       
  5164   TRACE(("BALANCE: copy root %d into %d\n", pPage->pgno, pChild->pgno));
       
  5165 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  5166   if( pBt->autoVacuum ){
       
  5167     int i;
       
  5168     rc = ptrmapPut(pBt, pChild->pgno, PTRMAP_BTREE, pPage->pgno);
       
  5169     if( rc ) goto balancedeeper_out;
       
  5170     for(i=0; i<pChild->nCell; i++){
       
  5171       rc = ptrmapPutOvfl(pChild, i);
       
  5172       if( rc!=SQLITE_OK ){
       
  5173         return rc;
       
  5174       }
       
  5175     }
       
  5176   }
       
  5177 #endif
       
  5178   rc = balance_nonroot(pChild);
       
  5179 
       
  5180 balancedeeper_out:
       
  5181   releasePage(pChild);
       
  5182   return rc;
       
  5183 }
       
  5184 
       
  5185 /*
       
  5186 ** Decide if the page pPage needs to be balanced.  If balancing is
       
  5187 ** required, call the appropriate balancing routine.
       
  5188 */
       
  5189 static int balance(MemPage *pPage, int insert){
       
  5190   int rc = SQLITE_OK;
       
  5191   if( pPage->pParent==0 ){
       
  5192     if( pPage->nOverflow>0 ){
       
  5193       rc = balance_deeper(pPage);
       
  5194     }
       
  5195     if( rc==SQLITE_OK && pPage->nCell==0 ){
       
  5196       rc = balance_shallower(pPage);
       
  5197     }
       
  5198   }else{
       
  5199     if( pPage->nOverflow>0 || 
       
  5200         (!insert && pPage->nFree>pPage->pBt->usableSize*2/3) ){
       
  5201       rc = balance_nonroot(pPage);
       
  5202     }
       
  5203   }
       
  5204   return rc;
       
  5205 }
       
  5206 
       
  5207 /*
       
  5208 ** This routine checks all cursors that point to table pgnoRoot.
       
  5209 ** If any of those cursors were opened with wrFlag==0 in a different
       
  5210 ** database connection (a database connection that shares the pager
       
  5211 ** cache with the current connection) and that other connection 
       
  5212 ** is not in the ReadUncommmitted state, then this routine returns 
       
  5213 ** SQLITE_LOCKED.
       
  5214 **
       
  5215 ** In addition to checking for read-locks (where a read-lock 
       
  5216 ** means a cursor opened with wrFlag==0) this routine also moves
       
  5217 ** all cursors write cursors so that they are pointing to the 
       
  5218 ** first Cell on the root page.  This is necessary because an insert 
       
  5219 ** or delete might change the number of cells on a page or delete
       
  5220 ** a page entirely and we do not want to leave any cursors 
       
  5221 ** pointing to non-existant pages or cells.
       
  5222 */
       
  5223 static int checkReadLocks(Btree *pBtree, Pgno pgnoRoot, BtCursor *pExclude){
       
  5224   BtCursor *p;
       
  5225   BtShared *pBt = pBtree->pBt;
       
  5226   sqlite3 *db = pBtree->pSqlite;
       
  5227   for(p=pBt->pCursor; p; p=p->pNext){
       
  5228     if( p==pExclude ) continue;
       
  5229     if( p->eState!=CURSOR_VALID ) continue;
       
  5230     if( p->pgnoRoot!=pgnoRoot ) continue;
       
  5231     if( p->wrFlag==0 ){
       
  5232       sqlite3 *dbOther = p->pBtree->pSqlite;
       
  5233       if( dbOther==0 ||
       
  5234          (dbOther!=db && (dbOther->flags & SQLITE_ReadUncommitted)==0) ){
       
  5235         return SQLITE_LOCKED;
       
  5236       }
       
  5237     }else if( p->pPage->pgno!=p->pgnoRoot ){
       
  5238       moveToRoot(p);
       
  5239     }
       
  5240   }
       
  5241   return SQLITE_OK;
       
  5242 }
       
  5243 
       
  5244 /*
       
  5245 ** Insert a new record into the BTree.  The key is given by (pKey,nKey)
       
  5246 ** and the data is given by (pData,nData).  The cursor is used only to
       
  5247 ** define what table the record should be inserted into.  The cursor
       
  5248 ** is left pointing at a random location.
       
  5249 **
       
  5250 ** For an INTKEY table, only the nKey value of the key is used.  pKey is
       
  5251 ** ignored.  For a ZERODATA table, the pData and nData are both ignored.
       
  5252 */
       
  5253 int sqlite3BtreeInsert(
       
  5254   BtCursor *pCur,                /* Insert data into the table of this cursor */
       
  5255   const void *pKey, i64 nKey,    /* The key of the new record */
       
  5256   const void *pData, int nData   /* The data of the new record */
       
  5257 ){
       
  5258   int rc;
       
  5259   int loc;
       
  5260   int szNew;
       
  5261   MemPage *pPage;
       
  5262   BtShared *pBt = pCur->pBtree->pBt;
       
  5263   unsigned char *oldCell;
       
  5264   unsigned char *newCell = 0;
       
  5265 
       
  5266   if( pBt->inTransaction!=TRANS_WRITE ){
       
  5267     /* Must start a transaction before doing an insert */
       
  5268     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  5269   }
       
  5270   assert( !pBt->readOnly );
       
  5271   if( !pCur->wrFlag ){
       
  5272     return SQLITE_PERM;   /* Cursor not open for writing */
       
  5273   }
       
  5274   if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
       
  5275     return SQLITE_LOCKED; /* The table pCur points to has a read lock */
       
  5276   }
       
  5277 
       
  5278   /* Save the positions of any other cursors open on this table */
       
  5279   restoreOrClearCursorPosition(pCur, 0);
       
  5280   if( 
       
  5281     SQLITE_OK!=(rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur)) ||
       
  5282     SQLITE_OK!=(rc = sqlite3BtreeMoveto(pCur, pKey, nKey, &loc))
       
  5283   ){
       
  5284     return rc;
       
  5285   }
       
  5286 
       
  5287   pPage = pCur->pPage;
       
  5288   assert( pPage->intKey || nKey>=0 );
       
  5289   assert( pPage->leaf || !pPage->leafData );
       
  5290   TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
       
  5291           pCur->pgnoRoot, nKey, nData, pPage->pgno,
       
  5292           loc==0 ? "overwrite" : "new entry"));
       
  5293   assert( pPage->isInit );
       
  5294   rc = sqlite3pager_write(pPage->aData);
       
  5295   if( rc ) return rc;
       
  5296   newCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
       
  5297   if( newCell==0 ) return SQLITE_NOMEM;
       
  5298   rc = fillInCell(pPage, newCell, pKey, nKey, pData, nData, &szNew);
       
  5299   if( rc ) goto end_insert;
       
  5300   assert( szNew==cellSizePtr(pPage, newCell) );
       
  5301   assert( szNew<=MX_CELL_SIZE(pBt) );
       
  5302   if( loc==0 && CURSOR_VALID==pCur->eState ){
       
  5303     int szOld;
       
  5304     assert( pCur->idx>=0 && pCur->idx<pPage->nCell );
       
  5305     oldCell = findCell(pPage, pCur->idx);
       
  5306     if( !pPage->leaf ){
       
  5307       memcpy(newCell, oldCell, 4);
       
  5308     }
       
  5309     szOld = cellSizePtr(pPage, oldCell);
       
  5310     rc = clearCell(pPage, oldCell);
       
  5311     if( rc ) goto end_insert;
       
  5312     dropCell(pPage, pCur->idx, szOld);
       
  5313   }else if( loc<0 && pPage->nCell>0 ){
       
  5314     assert( pPage->leaf );
       
  5315     pCur->idx++;
       
  5316     pCur->info.nSize = 0;
       
  5317   }else{
       
  5318     assert( pPage->leaf );
       
  5319   }
       
  5320   rc = insertCell(pPage, pCur->idx, newCell, szNew, 0, 0);
       
  5321   if( rc!=SQLITE_OK ) goto end_insert;
       
  5322   rc = balance(pPage, 1);
       
  5323   /* sqlite3BtreePageDump(pCur->pBt, pCur->pgnoRoot, 1); */
       
  5324   /* fflush(stdout); */
       
  5325   if( rc==SQLITE_OK ){
       
  5326     moveToRoot(pCur);
       
  5327   }
       
  5328 end_insert:
       
  5329   sqliteFree(newCell);
       
  5330   return rc;
       
  5331 }
       
  5332 
       
/*
** Delete the entry that the cursor is pointing to.
**
** The following preconditions are checked, in this order, and the
** listed code is returned when one of them fails:
**
**    SQLITE_READONLY or SQLITE_ERROR   no write transaction is active
**    SQLITE_ERROR                      pCur->idx is past the last cell
**    SQLITE_PERM                       the cursor was not opened for writing
**    SQLITE_LOCKED                     checkReadLocks() reports a conflict
**
** Any other non-zero return value is an error code passed up from one
** of the helper routines.  On success the cursor is reset with
** moveToRoot(), so callers must not rely on where it points afterwards.
** When an error is returned moveToRoot() is not called.
**
** NOTE(review): pPage is captured from pCur->pPage, and dereferenced,
** before restoreOrClearCursorPosition() runs.  That looks like it assumes
** the cursor is never in CURSOR_REQUIRESEEK state on entry -- confirm
** against the callers.
*/
int sqlite3BtreeDelete(BtCursor *pCur){
  MemPage *pPage = pCur->pPage;       /* Page holding the entry to delete */
  unsigned char *pCell;               /* The cell being deleted */
  int rc;                             /* Result code */
  Pgno pgnoChild = 0;                 /* Left child of pCell (interior page) */
  BtShared *pBt = pCur->pBtree->pBt;

  assert( pPage->isInit );
  if( pBt->inTransaction!=TRANS_WRITE ){
    /* Must start a transaction before doing a delete */
    return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
  }
  assert( !pBt->readOnly );
  if( pCur->idx >= pPage->nCell ){
    return SQLITE_ERROR;  /* The cursor is not pointing to anything */
  }
  if( !pCur->wrFlag ){
    return SQLITE_PERM;   /* Did not open this cursor for writing */
  }
  if( checkReadLocks(pCur->pBtree, pCur->pgnoRoot, pCur) ){
    return SQLITE_LOCKED; /* The table pCur points to has a read lock */
  }

  /* Restore the current cursor position (a no-op if the cursor is not in 
  ** CURSOR_REQUIRESEEK state) and save the positions of any other cursors 
  ** open on the same table. Then call sqlite3pager_write() on the page
  ** that the entry will be deleted from.  The three steps are chained so
  ** that the first failure short-circuits the rest and is returned as-is.
  */
  if( 
    (rc = restoreOrClearCursorPosition(pCur, 1))!=0 ||
    (rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur))!=0 ||
    (rc = sqlite3pager_write(pPage->aData))!=0
  ){
    return rc;
  }

  /* Locate the cell within it's page and leave pCell pointing to the
  ** data. The clearCell() call frees any overflow pages associated with the
  ** cell. The cell itself is still intact.  On an interior page the first
  ** four bytes of the cell are read as the child page number and kept in
  ** pgnoChild so they can be written into the replacement cell later.
  */
  pCell = findCell(pPage, pCur->idx);
  if( !pPage->leaf ){
    pgnoChild = get4byte(pCell);
  }
  rc = clearCell(pPage, pCell);
  if( rc ) return rc;

  if( !pPage->leaf ){
    /*
    ** The entry we are about to delete is not a leaf so if we do not
    ** do something we will leave a hole on an internal page.
    ** We have to fill the hole by moving in a cell from a leaf.  The
    ** next Cell after the one to be deleted is guaranteed to exist and
    ** to be a leaf so we can use it.
    **
    ** Every step below runs only while rc is still SQLITE_OK; the
    ** temporary cell buffer and the temporary cursor are released on
    ** every path, success or failure.
    */
    BtCursor leafCur;                 /* Temporary cursor moved to the leaf */
    unsigned char *pNext;             /* Leaf cell that fills the hole */
    int szNext;  /* The compiler warning is wrong: szNext is always 
                 ** initialized before use.  Adding an extra initialization
                 ** to silence the compiler slows down the code. */
    int notUsed;
    unsigned char *tempCell = 0;      /* Scratch buffer handed to insertCell */
    assert( !pPage->leafData );
    getTempCursor(pCur, &leafCur);
    rc = sqlite3BtreeNext(&leafCur, &notUsed);
    if( rc!=SQLITE_OK ){
      /* Any failure other than out-of-memory is reported as corruption. */
      if( rc!=SQLITE_NOMEM ){
        rc = SQLITE_CORRUPT_BKPT; 
      }
    }
    if( rc==SQLITE_OK ){
      rc = sqlite3pager_write(leafCur.pPage->aData);
    }
    if( rc==SQLITE_OK ){
      TRACE(("DELETE: table=%d delete internal from %d replace from leaf %d\n",
         pCur->pgnoRoot, pPage->pgno, leafCur.pPage->pgno));
      dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
      pNext = findCell(leafCur.pPage, leafCur.idx);
      szNext = cellSizePtr(leafCur.pPage, pNext);
      assert( MX_CELL_SIZE(pBt)>=szNext+4 );
      tempCell = sqliteMallocRaw( MX_CELL_SIZE(pBt) );
      if( tempCell==0 ){
        rc = SQLITE_NOMEM;
      }
    }
    if( rc==SQLITE_OK ){
      /* The inserted cell starts 4 bytes before the leaf cell and is 4
      ** bytes longer; those leading 4 bytes are overwritten with pgnoChild
      ** immediately below. */
      rc = insertCell(pPage, pCur->idx, pNext-4, szNext+4, tempCell, 0);
    }
    if( rc==SQLITE_OK ){
      put4byte(findOverflowCell(pPage, pCur->idx), pgnoChild);
      rc = balance(pPage, 0);
    }
    if( rc==SQLITE_OK ){
      /* Only now is the cell removed from the leaf it was copied from. */
      dropCell(leafCur.pPage, leafCur.idx, szNext);
      rc = balance(leafCur.pPage, 0);
    }
    sqliteFree(tempCell);
    releaseTempCursor(&leafCur);
  }else{
    /* Leaf page: drop the cell in place and rebalance. */
    TRACE(("DELETE: table=%d delete from leaf %d\n",
       pCur->pgnoRoot, pPage->pgno));
    dropCell(pPage, pCur->idx, cellSizePtr(pPage, pCell));
    rc = balance(pPage, 0);
  }
  if( rc==SQLITE_OK ){
    moveToRoot(pCur);
  }
  return rc;
}
       
  5446 
       
  5447 /*
       
  5448 ** Create a new BTree table.  Write into *piTable the page
       
  5449 ** number for the root page of the new table.
       
  5450 **
       
  5451 ** The type of type is determined by the flags parameter.  Only the
       
  5452 ** following values of flags are currently in use.  Other values for
       
  5453 ** flags might not work:
       
  5454 **
       
  5455 **     BTREE_INTKEY|BTREE_LEAFDATA     Used for SQL tables with rowid keys
       
  5456 **     BTREE_ZERODATA                  Used for SQL indices
       
  5457 */
       
  5458 int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
       
  5459   BtShared *pBt = p->pBt;
       
  5460   MemPage *pRoot;
       
  5461   Pgno pgnoRoot;
       
  5462   int rc;
       
  5463   if( pBt->inTransaction!=TRANS_WRITE ){
       
  5464     /* Must start a transaction first */
       
  5465     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  5466   }
       
  5467   assert( !pBt->readOnly );
       
  5468 
       
  5469   /* It is illegal to create a table if any cursors are open on the
       
  5470   ** database. This is because in auto-vacuum mode the backend may
       
  5471   ** need to move a database page to make room for the new root-page.
       
  5472   ** If an open cursor was using the page a problem would occur.
       
  5473   */
       
  5474   if( pBt->pCursor ){
       
  5475     return SQLITE_LOCKED;
       
  5476   }
       
  5477 
       
  5478 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  5479   rc = allocatePage(pBt, &pRoot, &pgnoRoot, 1, 0);
       
  5480   if( rc ) return rc;
       
  5481 #else
       
  5482   if( pBt->autoVacuum ){
       
  5483     Pgno pgnoMove;      /* Move a page here to make room for the root-page */
       
  5484     MemPage *pPageMove; /* The page to move to. */
       
  5485 
       
  5486     /* Read the value of meta[3] from the database to determine where the
       
  5487     ** root page of the new table should go. meta[3] is the largest root-page
       
  5488     ** created so far, so the new root-page is (meta[3]+1).
       
  5489     */
       
  5490     rc = sqlite3BtreeGetMeta(p, 4, &pgnoRoot);
       
  5491     if( rc!=SQLITE_OK ) return rc;
       
  5492     pgnoRoot++;
       
  5493 
       
  5494     /* The new root-page may not be allocated on a pointer-map page, or the
       
  5495     ** PENDING_BYTE page.
       
  5496     */
       
  5497     if( pgnoRoot==PTRMAP_PAGENO(pBt, pgnoRoot) ||
       
  5498         pgnoRoot==PENDING_BYTE_PAGE(pBt) ){
       
  5499       pgnoRoot++;
       
  5500     }
       
  5501     assert( pgnoRoot>=3 );
       
  5502 
       
  5503     /* Allocate a page. The page that currently resides at pgnoRoot will
       
  5504     ** be moved to the allocated page (unless the allocated page happens
       
  5505     ** to reside at pgnoRoot).
       
  5506     */
       
  5507     rc = allocatePage(pBt, &pPageMove, &pgnoMove, pgnoRoot, 1);
       
  5508     if( rc!=SQLITE_OK ){
       
  5509       return rc;
       
  5510     }
       
  5511 
       
  5512     if( pgnoMove!=pgnoRoot ){
       
  5513       u8 eType;
       
  5514       Pgno iPtrPage;
       
  5515 
       
  5516       releasePage(pPageMove);
       
  5517       rc = getPage(pBt, pgnoRoot, &pRoot);
       
  5518       if( rc!=SQLITE_OK ){
       
  5519         return rc;
       
  5520       }
       
  5521       rc = ptrmapGet(pBt, pgnoRoot, &eType, &iPtrPage);
       
  5522       if( rc!=SQLITE_OK || eType==PTRMAP_ROOTPAGE || eType==PTRMAP_FREEPAGE ){
       
  5523         releasePage(pRoot);
       
  5524         return rc;
       
  5525       }
       
  5526       assert( eType!=PTRMAP_ROOTPAGE );
       
  5527       assert( eType!=PTRMAP_FREEPAGE );
       
  5528       rc = sqlite3pager_write(pRoot->aData);
       
  5529       if( rc!=SQLITE_OK ){
       
  5530         releasePage(pRoot);
       
  5531         return rc;
       
  5532       }
       
  5533       rc = relocatePage(pBt, pRoot, eType, iPtrPage, pgnoMove);
       
  5534       releasePage(pRoot);
       
  5535       if( rc!=SQLITE_OK ){
       
  5536         return rc;
       
  5537       }
       
  5538       rc = getPage(pBt, pgnoRoot, &pRoot);
       
  5539       if( rc!=SQLITE_OK ){
       
  5540         return rc;
       
  5541       }
       
  5542       rc = sqlite3pager_write(pRoot->aData);
       
  5543       if( rc!=SQLITE_OK ){
       
  5544         releasePage(pRoot);
       
  5545         return rc;
       
  5546       }
       
  5547     }else{
       
  5548       pRoot = pPageMove;
       
  5549     } 
       
  5550 
       
  5551     /* Update the pointer-map and meta-data with the new root-page number. */
       
  5552     rc = ptrmapPut(pBt, pgnoRoot, PTRMAP_ROOTPAGE, 0);
       
  5553     if( rc ){
       
  5554       releasePage(pRoot);
       
  5555       return rc;
       
  5556     }
       
  5557     rc = sqlite3BtreeUpdateMeta(p, 4, pgnoRoot);
       
  5558     if( rc ){
       
  5559       releasePage(pRoot);
       
  5560       return rc;
       
  5561     }
       
  5562 
       
  5563   }else{
       
  5564     rc = allocatePage(pBt, &pRoot, &pgnoRoot, 1, 0);
       
  5565     if( rc ) return rc;
       
  5566   }
       
  5567 #endif
       
  5568   assert( sqlite3pager_iswriteable(pRoot->aData) );
       
  5569   zeroPage(pRoot, flags | PTF_LEAF);
       
  5570   sqlite3pager_unref(pRoot->aData);
       
  5571   *piTable = (int)pgnoRoot;
       
  5572   return SQLITE_OK;
       
  5573 }
       
  5574 
       
  5575 /*
       
  5576 ** Erase the given database page and all its children.  Return
       
  5577 ** the page to the freelist.
       
  5578 */
       
  5579 static int clearDatabasePage(
       
  5580   BtShared *pBt,           /* The BTree that contains the table */
       
  5581   Pgno pgno,            /* Page number to clear */
       
  5582   MemPage *pParent,     /* Parent page.  NULL for the root */
       
  5583   int freePageFlag      /* Deallocate page if true */
       
  5584 ){
       
  5585   MemPage *pPage = 0;
       
  5586   int rc;
       
  5587   unsigned char *pCell;
       
  5588   int i;
       
  5589 
       
  5590   if( pgno>sqlite3pager_pagecount(pBt->pPager) ){
       
  5591     return SQLITE_CORRUPT_BKPT;
       
  5592   }
       
  5593 
       
  5594   rc = getAndInitPage(pBt, pgno, &pPage, pParent);
       
  5595   if( rc ) goto cleardatabasepage_out;
       
  5596   rc = sqlite3pager_write(pPage->aData);
       
  5597   if( rc ) goto cleardatabasepage_out;
       
  5598   for(i=0; i<pPage->nCell; i++){
       
  5599     pCell = findCell(pPage, i);
       
  5600     if( !pPage->leaf ){
       
  5601       rc = clearDatabasePage(pBt, get4byte(pCell), pPage->pParent, 1);
       
  5602       if( rc ) goto cleardatabasepage_out;
       
  5603     }
       
  5604     rc = clearCell(pPage, pCell);
       
  5605     if( rc ) goto cleardatabasepage_out;
       
  5606   }
       
  5607   if( !pPage->leaf ){
       
  5608     rc = clearDatabasePage(pBt, get4byte(&pPage->aData[8]), pPage->pParent, 1);
       
  5609     if( rc ) goto cleardatabasepage_out;
       
  5610   }
       
  5611   if( freePageFlag ){
       
  5612     rc = freePage(pPage);
       
  5613   }else{
       
  5614     zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
       
  5615   }
       
  5616 
       
  5617 cleardatabasepage_out:
       
  5618   releasePage(pPage);
       
  5619   return rc;
       
  5620 }
       
  5621 
       
  5622 /*
       
  5623 ** Delete all information from a single table in the database.  iTable is
       
  5624 ** the page number of the root of the table.  After this routine returns,
       
  5625 ** the root page is empty, but still exists.
       
  5626 **
       
  5627 ** This routine will fail with SQLITE_LOCKED if there are any open
       
  5628 ** read cursors on the table.  Open write cursors are moved to the
       
  5629 ** root of the table.
       
  5630 */
       
  5631 int sqlite3BtreeClearTable(Btree *p, int iTable){
       
  5632   int rc;
       
  5633   BtShared *pBt = p->pBt;
       
  5634   if( p->inTrans!=TRANS_WRITE ){
       
  5635     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  5636   }
       
  5637   rc = checkReadLocks(p, iTable, 0);
       
  5638   if( rc ){
       
  5639     return rc;
       
  5640   }
       
  5641 
       
  5642   /* Save the position of all cursors open on this table */
       
  5643   if( SQLITE_OK!=(rc = saveAllCursors(pBt, iTable, 0)) ){
       
  5644     return rc;
       
  5645   }
       
  5646 
       
  5647   return clearDatabasePage(pBt, (Pgno)iTable, 0, 0);
       
  5648 }
       
  5649 
       
  5650 /*
       
  5651 ** Erase all information in a table and add the root of the table to
       
  5652 ** the freelist.  Except, the root of the principle table (the one on
       
  5653 ** page 1) is never added to the freelist.
       
  5654 **
       
  5655 ** This routine will fail with SQLITE_LOCKED if there are any open
       
  5656 ** cursors on the table.
       
  5657 **
       
  5658 ** If AUTOVACUUM is enabled and the page at iTable is not the last
       
  5659 ** root page in the database file, then the last root page 
       
  5660 ** in the database file is moved into the slot formerly occupied by
       
  5661 ** iTable and that last slot formerly occupied by the last root page
       
  5662 ** is added to the freelist instead of iTable.  In this say, all
       
  5663 ** root pages are kept at the beginning of the database file, which
       
  5664 ** is necessary for AUTOVACUUM to work right.  *piMoved is set to the 
       
  5665 ** page number that used to be the last root page in the file before
       
  5666 ** the move.  If no page gets moved, *piMoved is set to 0.
       
  5667 ** The last root page is recorded in meta[3] and the value of
       
  5668 ** meta[3] is updated by this procedure.
       
  5669 */
       
  5670 int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
       
  5671   int rc;
       
  5672   MemPage *pPage = 0;
       
  5673   BtShared *pBt = p->pBt;
       
  5674 
       
  5675   if( p->inTrans!=TRANS_WRITE ){
       
  5676     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  5677   }
       
  5678 
       
  5679   /* It is illegal to drop a table if any cursors are open on the
       
  5680   ** database. This is because in auto-vacuum mode the backend may
       
  5681   ** need to move another root-page to fill a gap left by the deleted
       
  5682   ** root page. If an open cursor was using this page a problem would 
       
  5683   ** occur.
       
  5684   */
       
  5685   if( pBt->pCursor ){
       
  5686     return SQLITE_LOCKED;
       
  5687   }
       
  5688 
       
  5689   rc = getPage(pBt, (Pgno)iTable, &pPage);
       
  5690   if( rc ) return rc;
       
  5691   rc = sqlite3BtreeClearTable(p, iTable);
       
  5692   if( rc ){
       
  5693     releasePage(pPage);
       
  5694     return rc;
       
  5695   }
       
  5696 
       
  5697   *piMoved = 0;
       
  5698 
       
  5699   if( iTable>1 ){
       
  5700 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  5701     rc = freePage(pPage);
       
  5702     releasePage(pPage);
       
  5703 #else
       
  5704     if( pBt->autoVacuum ){
       
  5705       Pgno maxRootPgno;
       
  5706       rc = sqlite3BtreeGetMeta(p, 4, &maxRootPgno);
       
  5707       if( rc!=SQLITE_OK ){
       
  5708         releasePage(pPage);
       
  5709         return rc;
       
  5710       }
       
  5711 
       
  5712       if( iTable==maxRootPgno ){
       
  5713         /* If the table being dropped is the table with the largest root-page
       
  5714         ** number in the database, put the root page on the free list. 
       
  5715         */
       
  5716         rc = freePage(pPage);
       
  5717         releasePage(pPage);
       
  5718         if( rc!=SQLITE_OK ){
       
  5719           return rc;
       
  5720         }
       
  5721       }else{
       
  5722         /* The table being dropped does not have the largest root-page
       
  5723         ** number in the database. So move the page that does into the 
       
  5724         ** gap left by the deleted root-page.
       
  5725         */
       
  5726         MemPage *pMove;
       
  5727         releasePage(pPage);
       
  5728         rc = getPage(pBt, maxRootPgno, &pMove);
       
  5729         if( rc!=SQLITE_OK ){
       
  5730           return rc;
       
  5731         }
       
  5732         rc = relocatePage(pBt, pMove, PTRMAP_ROOTPAGE, 0, iTable);
       
  5733         releasePage(pMove);
       
  5734         if( rc!=SQLITE_OK ){
       
  5735           return rc;
       
  5736         }
       
  5737         rc = getPage(pBt, maxRootPgno, &pMove);
       
  5738         if( rc!=SQLITE_OK ){
       
  5739           return rc;
       
  5740         }
       
  5741         rc = freePage(pMove);
       
  5742         releasePage(pMove);
       
  5743         if( rc!=SQLITE_OK ){
       
  5744           return rc;
       
  5745         }
       
  5746         *piMoved = maxRootPgno;
       
  5747       }
       
  5748 
       
  5749       /* Set the new 'max-root-page' value in the database header. This
       
  5750       ** is the old value less one, less one more if that happens to
       
  5751       ** be a root-page number, less one again if that is the
       
  5752       ** PENDING_BYTE_PAGE.
       
  5753       */
       
  5754       maxRootPgno--;
       
  5755       if( maxRootPgno==PENDING_BYTE_PAGE(pBt) ){
       
  5756         maxRootPgno--;
       
  5757       }
       
  5758       if( maxRootPgno==PTRMAP_PAGENO(pBt, maxRootPgno) ){
       
  5759         maxRootPgno--;
       
  5760       }
       
  5761       assert( maxRootPgno!=PENDING_BYTE_PAGE(pBt) );
       
  5762 
       
  5763       rc = sqlite3BtreeUpdateMeta(p, 4, maxRootPgno);
       
  5764     }else{
       
  5765       rc = freePage(pPage);
       
  5766       releasePage(pPage);
       
  5767     }
       
  5768 #endif
       
  5769   }else{
       
  5770     /* If sqlite3BtreeDropTable was called on page 1. */
       
  5771     zeroPage(pPage, PTF_INTKEY|PTF_LEAF );
       
  5772     releasePage(pPage);
       
  5773   }
       
  5774   return rc;  
       
  5775 }
       
  5776 
       
  5777 
       
  5778 /*
       
  5779 ** Read the meta-information out of a database file.  Meta[0]
       
  5780 ** is the number of free pages currently in the database.  Meta[1]
       
  5781 ** through meta[15] are available for use by higher layers.  Meta[0]
       
  5782 ** is read-only, the others are read/write.
       
  5783 ** 
       
  5784 ** The schema layer numbers meta values differently.  At the schema
       
  5785 ** layer (and the SetCookie and ReadCookie opcodes) the number of
       
  5786 ** free pages is not visible.  So Cookie[0] is the same as Meta[1].
       
  5787 */
       
  5788 int sqlite3BtreeGetMeta(Btree *p, int idx, u32 *pMeta){
       
  5789   int rc;
       
  5790   unsigned char *pP1;
       
  5791   BtShared *pBt = p->pBt;
       
  5792 
       
  5793   /* Reading a meta-data value requires a read-lock on page 1 (and hence
       
  5794   ** the sqlite_master table. We grab this lock regardless of whether or
       
  5795   ** not the SQLITE_ReadUncommitted flag is set (the table rooted at page
       
  5796   ** 1 is treated as a special case by queryTableLock() and lockTable()).
       
  5797   */
       
  5798   rc = queryTableLock(p, 1, READ_LOCK);
       
  5799   if( rc!=SQLITE_OK ){
       
  5800     return rc;
       
  5801   }
       
  5802 
       
  5803   assert( idx>=0 && idx<=15 );
       
  5804   rc = sqlite3pager_get(pBt->pPager, 1, (void**)&pP1);
       
  5805   if( rc ) return rc;
       
  5806   *pMeta = get4byte(&pP1[36 + idx*4]);
       
  5807   sqlite3pager_unref(pP1);
       
  5808 
       
  5809   /* If autovacuumed is disabled in this build but we are trying to 
       
  5810   ** access an autovacuumed database, then make the database readonly. 
       
  5811   */
       
  5812 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  5813   if( idx==4 && *pMeta>0 ) pBt->readOnly = 1;
       
  5814 #endif
       
  5815 
       
  5816   /* Grab the read-lock on page 1. */
       
  5817   rc = lockTable(p, 1, READ_LOCK);
       
  5818   return rc;
       
  5819 }
       
  5820 
       
  5821 /*
       
  5822 ** Write meta-information back into the database.  Meta[0] is
       
  5823 ** read-only and may not be written.
       
  5824 */
       
  5825 int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
       
  5826   BtShared *pBt = p->pBt;
       
  5827   unsigned char *pP1;
       
  5828   int rc;
       
  5829   assert( idx>=1 && idx<=15 );
       
  5830   if( p->inTrans!=TRANS_WRITE ){
       
  5831     return pBt->readOnly ? SQLITE_READONLY : SQLITE_ERROR;
       
  5832   }
       
  5833   assert( pBt->pPage1!=0 );
       
  5834   pP1 = pBt->pPage1->aData;
       
  5835   rc = sqlite3pager_write(pP1);
       
  5836   if( rc ) return rc;
       
  5837   put4byte(&pP1[36 + idx*4], iMeta);
       
  5838   return SQLITE_OK;
       
  5839 }
       
  5840 
       
  5841 /*
       
  5842 ** Return the flag byte at the beginning of the page that the cursor
       
  5843 ** is currently pointing to.
       
  5844 */
       
  5845 int sqlite3BtreeFlags(BtCursor *pCur){
       
  5846   /* TODO: What about CURSOR_REQUIRESEEK state? Probably need to call
       
  5847   ** restoreOrClearCursorPosition() here.
       
  5848   */
       
  5849   MemPage *pPage = pCur->pPage;
       
  5850   return pPage ? pPage->aData[pPage->hdrOffset] : 0;
       
  5851 }
       
  5852 
       
  5853 #ifdef SQLITE_DEBUG
       
  5854 /*
       
  5855 ** Print a disassembly of the given page on standard output.  This routine
       
  5856 ** is used for debugging and testing only.
       
  5857 */
       
  5858 static int btreePageDump(BtShared *pBt, int pgno, int recursive, MemPage *pParent){
       
  5859   int rc;
       
  5860   MemPage *pPage;
       
  5861   int i, j, c;
       
  5862   int nFree;
       
  5863   u16 idx;
       
  5864   int hdr;
       
  5865   int nCell;
       
  5866   int isInit;
       
  5867   unsigned char *data;
       
  5868   char range[20];
       
  5869   unsigned char payload[20];
       
  5870 
       
  5871   rc = getPage(pBt, (Pgno)pgno, &pPage);
       
  5872   isInit = pPage->isInit;
       
  5873   if( pPage->isInit==0 ){
       
  5874     initPage(pPage, pParent);
       
  5875   }
       
  5876   if( rc ){
       
  5877     return rc;
       
  5878   }
       
  5879   hdr = pPage->hdrOffset;
       
  5880   data = pPage->aData;
       
  5881   c = data[hdr];
       
  5882   pPage->intKey = (c & (PTF_INTKEY|PTF_LEAFDATA))!=0;
       
  5883   pPage->zeroData = (c & PTF_ZERODATA)!=0;
       
  5884   pPage->leafData = (c & PTF_LEAFDATA)!=0;
       
  5885   pPage->leaf = (c & PTF_LEAF)!=0;
       
  5886   pPage->hasData = !(pPage->zeroData || (!pPage->leaf && pPage->leafData));
       
  5887   nCell = get2byte(&data[hdr+3]);
       
  5888   sqlite3DebugPrintf("PAGE %d:  flags=0x%02x  frag=%d   parent=%d\n", pgno,
       
  5889     data[hdr], data[hdr+7], 
       
  5890     (pPage->isInit && pPage->pParent) ? pPage->pParent->pgno : 0);
       
  5891   assert( hdr == (pgno==1 ? 100 : 0) );
       
  5892   idx = hdr + 12 - pPage->leaf*4;
       
  5893   for(i=0; i<nCell; i++){
       
  5894     CellInfo info;
       
  5895     Pgno child;
       
  5896     unsigned char *pCell;
       
  5897     int sz;
       
  5898     int addr;
       
  5899 
       
  5900     addr = get2byte(&data[idx + 2*i]);
       
  5901     pCell = &data[addr];
       
  5902     parseCellPtr(pPage, pCell, &info);
       
  5903     sz = info.nSize;
       
  5904     sprintf(range,"%d..%d", addr, addr+sz-1);
       
  5905     if( pPage->leaf ){
       
  5906       child = 0;
       
  5907     }else{
       
  5908       child = get4byte(pCell);
       
  5909     }
       
  5910     sz = info.nData;
       
  5911     if( !pPage->intKey ) sz += info.nKey;
       
  5912     if( sz>sizeof(payload)-1 ) sz = sizeof(payload)-1;
       
  5913     memcpy(payload, &pCell[info.nHeader], sz);
       
  5914     for(j=0; j<sz; j++){
       
  5915       if( payload[j]<0x20 || payload[j]>0x7f ) payload[j] = '.';
       
  5916     }
       
  5917     payload[sz] = 0;
       
  5918     sqlite3DebugPrintf(
       
  5919       "cell %2d: i=%-10s chld=%-4d nk=%-4lld nd=%-4d payload=%s\n",
       
  5920       i, range, child, info.nKey, info.nData, payload
       
  5921     );
       
  5922   }
       
  5923   if( !pPage->leaf ){
       
  5924     sqlite3DebugPrintf("right_child: %d\n", get4byte(&data[hdr+8]));
       
  5925   }
       
  5926   nFree = 0;
       
  5927   i = 0;
       
  5928   idx = get2byte(&data[hdr+1]);
       
  5929   while( idx>0 && idx<pPage->pBt->usableSize ){
       
  5930     int sz = get2byte(&data[idx+2]);
       
  5931     sprintf(range,"%d..%d", idx, idx+sz-1);
       
  5932     nFree += sz;
       
  5933     sqlite3DebugPrintf("freeblock %2d: i=%-10s size=%-4d total=%d\n",
       
  5934        i, range, sz, nFree);
       
  5935     idx = get2byte(&data[idx]);
       
  5936     i++;
       
  5937   }
       
  5938   if( idx!=0 ){
       
  5939     sqlite3DebugPrintf("ERROR: next freeblock index out of range: %d\n", idx);
       
  5940   }
       
  5941   if( recursive && !pPage->leaf ){
       
  5942     for(i=0; i<nCell; i++){
       
  5943       unsigned char *pCell = findCell(pPage, i);
       
  5944       btreePageDump(pBt, get4byte(pCell), 1, pPage);
       
  5945       idx = get2byte(pCell);
       
  5946     }
       
  5947     btreePageDump(pBt, get4byte(&data[hdr+8]), 1, pPage);
       
  5948   }
       
  5949   pPage->isInit = isInit;
       
  5950   sqlite3pager_unref(data);
       
  5951   fflush(stdout);
       
  5952   return SQLITE_OK;
       
  5953 }
       
  5954 int sqlite3BtreePageDump(Btree *p, int pgno, int recursive){
       
  5955   return btreePageDump(p->pBt, pgno, recursive, 0);
       
  5956 }
       
  5957 #endif
       
  5958 
       
  5959 #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
       
  5960 /*
       
  5961 ** Fill aResult[] with information about the entry and page that the
       
  5962 ** cursor is pointing to.
       
  5963 ** 
       
  5964 **   aResult[0] =  The page number
       
  5965 **   aResult[1] =  The entry number
       
  5966 **   aResult[2] =  Total number of entries on this page
       
  5967 **   aResult[3] =  Cell size (local payload + header)
       
  5968 **   aResult[4] =  Number of free bytes on this page
       
  5969 **   aResult[5] =  Number of free blocks on the page
       
  5970 **   aResult[6] =  Total payload size (local + overflow)
       
  5971 **   aResult[7] =  Header size in bytes
       
  5972 **   aResult[8] =  Local payload size
       
  5973 **   aResult[9] =  Parent page number
       
  5974 **
       
  5975 ** This routine is used for testing and debugging only.
       
  5976 */
       
  5977 int sqlite3BtreeCursorInfo(BtCursor *pCur, int *aResult, int upCnt){
       
  5978   int cnt, idx;
       
  5979   MemPage *pPage = pCur->pPage;
       
  5980   BtCursor tmpCur;
       
  5981 
       
  5982   int rc = restoreOrClearCursorPosition(pCur, 1);
       
  5983   if( rc!=SQLITE_OK ){
       
  5984     return rc;
       
  5985   }
       
  5986 
       
  5987   pageIntegrity(pPage);
       
  5988   assert( pPage->isInit );
       
  5989   getTempCursor(pCur, &tmpCur);
       
  5990   while( upCnt-- ){
       
  5991     moveToParent(&tmpCur);
       
  5992   }
       
  5993   pPage = tmpCur.pPage;
       
  5994   pageIntegrity(pPage);
       
  5995   aResult[0] = sqlite3pager_pagenumber(pPage->aData);
       
  5996   assert( aResult[0]==pPage->pgno );
       
  5997   aResult[1] = tmpCur.idx;
       
  5998   aResult[2] = pPage->nCell;
       
  5999   if( tmpCur.idx>=0 && tmpCur.idx<pPage->nCell ){
       
  6000     getCellInfo(&tmpCur);
       
  6001     aResult[3] = tmpCur.info.nSize;
       
  6002     aResult[6] = tmpCur.info.nData;
       
  6003     aResult[7] = tmpCur.info.nHeader;
       
  6004     aResult[8] = tmpCur.info.nLocal;
       
  6005   }else{
       
  6006     aResult[3] = 0;
       
  6007     aResult[6] = 0;
       
  6008     aResult[7] = 0;
       
  6009     aResult[8] = 0;
       
  6010   }
       
  6011   aResult[4] = pPage->nFree;
       
  6012   cnt = 0;
       
  6013   idx = get2byte(&pPage->aData[pPage->hdrOffset+1]);
       
  6014   while( idx>0 && idx<pPage->pBt->usableSize ){
       
  6015     cnt++;
       
  6016     idx = get2byte(&pPage->aData[idx]);
       
  6017   }
       
  6018   aResult[5] = cnt;
       
  6019   if( pPage->pParent==0 || isRootPage(pPage) ){
       
  6020     aResult[9] = 0;
       
  6021   }else{
       
  6022     aResult[9] = pPage->pParent->pgno;
       
  6023   }
       
  6024   releaseTempCursor(&tmpCur);
       
  6025   return SQLITE_OK;
       
  6026 }
       
  6027 #endif
       
  6028 
       
  6029 /*
       
  6030 ** Return the pager associated with a BTree.  This routine is used for
       
  6031 ** testing and debugging only.
       
  6032 */
       
  6033 Pager *sqlite3BtreePager(Btree *p){
       
  6034   return p->pBt->pPager;
       
  6035 }
       
  6036 
       
/*
** This structure is passed around through all the sanity checking routines
** in order to keep track of some global state information: which database
** is being checked, how often each page has been referenced so far, and
** the error text accumulated up to this point.
*/
typedef struct IntegrityCk IntegrityCk;
struct IntegrityCk {
  BtShared *pBt;    /* The tree being checked out */
  Pager *pPager; /* The associated pager.  Also accessible by pBt->pPager */
  int nPage;     /* Number of pages in the database */
  int *anRef;    /* Number of times each page is referenced.  Indexed by
                 ** page number; checkRef() accepts indices up to nPage. */
  char *zErrMsg; /* An error message.  NULL if no errors seen.  Messages
                 ** are appended one per line by checkAppendMsg(). */
};
       
  6049 
       
  6050 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
       
  6051 /*
       
  6052 ** Append a message to the error message string.
       
  6053 */
       
  6054 static void checkAppendMsg(
       
  6055   IntegrityCk *pCheck,
       
  6056   char *zMsg1,
       
  6057   const char *zFormat,
       
  6058   ...
       
  6059 ){
       
  6060   va_list ap;
       
  6061   char *zMsg2;
       
  6062   va_start(ap, zFormat);
       
  6063   zMsg2 = sqlite3VMPrintf(zFormat, ap);
       
  6064   va_end(ap);
       
  6065   if( zMsg1==0 ) zMsg1 = "";
       
  6066   if( pCheck->zErrMsg ){
       
  6067     char *zOld = pCheck->zErrMsg;
       
  6068     pCheck->zErrMsg = 0;
       
  6069     sqlite3SetString(&pCheck->zErrMsg, zOld, "\n", zMsg1, zMsg2, (char*)0);
       
  6070     sqliteFree(zOld);
       
  6071   }else{
       
  6072     sqlite3SetString(&pCheck->zErrMsg, zMsg1, zMsg2, (char*)0);
       
  6073   }
       
  6074   sqliteFree(zMsg2);
       
  6075 }
       
  6076 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
       
  6077 
       
  6078 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
       
  6079 /*
       
  6080 ** Add 1 to the reference count for page iPage.  If this is the second
       
  6081 ** reference to the page, add an error message to pCheck->zErrMsg.
       
  6082 ** Return 1 if there are 2 ore more references to the page and 0 if
       
  6083 ** if this is the first reference to the page.
       
  6084 **
       
  6085 ** Also check that the page number is in bounds.
       
  6086 */
       
  6087 static int checkRef(IntegrityCk *pCheck, int iPage, char *zContext){
       
  6088   if( iPage==0 ) return 1;
       
  6089   if( iPage>pCheck->nPage || iPage<0 ){
       
  6090     checkAppendMsg(pCheck, zContext, "invalid page number %d", iPage);
       
  6091     return 1;
       
  6092   }
       
  6093   if( pCheck->anRef[iPage]==1 ){
       
  6094     checkAppendMsg(pCheck, zContext, "2nd reference to page %d", iPage);
       
  6095     return 1;
       
  6096   }
       
  6097   return  (pCheck->anRef[iPage]++)>1;
       
  6098 }
       
  6099 
       
  6100 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6101 /*
       
  6102 ** Check that the entry in the pointer-map for page iChild maps to 
       
  6103 ** page iParent, pointer type ptrType. If not, append an error message
       
  6104 ** to pCheck.
       
  6105 */
       
  6106 static void checkPtrmap(
       
  6107   IntegrityCk *pCheck,   /* Integrity check context */
       
  6108   Pgno iChild,           /* Child page number */
       
  6109   u8 eType,              /* Expected pointer map type */
       
  6110   Pgno iParent,          /* Expected pointer map parent page number */
       
  6111   char *zContext         /* Context description (used for error msg) */
       
  6112 ){
       
  6113   int rc;
       
  6114   u8 ePtrmapType;
       
  6115   Pgno iPtrmapParent;
       
  6116 
       
  6117   rc = ptrmapGet(pCheck->pBt, iChild, &ePtrmapType, &iPtrmapParent);
       
  6118   if( rc!=SQLITE_OK ){
       
  6119     checkAppendMsg(pCheck, zContext, "Failed to read ptrmap key=%d", iChild);
       
  6120     return;
       
  6121   }
       
  6122 
       
  6123   if( ePtrmapType!=eType || iPtrmapParent!=iParent ){
       
  6124     checkAppendMsg(pCheck, zContext, 
       
  6125       "Bad ptr map entry key=%d expected=(%d,%d) got=(%d,%d)", 
       
  6126       iChild, eType, iParent, ePtrmapType, iPtrmapParent);
       
  6127   }
       
  6128 }
       
  6129 #endif
       
  6130 
       
  6131 /*
       
  6132 ** Check the integrity of the freelist or of an overflow page list.
       
  6133 ** Verify that the number of pages on the list is N.
       
  6134 */
       
  6135 static void checkList(
       
  6136   IntegrityCk *pCheck,  /* Integrity checking context */
       
  6137   int isFreeList,       /* True for a freelist.  False for overflow page list */
       
  6138   int iPage,            /* Page number for first page in the list */
       
  6139   int N,                /* Expected number of pages in the list */
       
  6140   char *zContext        /* Context for error messages */
       
  6141 ){
       
  6142   int i;
       
  6143   int expected = N;
       
  6144   int iFirst = iPage;
       
  6145   while( N-- > 0 ){
       
  6146     unsigned char *pOvfl;
       
  6147     if( iPage<1 ){
       
  6148       checkAppendMsg(pCheck, zContext,
       
  6149          "%d of %d pages missing from overflow list starting at %d",
       
  6150           N+1, expected, iFirst);
       
  6151       break;
       
  6152     }
       
  6153     if( checkRef(pCheck, iPage, zContext) ) break;
       
  6154     if( sqlite3pager_get(pCheck->pPager, (Pgno)iPage, (void**)&pOvfl) ){
       
  6155       checkAppendMsg(pCheck, zContext, "failed to get page %d", iPage);
       
  6156       break;
       
  6157     }
       
  6158     if( isFreeList ){
       
  6159       int n = get4byte(&pOvfl[4]);
       
  6160 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6161       if( pCheck->pBt->autoVacuum ){
       
  6162         checkPtrmap(pCheck, iPage, PTRMAP_FREEPAGE, 0, zContext);
       
  6163       }
       
  6164 #endif
       
  6165       if( n>pCheck->pBt->usableSize/4-8 ){
       
  6166         checkAppendMsg(pCheck, zContext,
       
  6167            "freelist leaf count too big on page %d", iPage);
       
  6168         N--;
       
  6169       }else{
       
  6170         for(i=0; i<n; i++){
       
  6171           Pgno iFreePage = get4byte(&pOvfl[8+i*4]);
       
  6172 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6173           if( pCheck->pBt->autoVacuum ){
       
  6174             checkPtrmap(pCheck, iFreePage, PTRMAP_FREEPAGE, 0, zContext);
       
  6175           }
       
  6176 #endif
       
  6177           checkRef(pCheck, iFreePage, zContext);
       
  6178         }
       
  6179         N -= n;
       
  6180       }
       
  6181     }
       
  6182 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6183     else{
       
  6184       /* If this database supports auto-vacuum and iPage is not the last
       
  6185       ** page in this overflow list, check that the pointer-map entry for
       
  6186       ** the following page matches iPage.
       
  6187       */
       
  6188       if( pCheck->pBt->autoVacuum && N>0 ){
       
  6189         i = get4byte(pOvfl);
       
  6190         checkPtrmap(pCheck, i, PTRMAP_OVERFLOW2, iPage, zContext);
       
  6191       }
       
  6192     }
       
  6193 #endif
       
  6194     iPage = get4byte(pOvfl);
       
  6195     sqlite3pager_unref(pOvfl);
       
  6196   }
       
  6197 }
       
  6198 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
       
  6199 
       
  6200 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
       
  6201 /*
       
  6202 ** Do various sanity checks on a single page of a tree.  Return
       
  6203 ** the tree depth.  Root pages return 0.  Parents of root pages
       
  6204 ** return 1, and so forth.
       
  6205 ** 
       
  6206 ** These checks are done:
       
  6207 **
       
  6208 **      1.  Make sure that cells and freeblocks do not overlap
       
  6209 **          but combine to completely cover the page.
       
  6210 **  NO  2.  Make sure cell keys are in order.
       
  6211 **  NO  3.  Make sure no key is less than or equal to zLowerBound.
       
  6212 **  NO  4.  Make sure no key is greater than or equal to zUpperBound.
       
  6213 **      5.  Check the integrity of overflow pages.
       
  6214 **      6.  Recursively call checkTreePage on all children.
       
  6215 **      7.  Verify that the depth of all children is the same.
       
  6216 **      8.  Make sure this page is at least 33% full or else it is
       
  6217 **          the root of the tree.
       
  6218 */
       
  6219 static int checkTreePage(
       
  6220   IntegrityCk *pCheck,  /* Context for the sanity check */
       
  6221   int iPage,            /* Page number of the page to check */
       
  6222   MemPage *pParent,     /* Parent page */
       
  6223   char *zParentContext  /* Parent context */
       
  6224 ){
       
  6225   MemPage *pPage;
       
  6226   int i, rc, depth, d2, pgno, cnt;
       
  6227   int hdr, cellStart;
       
  6228   int nCell;
       
  6229   u8 *data;
       
  6230   BtShared *pBt;
       
  6231   int usableSize;
       
  6232   char zContext[100];
       
  6233   char *hit;
       
  6234 
       
  6235   sprintf(zContext, "Page %d: ", iPage);
       
  6236 
       
  6237   /* Check that the page exists
       
  6238   */
       
  6239   pBt = pCheck->pBt;
       
  6240   usableSize = pBt->usableSize;
       
  6241   if( iPage==0 ) return 0;
       
  6242   if( checkRef(pCheck, iPage, zParentContext) ) return 0;
       
  6243   if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
       
  6244     checkAppendMsg(pCheck, zContext,
       
  6245        "unable to get the page. error code=%d", rc);
       
  6246     return 0;
       
  6247   }
       
  6248   if( (rc = initPage(pPage, pParent))!=0 ){
       
  6249     checkAppendMsg(pCheck, zContext, "initPage() returns error code %d", rc);
       
  6250     releasePage(pPage);
       
  6251     return 0;
       
  6252   }
       
  6253 
       
  6254   /* Check out all the cells.
       
  6255   */
       
  6256   depth = 0;
       
  6257   for(i=0; i<pPage->nCell; i++){
       
  6258     u8 *pCell;
       
  6259     int sz;
       
  6260     CellInfo info;
       
  6261 
       
  6262     /* Check payload overflow pages
       
  6263     */
       
  6264     sprintf(zContext, "On tree page %d cell %d: ", iPage, i);
       
  6265     pCell = findCell(pPage,i);
       
  6266     parseCellPtr(pPage, pCell, &info);
       
  6267     sz = info.nData;
       
  6268     if( !pPage->intKey ) sz += info.nKey;
       
  6269     if( sz>info.nLocal ){
       
  6270       int nPage = (sz - info.nLocal + usableSize - 5)/(usableSize - 4);
       
  6271       Pgno pgnoOvfl = get4byte(&pCell[info.iOverflow]);
       
  6272 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6273       if( pBt->autoVacuum ){
       
  6274         checkPtrmap(pCheck, pgnoOvfl, PTRMAP_OVERFLOW1, iPage, zContext);
       
  6275       }
       
  6276 #endif
       
  6277       checkList(pCheck, 0, pgnoOvfl, nPage, zContext);
       
  6278     }
       
  6279 
       
  6280     /* Check sanity of left child page.
       
  6281     */
       
  6282     if( !pPage->leaf ){
       
  6283       pgno = get4byte(pCell);
       
  6284 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6285       if( pBt->autoVacuum ){
       
  6286         checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, zContext);
       
  6287       }
       
  6288 #endif
       
  6289       d2 = checkTreePage(pCheck,pgno,pPage,zContext);
       
  6290       if( i>0 && d2!=depth ){
       
  6291         checkAppendMsg(pCheck, zContext, "Child page depth differs");
       
  6292       }
       
  6293       depth = d2;
       
  6294     }
       
  6295   }
       
  6296   if( !pPage->leaf ){
       
  6297     pgno = get4byte(&pPage->aData[pPage->hdrOffset+8]);
       
  6298     sprintf(zContext, "On page %d at right child: ", iPage);
       
  6299 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6300     if( pBt->autoVacuum ){
       
  6301       checkPtrmap(pCheck, pgno, PTRMAP_BTREE, iPage, 0);
       
  6302     }
       
  6303 #endif
       
  6304     checkTreePage(pCheck, pgno, pPage, zContext);
       
  6305   }
       
  6306  
       
  6307   /* Check for complete coverage of the page
       
  6308   */
       
  6309   data = pPage->aData;
       
  6310   hdr = pPage->hdrOffset;
       
  6311   hit = sqliteMalloc( usableSize );
       
  6312   if( hit ){
       
  6313     memset(hit, 1, get2byte(&data[hdr+5]));
       
  6314     nCell = get2byte(&data[hdr+3]);
       
  6315     cellStart = hdr + 12 - 4*pPage->leaf;
       
  6316     for(i=0; i<nCell; i++){
       
  6317       int pc = get2byte(&data[cellStart+i*2]);
       
  6318       int size = cellSizePtr(pPage, &data[pc]);
       
  6319       int j;
       
  6320       if( (pc+size-1)>=usableSize || pc<0 ){
       
  6321         checkAppendMsg(pCheck, 0, 
       
  6322             "Corruption detected in cell %d on page %d",i,iPage,0);
       
  6323       }else{
       
  6324         for(j=pc+size-1; j>=pc; j--) hit[j]++;
       
  6325       }
       
  6326     }
       
  6327     for(cnt=0, i=get2byte(&data[hdr+1]); i>0 && i<usableSize && cnt<10000; 
       
  6328            cnt++){
       
  6329       int size = get2byte(&data[i+2]);
       
  6330       int j;
       
  6331       if( (i+size-1)>=usableSize || i<0 ){
       
  6332         checkAppendMsg(pCheck, 0,  
       
  6333             "Corruption detected in cell %d on page %d",i,iPage,0);
       
  6334       }else{
       
  6335         for(j=i+size-1; j>=i; j--) hit[j]++;
       
  6336       }
       
  6337       i = get2byte(&data[i]);
       
  6338     }
       
  6339     for(i=cnt=0; i<usableSize; i++){
       
  6340       if( hit[i]==0 ){
       
  6341         cnt++;
       
  6342       }else if( hit[i]>1 ){
       
  6343         checkAppendMsg(pCheck, 0,
       
  6344           "Multiple uses for byte %d of page %d", i, iPage);
       
  6345         break;
       
  6346       }
       
  6347     }
       
  6348     if( cnt!=data[hdr+7] ){
       
  6349       checkAppendMsg(pCheck, 0, 
       
  6350           "Fragmented space is %d byte reported as %d on page %d",
       
  6351           cnt, data[hdr+7], iPage);
       
  6352     }
       
  6353   }
       
  6354   sqliteFree(hit);
       
  6355 
       
  6356   releasePage(pPage);
       
  6357   return depth+1;
       
  6358 }
       
  6359 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
       
  6360 
       
  6361 #ifndef SQLITE_OMIT_INTEGRITY_CHECK
       
  6362 /*
       
  6363 ** This routine does a complete check of the given BTree file.  aRoot[] is
       
  6364 ** an array of pages numbers were each page number is the root page of
       
  6365 ** a table.  nRoot is the number of entries in aRoot.
       
  6366 **
       
  6367 ** If everything checks out, this routine returns NULL.  If something is
       
  6368 ** amiss, an error message is written into memory obtained from malloc()
       
  6369 ** and a pointer to that error message is returned.  The calling function
       
  6370 ** is responsible for freeing the error message when it is done.
       
  6371 */
       
  6372 char *sqlite3BtreeIntegrityCheck(Btree *p, int *aRoot, int nRoot){
       
  6373   int i;
       
  6374   int nRef;
       
  6375   IntegrityCk sCheck;
       
  6376   BtShared *pBt = p->pBt;
       
  6377 
       
  6378   nRef = sqlite3pager_refcount(pBt->pPager);
       
  6379   if( lockBtreeWithRetry(p)!=SQLITE_OK ){
       
  6380     return sqliteStrDup("Unable to acquire a read lock on the database");
       
  6381   }
       
  6382   sCheck.pBt = pBt;
       
  6383   sCheck.pPager = pBt->pPager;
       
  6384   sCheck.nPage = sqlite3pager_pagecount(sCheck.pPager);
       
  6385   if( sCheck.nPage==0 ){
       
  6386     unlockBtreeIfUnused(pBt);
       
  6387     return 0;
       
  6388   }
       
  6389   sCheck.anRef = sqliteMallocRaw( (sCheck.nPage+1)*sizeof(sCheck.anRef[0]) );
       
  6390   if( !sCheck.anRef ){
       
  6391     unlockBtreeIfUnused(pBt);
       
  6392     return sqlite3MPrintf("Unable to malloc %d bytes", 
       
  6393         (sCheck.nPage+1)*sizeof(sCheck.anRef[0]));
       
  6394   }
       
  6395   for(i=0; i<=sCheck.nPage; i++){ sCheck.anRef[i] = 0; }
       
  6396   i = PENDING_BYTE_PAGE(pBt);
       
  6397   if( i<=sCheck.nPage ){
       
  6398     sCheck.anRef[i] = 1;
       
  6399   }
       
  6400   sCheck.zErrMsg = 0;
       
  6401 
       
  6402   /* Check the integrity of the freelist
       
  6403   */
       
  6404   checkList(&sCheck, 1, get4byte(&pBt->pPage1->aData[32]),
       
  6405             get4byte(&pBt->pPage1->aData[36]), "Main freelist: ");
       
  6406 
       
  6407   /* Check all the tables.
       
  6408   */
       
  6409   for(i=0; i<nRoot; i++){
       
  6410     if( aRoot[i]==0 ) continue;
       
  6411 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6412     if( pBt->autoVacuum && aRoot[i]>1 ){
       
  6413       checkPtrmap(&sCheck, aRoot[i], PTRMAP_ROOTPAGE, 0, 0);
       
  6414     }
       
  6415 #endif
       
  6416     checkTreePage(&sCheck, aRoot[i], 0, "List of tree roots: ");
       
  6417   }
       
  6418 
       
  6419   /* Make sure every page in the file is referenced
       
  6420   */
       
  6421   for(i=1; i<=sCheck.nPage; i++){
       
  6422 #ifdef SQLITE_OMIT_AUTOVACUUM
       
  6423     if( sCheck.anRef[i]==0 ){
       
  6424       checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
       
  6425     }
       
  6426 #else
       
  6427     /* If the database supports auto-vacuum, make sure no tables contain
       
  6428     ** references to pointer-map pages.
       
  6429     */
       
  6430     if( sCheck.anRef[i]==0 && 
       
  6431        (PTRMAP_PAGENO(pBt, i)!=i || !pBt->autoVacuum) ){
       
  6432       checkAppendMsg(&sCheck, 0, "Page %d is never used", i);
       
  6433     }
       
  6434     if( sCheck.anRef[i]!=0 && 
       
  6435        (PTRMAP_PAGENO(pBt, i)==i && pBt->autoVacuum) ){
       
  6436       checkAppendMsg(&sCheck, 0, "Pointer map page %d is referenced", i);
       
  6437     }
       
  6438 #endif
       
  6439   }
       
  6440 
       
  6441   /* Make sure this analysis did not leave any unref() pages
       
  6442   */
       
  6443   unlockBtreeIfUnused(pBt);
       
  6444   if( nRef != sqlite3pager_refcount(pBt->pPager) ){
       
  6445     checkAppendMsg(&sCheck, 0, 
       
  6446       "Outstanding page count goes from %d to %d during this analysis",
       
  6447       nRef, sqlite3pager_refcount(pBt->pPager)
       
  6448     );
       
  6449   }
       
  6450 
       
  6451   /* Clean  up and report errors.
       
  6452   */
       
  6453   sqliteFree(sCheck.anRef);
       
  6454   return sCheck.zErrMsg;
       
  6455 }
       
  6456 #endif /* SQLITE_OMIT_INTEGRITY_CHECK */
       
  6457 
       
  6458 /*
       
  6459 ** Return the full pathname of the underlying database file.
       
  6460 */
       
  6461 const char *sqlite3BtreeGetFilename(Btree *p){
       
  6462   assert( p->pBt->pPager!=0 );
       
  6463   return sqlite3pager_filename(p->pBt->pPager);
       
  6464 }
       
  6465 
       
  6466 /*
       
  6467 ** Return the pathname of the directory that contains the database file.
       
  6468 */
       
  6469 const char *sqlite3BtreeGetDirname(Btree *p){
       
  6470   assert( p->pBt->pPager!=0 );
       
  6471   return sqlite3pager_dirname(p->pBt->pPager);
       
  6472 }
       
  6473 
       
  6474 /*
       
  6475 ** Return the pathname of the journal file for this database. The return
       
  6476 ** value of this routine is the same regardless of whether the journal file
       
  6477 ** has been created or not.
       
  6478 */
       
  6479 const char *sqlite3BtreeGetJournalname(Btree *p){
       
  6480   assert( p->pBt->pPager!=0 );
       
  6481   return sqlite3pager_journalname(p->pBt->pPager);
       
  6482 }
       
  6483 
       
  6484 #ifndef SQLITE_OMIT_VACUUM
       
  6485 /*
       
  6486 ** Copy the complete content of pBtFrom into pBtTo.  A transaction
       
  6487 ** must be active for both files.
       
  6488 **
       
  6489 ** The size of file pBtFrom may be reduced by this operation.
       
  6490 ** If anything goes wrong, the transaction on pBtFrom is rolled back.
       
  6491 */
       
  6492 int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
       
  6493   int rc = SQLITE_OK;
       
  6494   Pgno i, nPage, nToPage, iSkip;
       
  6495 
       
  6496   BtShared *pBtTo = pTo->pBt;
       
  6497   BtShared *pBtFrom = pFrom->pBt;
       
  6498 
       
  6499   if( pTo->inTrans!=TRANS_WRITE || pFrom->inTrans!=TRANS_WRITE ){
       
  6500     return SQLITE_ERROR;
       
  6501   }
       
  6502   if( pBtTo->pCursor ) return SQLITE_BUSY;
       
  6503   nToPage = sqlite3pager_pagecount(pBtTo->pPager);
       
  6504   nPage = sqlite3pager_pagecount(pBtFrom->pPager);
       
  6505   iSkip = PENDING_BYTE_PAGE(pBtTo);
       
  6506   for(i=1; rc==SQLITE_OK && i<=nPage; i++){
       
  6507     void *pPage;
       
  6508     if( i==iSkip ) continue;
       
  6509     rc = sqlite3pager_get(pBtFrom->pPager, i, &pPage);
       
  6510     if( rc ) break;
       
  6511     rc = sqlite3pager_overwrite(pBtTo->pPager, i, pPage);
       
  6512     if( rc ) break;
       
  6513     sqlite3pager_unref(pPage);
       
  6514   }
       
  6515   for(i=nPage+1; rc==SQLITE_OK && i<=nToPage; i++){
       
  6516     void *pPage;
       
  6517     if( i==iSkip ) continue;
       
  6518     rc = sqlite3pager_get(pBtTo->pPager, i, &pPage);
       
  6519     if( rc ) break;
       
  6520     rc = sqlite3pager_write(pPage);
       
  6521     sqlite3pager_unref(pPage);
       
  6522     sqlite3pager_dont_write(pBtTo->pPager, i);
       
  6523   }
       
  6524   if( !rc && nPage<nToPage ){
       
  6525     rc = sqlite3pager_truncate(pBtTo->pPager, nPage);
       
  6526   }
       
  6527   if( rc ){
       
  6528     sqlite3BtreeRollback(pTo);
       
  6529   }
       
  6530   return rc;  
       
  6531 }
       
  6532 #endif /* SQLITE_OMIT_VACUUM */
       
  6533 
       
  6534 /*
       
  6535 ** Return non-zero if a transaction is active.
       
  6536 */
       
  6537 int sqlite3BtreeIsInTrans(Btree *p){
       
  6538   return (p && (p->inTrans==TRANS_WRITE));
       
  6539 }
       
  6540 
       
  6541 /*
       
  6542 ** Return non-zero if a statement transaction is active.
       
  6543 */
       
  6544 int sqlite3BtreeIsInStmt(Btree *p){
       
  6545   return (p->pBt && p->pBt->inStmt);
       
  6546 }
       
  6547 
       
  6548 /*
       
  6549 ** Return non-zero if a read (or write) transaction is active.
       
  6550 */
       
  6551 int sqlite3BtreeIsInReadTrans(Btree *p){
       
  6552   return (p && (p->inTrans!=TRANS_NONE));
       
  6553 }
       
  6554 
       
  6555 /*
       
  6556 ** This call is a no-op if no write-transaction is currently active on pBt.
       
  6557 **
       
  6558 ** Otherwise, sync the database file for the btree pBt. zMaster points to
       
  6559 ** the name of a master journal file that should be written into the
       
  6560 ** individual journal file, or is NULL, indicating no master journal file 
       
  6561 ** (single database transaction).
       
  6562 **
       
  6563 ** When this is called, the master journal should already have been
       
  6564 ** created, populated with this journal pointer and synced to disk.
       
  6565 **
       
  6566 ** Once this is routine has returned, the only thing required to commit
       
  6567 ** the write-transaction for this database file is to delete the journal.
       
  6568 */
       
  6569 int sqlite3BtreeSync(Btree *p, const char *zMaster){
       
  6570   int rc = SQLITE_OK;
       
  6571   if( p->inTrans==TRANS_WRITE ){
       
  6572     BtShared *pBt = p->pBt;
       
  6573     Pgno nTrunc = 0;
       
  6574 #ifndef SQLITE_OMIT_AUTOVACUUM
       
  6575     if( pBt->autoVacuum ){
       
  6576       rc = autoVacuumCommit(pBt, &nTrunc); 
       
  6577       if( rc!=SQLITE_OK ){
       
  6578         return rc;
       
  6579       }
       
  6580     }
       
  6581 #endif
       
  6582     rc = sqlite3pager_sync(pBt->pPager, zMaster, nTrunc);
       
  6583   }
       
  6584   return rc;
       
  6585 }
       
  6586 
       
  6587 /*
       
  6588 ** This function returns a pointer to a blob of memory associated with
       
  6589 ** a single shared-btree. The memory is used by client code for it's own
       
  6590 ** purposes (for example, to store a high-level schema associated with 
       
  6591 ** the shared-btree). The btree layer manages reference counting issues.
       
  6592 **
       
  6593 ** The first time this is called on a shared-btree, nBytes bytes of memory
       
  6594 ** are allocated, zeroed, and returned to the caller. For each subsequent 
       
  6595 ** call the nBytes parameter is ignored and a pointer to the same blob
       
  6596 ** of memory returned. 
       
  6597 **
       
  6598 ** Just before the shared-btree is closed, the function passed as the 
       
  6599 ** xFree argument when the memory allocation was made is invoked on the 
       
  6600 ** blob of allocated memory. This function should not call sqliteFree()
       
  6601 ** on the memory, the btree layer does that.
       
  6602 */
       
  6603 void *sqlite3BtreeSchema(Btree *p, int nBytes, void(*xFree)(void *)){
       
  6604   BtShared *pBt = p->pBt;
       
  6605   if( !pBt->pSchema ){
       
  6606     pBt->pSchema = sqliteMalloc(nBytes);
       
  6607     pBt->xFreeSchema = xFree;
       
  6608   }
       
  6609   return pBt->pSchema;
       
  6610 }
       
  6611 
       
  6612 /*
       
  6613 ** Return true if another user of the same shared btree as the argument
       
  6614 ** handle holds an exclusive lock on the sqlite_master table.
       
  6615 */
       
  6616 int sqlite3BtreeSchemaLocked(Btree *p){
       
  6617   return (queryTableLock(p, MASTER_ROOT, READ_LOCK)!=SQLITE_OK);
       
  6618 }
       
  6619 
       
  6620 
       
  6621 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  6622 /*
       
  6623 ** Obtain a lock on the table whose root page is iTab.  The
       
  6624 ** lock is a write lock if isWritelock is true or a read lock
       
  6625 ** if it is false.
       
  6626 */
       
  6627 int sqlite3BtreeLockTable(Btree *p, int iTab, u8 isWriteLock){
       
  6628   int rc = SQLITE_OK;
       
  6629   u8 lockType = (isWriteLock?WRITE_LOCK:READ_LOCK);
       
  6630   rc = queryTableLock(p, iTab, lockType);
       
  6631   if( rc==SQLITE_OK ){
       
  6632     rc = lockTable(p, iTab, lockType);
       
  6633   }
       
  6634   return rc;
       
  6635 }
       
  6636 #endif
       
  6637 
       
  6638 /*
       
  6639 ** The following debugging interface has to be in this file (rather
       
  6640 ** than in, for example, test1.c) so that it can get access to
       
  6641 ** the definition of BtShared.
       
  6642 */
       
  6643 #if defined(SQLITE_DEBUG) && defined(TCLSH)
       
  6644 #include <tcl.h>
       
  6645 int sqlite3_shared_cache_report(
       
  6646   void * clientData,
       
  6647   Tcl_Interp *interp,
       
  6648   int objc,
       
  6649   Tcl_Obj *CONST objv[]
       
  6650 ){
       
  6651 #ifndef SQLITE_OMIT_SHARED_CACHE
       
  6652   const ThreadData *pTd = sqlite3ThreadDataReadOnly();
       
  6653   if( pTd->useSharedData ){
       
  6654     BtShared *pBt;
       
  6655     Tcl_Obj *pRet = Tcl_NewObj();
       
  6656     for(pBt=pTd->pBtree; pBt; pBt=pBt->pNext){
       
  6657       const char *zFile = sqlite3pager_filename(pBt->pPager);
       
  6658       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewStringObj(zFile, -1));
       
  6659       Tcl_ListObjAppendElement(interp, pRet, Tcl_NewIntObj(pBt->nRef));
       
  6660     }
       
  6661     Tcl_SetObjResult(interp, pRet);
       
  6662   }
       
  6663 #endif
       
  6664   return TCL_OK;
       
  6665 }
       
  6666 #endif