persistentstorage/sqlite3api/TEST/SRC/test_async.c
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 /*
       
     2 ** 2005 December 14
       
     3 **
       
     4 ** The author disclaims copyright to this source code.  In place of
       
     5 ** a legal notice, here is a blessing:
       
     6 **
       
     7 **    May you do good and not evil.
       
     8 **    May you find forgiveness for yourself and forgive others.
       
     9 **    May you share freely, never taking more than you give.
       
    10 **
       
    11 *************************************************************************
       
    12 **
       
    13 ** $Id: test_async.c,v 1.48 2008/09/26 20:02:50 drh Exp $
       
    14 **
       
    15 ** This file contains an example implementation of an asynchronous IO 
       
    16 ** backend for SQLite.
       
    17 **
       
    18 ** WHAT IS ASYNCHRONOUS I/O?
       
    19 **
       
    20 ** With asynchronous I/O, write requests are handled by a separate thread
       
    21 ** running in the background.  This means that the thread that initiates
       
    22 ** a database write does not have to wait for (sometimes slow) disk I/O
       
    23 ** to occur.  The write seems to happen very quickly, though in reality
       
    24 ** it is happening at its usual slow pace in the background.
       
    25 **
       
    26 ** Asynchronous I/O appears to give better responsiveness, but at a price.
       
    27 ** You lose the Durable property.  With the default I/O backend of SQLite,
       
    28 ** once a write completes, you know that the information you wrote is
       
    29 ** safely on disk.  With the asynchronous I/O, this is not the case.  If
       
    30 ** your program crashes or if a power loss occurs after the database
       
    31 ** write but before the asynchronous write thread has completed, then the
       
    32 ** database change might never make it to disk and the next user of the
       
    33 ** database might not see your change.
       
    34 **
       
    35 ** You lose Durability with asynchronous I/O, but you still retain the
       
    36 ** other parts of ACID:  Atomic,  Consistent, and Isolated.  Many
       
     37 ** applications get along fine without the Durability.
       
    38 **
       
    39 ** HOW IT WORKS
       
    40 **
       
    41 ** Asynchronous I/O works by creating a special SQLite "vfs" structure
       
    42 ** and registering it with sqlite3_vfs_register(). When files opened via 
       
    43 ** this vfs are written to (using sqlite3OsWrite()), the data is not 
       
    44 ** written directly to disk, but is placed in the "write-queue" to be
       
    45 ** handled by the background thread.
       
    46 **
       
    47 ** When files opened with the asynchronous vfs are read from 
       
    48 ** (using sqlite3OsRead()), the data is read from the file on 
       
    49 ** disk and the write-queue, so that from the point of view of
       
    50 ** the vfs reader the OsWrite() appears to have already completed.
       
    51 **
       
    52 ** The special vfs is registered (and unregistered) by calls to 
       
    53 ** function asyncEnable() (see below).
       
    54 **
       
    55 ** LIMITATIONS
       
    56 **
       
    57 ** This demonstration code is deliberately kept simple in order to keep
       
    58 ** the main ideas clear and easy to understand.  Real applications that
       
    59 ** want to do asynchronous I/O might want to add additional capabilities.
       
    60 ** For example, in this demonstration if writes are happening at a steady
       
    61 ** stream that exceeds the I/O capability of the background writer thread,
       
    62 ** the queue of pending write operations will grow without bound until we
       
    63 ** run out of memory.  Users of this technique may want to keep track of
       
    64 ** the quantity of pending writes and stop accepting new write requests
       
    65 ** when the buffer gets to be too big.
       
    66 **
       
    67 ** LOCKING + CONCURRENCY
       
    68 **
       
    69 ** Multiple connections from within a single process that use this
       
    70 ** implementation of asynchronous IO may access a single database
       
    71 ** file concurrently. From the point of view of the user, if all
       
    72 ** connections are from within a single process, there is no difference
       
    73 ** between the concurrency offered by "normal" SQLite and SQLite
       
    74 ** using the asynchronous backend.
       
    75 **
       
     76 ** If connections from within multiple processes may access the
       
    77 ** database file, the ENABLE_FILE_LOCKING symbol (see below) must be
       
    78 ** defined. If it is not defined, then no locks are established on 
       
    79 ** the database file. In this case, if multiple processes access 
       
    80 ** the database file, corruption will quickly result.
       
    81 **
       
    82 ** If ENABLE_FILE_LOCKING is defined (the default), then connections 
       
    83 ** from within multiple processes may access a single database file 
       
    84 ** without risking corruption. However concurrency is reduced as
       
    85 ** follows:
       
    86 **
       
    87 **   * When a connection using asynchronous IO begins a database
       
    88 **     transaction, the database is locked immediately. However the
       
    89 **     lock is not released until after all relevant operations
       
    90 **     in the write-queue have been flushed to disk. This means
       
    91 **     (for example) that the database may remain locked for some 
       
    92 **     time after a "COMMIT" or "ROLLBACK" is issued.
       
    93 **
       
    94 **   * If an application using asynchronous IO executes transactions
       
    95 **     in quick succession, other database users may be effectively
       
    96 **     locked out of the database. This is because when a BEGIN
       
    97 **     is executed, a database lock is established immediately. But
       
    98 **     when the corresponding COMMIT or ROLLBACK occurs, the lock
       
    99 **     is not released until the relevant part of the write-queue 
       
   100 **     has been flushed through. As a result, if a COMMIT is followed
       
   101 **     by a BEGIN before the write-queue is flushed through, the database 
       
    102 **     is never unlocked, preventing other processes from accessing 
       
   103 **     the database.
       
   104 **
       
   105 ** Defining ENABLE_FILE_LOCKING when using an NFS or other remote 
       
   106 ** file-system may slow things down, as synchronous round-trips to the 
       
   107 ** server may be required to establish database file locks.
       
   108 */
       
   109 #define ENABLE_FILE_LOCKING
       
   110 
       
   111 #ifndef SQLITE_AMALGAMATION
       
   112 # include "sqlite3.h"
       
   113 # include <assert.h>
       
   114 # include <string.h>
       
   115 #endif
       
   116 #include "tcl.h"
       
   117 
       
   118 /*
       
   119 ** This test uses pthreads and hence only works on unix and with
       
   120 ** a threadsafe build of SQLite.
       
   121 */
       
   122 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE
       
   123 
       
   124 /*
       
   125 ** This demo uses pthreads.  If you do not have a pthreads implementation
       
   126 ** for your operating system, you will need to recode the threading 
       
   127 ** logic.
       
   128 */
       
   129 #include <pthread.h>
       
   130 #include <sched.h>
       
   131 
       
   132 /* Useful macros used in several places */
       
   133 #define MIN(x,y) ((x)<(y)?(x):(y))
       
   134 #define MAX(x,y) ((x)>(y)?(x):(y))
       
   135 
       
   136 /* Forward references */
       
   137 typedef struct AsyncWrite AsyncWrite;
       
   138 typedef struct AsyncFile AsyncFile;
       
   139 typedef struct AsyncFileData AsyncFileData;
       
   140 typedef struct AsyncFileLock AsyncFileLock;
       
   141 typedef struct AsyncLock AsyncLock;
       
   142 
       
   143 /* Enable for debugging */
       
   144 static int sqlite3async_trace = 0;
       
   145 # define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X
       
   146 static void asyncTrace(const char *zFormat, ...){
       
   147   char *z;
       
   148   va_list ap;
       
   149   va_start(ap, zFormat);
       
   150   z = sqlite3_vmprintf(zFormat, ap);
       
   151   va_end(ap);
       
   152   fprintf(stderr, "[%d] %s", (int)pthread_self(), z);
       
   153   sqlite3_free(z);
       
   154 }
       
   155 
       
   156 /*
       
   157 ** THREAD SAFETY NOTES
       
   158 **
       
   159 ** Basic rules:
       
   160 **
       
   161 **     * Both read and write access to the global write-op queue must be 
       
   162 **       protected by the async.queueMutex. As are the async.ioError and
       
   163 **       async.nFile variables.
       
   164 **
       
   165 **     * The async.pLock list and all AsyncLock and AsyncFileLock
       
   166 **       structures must be protected by the async.lockMutex mutex.
       
   167 **
       
   168 **     * The file handles from the underlying system are not assumed to 
       
   169 **       be thread safe.
       
   170 **
       
   171 **     * See the last two paragraphs under "The Writer Thread" for
       
   172 **       an assumption to do with file-handle synchronization by the Os.
       
   173 **
       
   174 ** Deadlock prevention:
       
   175 **
       
    176 **     There are three mutexes used by the system: the "writer" mutex, 
       
   177 **     the "queue" mutex and the "lock" mutex. Rules are:
       
   178 **
       
   179 **     * It is illegal to block on the writer mutex when any other mutex
       
   180 **       are held, and 
       
   181 **
       
   182 **     * It is illegal to block on the queue mutex when the lock mutex
       
   183 **       is held.
       
   184 **
       
    185 **     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
       
   186 **
       
   187 ** File system operations (invoked by SQLite thread):
       
   188 **
       
   189 **     xOpen
       
   190 **     xDelete
       
   191 **     xFileExists
       
   192 **
       
   193 ** File handle operations (invoked by SQLite thread):
       
   194 **
       
   195 **         asyncWrite, asyncClose, asyncTruncate, asyncSync 
       
   196 **    
       
   197 **     The operations above add an entry to the global write-op list. They
       
   198 **     prepare the entry, acquire the async.queueMutex momentarily while
       
   199 **     list pointers are  manipulated to insert the new entry, then release
       
   200 **     the mutex and signal the writer thread to wake up in case it happens
       
   201 **     to be asleep.
       
   202 **
       
   203 **    
       
   204 **         asyncRead, asyncFileSize.
       
   205 **
       
    206 **     Read operations. Both of these read from the underlying file
       
   207 **     first then adjust their result based on pending writes in the 
       
   208 **     write-op queue.   So async.queueMutex is held for the duration
       
   209 **     of these operations to prevent other threads from changing the
       
   210 **     queue in mid operation.
       
   211 **    
       
   212 **
       
   213 **         asyncLock, asyncUnlock, asyncCheckReservedLock
       
   214 **    
       
   215 **     These primitives implement in-process locking using a hash table
       
   216 **     on the file name.  Files are locked correctly for connections coming
       
   217 **     from the same process.  But other processes cannot see these locks
       
   218 **     and will therefore not honor them.
       
   219 **
       
   220 **
       
   221 ** The writer thread:
       
   222 **
       
    223 **     The async.writerMutex is used to make sure that there is only
       
   224 **     a single writer thread running at a time.
       
   225 **
       
   226 **     Inside the writer thread is a loop that works like this:
       
   227 **
       
   228 **         WHILE (write-op list is not empty)
       
   229 **             Do IO operation at head of write-op list
       
   230 **             Remove entry from head of write-op list
       
   231 **         END WHILE
       
   232 **
       
   233 **     The async.queueMutex is always held during the <write-op list is 
       
   234 **     not empty> test, and when the entry is removed from the head
       
   235 **     of the write-op list. Sometimes it is held for the interim
       
   236 **     period (while the IO is performed), and sometimes it is
       
   237 **     relinquished. It is relinquished if (a) the IO op is an
       
   238 **     ASYNC_CLOSE or (b) when the file handle was opened, two of
       
   239 **     the underlying systems handles were opened on the same
       
   240 **     file-system entry.
       
   241 **
       
   242 **     If condition (b) above is true, then one file-handle 
       
   243 **     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
       
   244 **     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush() 
       
   245 **     threads to perform write() operations. This means that read 
       
   246 **     operations are not blocked by asynchronous writes (although 
       
   247 **     asynchronous writes may still be blocked by reads).
       
   248 **
       
   249 **     This assumes that the OS keeps two handles open on the same file
       
   250 **     properly in sync. That is, any read operation that starts after a
       
   251 **     write operation on the same file system entry has completed returns
       
   252 **     data consistent with the write. We also assume that if one thread 
       
   253 **     reads a file while another is writing it all bytes other than the
       
   254 **     ones actually being written contain valid data.
       
   255 **
       
   256 **     If the above assumptions are not true, set the preprocessor symbol
       
   257 **     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
       
   258 */
       
   259 
       
   260 #ifndef SQLITE_ASYNC_TWO_FILEHANDLES
       
   261 /* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
       
   262 #define SQLITE_ASYNC_TWO_FILEHANDLES 1
       
   263 #endif
       
   264 
       
   265 /*
       
   266 ** State information is held in the static variable "async" defined
       
   267 ** as the following structure.
       
   268 **
       
   269 ** Both async.ioError and async.nFile are protected by async.queueMutex.
       
   270 */
       
    271 static struct TestAsyncStaticData {
       
        /* The three mutexes must remain the first members, in this order:
        ** the debug-build mutex wrappers treat &async as an array of
        ** pthread_mutex_t and assert that slot 0 is lockMutex, slot 1 is
        ** queueMutex and slot 2 is writerMutex. */
    272   pthread_mutex_t lockMutex;   /* Protects async.pLock and the AsyncLock/AsyncFileLock structures */
       
    273   pthread_mutex_t queueMutex;  /* Mutex for access to write operation queue */
       
    274   pthread_mutex_t writerMutex; /* Prevents multiple writer threads */
       
    275   pthread_cond_t queueSignal;  /* For waking up sleeping writer thread */
       
    276   pthread_cond_t emptySignal;  /* Notify when the write queue is empty */
       
    277   AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
       
    278   AsyncWrite *pQueueLast;      /* Last write operation on the list */
       
    279   AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
       
    280   volatile int ioDelay;             /* Extra delay between write operations */
       
    281   volatile int writerHaltWhenIdle;  /* Writer thread halts when queue empty */
       
    282   volatile int writerHaltNow;       /* Writer thread halts after next op */
       
    283   int ioError;                 /* True if an IO error has occurred (protected by queueMutex) */
       
    284   int nFile;                   /* Number of open files (from sqlite pov; protected by queueMutex) */
       
    285 } async = {
       
    286   PTHREAD_MUTEX_INITIALIZER,   /* lockMutex */
       
    287   PTHREAD_MUTEX_INITIALIZER,   /* queueMutex */
       
    288   PTHREAD_MUTEX_INITIALIZER,   /* writerMutex */
       
    289   PTHREAD_COND_INITIALIZER,    /* queueSignal */
       
    290   PTHREAD_COND_INITIALIZER,    /* emptySignal; remaining members are zero-initialized */
       
    291 };
       
   292 
       
   293 /* Possible values of AsyncWrite.op */
       
   294 #define ASYNC_NOOP          0
       
   295 #define ASYNC_WRITE         1
       
   296 #define ASYNC_SYNC          2
       
   297 #define ASYNC_TRUNCATE      3
       
   298 #define ASYNC_CLOSE         4
       
   299 #define ASYNC_DELETE        5
       
   300 #define ASYNC_OPENEXCLUSIVE 6
       
   301 #define ASYNC_UNLOCK        7
       
   302 
       
   303 /* Names of opcodes.  Used for debugging only.
       
   304 ** Make sure these stay in sync with the macros above!
       
   305 */
       
   306 static const char *azOpcodeName[] = {
       
   307   "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
       
   308 };
       
   309 
       
   310 /*
       
   311 ** Entries on the write-op queue are instances of the AsyncWrite
       
   312 ** structure, defined here.
       
   313 **
       
   314 ** The interpretation of the iOffset and nByte variables varies depending 
       
   315 ** on the value of AsyncWrite.op:
       
   316 **
       
   317 ** ASYNC_NOOP:
       
   318 **     No values used.
       
   319 **
       
   320 ** ASYNC_WRITE:
       
   321 **     iOffset -> Offset in file to write to.
       
   322 **     nByte   -> Number of bytes of data to write (pointed to by zBuf).
       
   323 **
       
   324 ** ASYNC_SYNC:
       
   325 **     nByte   -> flags to pass to sqlite3OsSync().
       
   326 **
       
   327 ** ASYNC_TRUNCATE:
       
   328 **     iOffset -> Size to truncate file to.
       
   329 **     nByte   -> Unused.
       
   330 **
       
   331 ** ASYNC_CLOSE:
       
   332 **     iOffset -> Unused.
       
   333 **     nByte   -> Unused.
       
   334 **
       
   335 ** ASYNC_DELETE:
       
   336 **     iOffset -> Contains the "syncDir" flag.
       
   337 **     nByte   -> Number of bytes of zBuf points to (file name).
       
   338 **
       
   339 ** ASYNC_OPENEXCLUSIVE:
       
   340 **     iOffset -> Value of "delflag".
       
   341 **     nByte   -> Number of bytes of zBuf points to (file name).
       
   342 **
       
   343 ** ASYNC_UNLOCK:
       
   344 **     nByte   -> Argument to sqlite3OsUnlock().
       
   345 **
       
   346 **
       
   347 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file. 
       
   348 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
       
   349 ** single blob, so is deleted when sqlite3_free() is called on the parent 
       
   350 ** structure.
       
   351 */
       
    352 struct AsyncWrite {
       
    353   AsyncFileData *pFileData;    /* File to write data to or sync */
       
    354   int op;                      /* One of ASYNC_xxx etc. */
       
    355   sqlite_int64 iOffset;        /* Meaning depends on op - see the per-opcode notes above */
       
    356   int nByte;          /* Meaning depends on op - see the per-opcode notes above */
       
    357   char *zBuf;         /* ASYNC_WRITE: data to write.  ASYNC_DELETE and
        **                   ASYNC_OPENEXCLUSIVE: file name (per the opcode
        **                   notes above).  Presumably NULL otherwise. */
       
    358   AsyncWrite *pNext;  /* Next write operation (to any file) */
       
    359 };
       
   360 
       
   361 /*
       
   362 ** An instance of this structure is created for each distinct open file 
       
   363 ** (i.e. if two handles are opened on the one file, only one of these
       
   364 ** structures is allocated) and stored in the async.aLock hash table. The
       
   365 ** keys for async.aLock are the full pathnames of the opened files.
       
   366 **
       
   367 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock
       
   368 ** structures, one for each handle currently open on the file.
       
   369 **
       
   370 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
       
   371 ** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is 
       
   372 ** not defined at compile time, variables AsyncLock.pFile and 
       
   373 ** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
       
   374 ** opened on the file in question and used to obtain the file-system 
       
   375 ** locks required by database connections within this process.
       
   376 **
       
   377 ** See comments above the asyncLock() function for more details on 
       
   378 ** the implementation of database locking used by this backend.
       
   379 */
       
    380 struct AsyncLock {
       
    381   char *zFile;                /* Presumably the full pathname of the file - TODO confirm */
       
    382   int nFile;                  /* Presumably the length of zFile - TODO confirm */
       
    383   sqlite3_file *pFile;        /* Handle used to take file-system locks; unused unless
        **                           main-db and ENABLE_FILE_LOCKING is defined */
       
    384   int eLock;                  /* Used together with pFile; presumably the lock level
        **                           held on pFile - TODO confirm */
       
    385   AsyncFileLock *pList;       /* Head of list of AsyncFileLock, one per open handle */
       
    386   AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
       
    387 };
       
   388 
       
   389 /*
       
   390 ** An instance of the following structure is allocated along with each
       
   391 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
       
   392 ** file was opened with the SQLITE_OPEN_MAIN_DB.
       
   393 */
       
    394 struct AsyncFileLock {
       
    395   int eLock;                /* Internally visible lock state (sqlite pov) */
       
    396   int eAsyncLock;           /* Lock-state with write-queue unlock */
       
    397   AsyncFileLock *pNext;     /* Presumably the next entry in the AsyncLock.pList chain - TODO confirm */
       
    398 };
       
   399 
       
   400 /* 
       
   401 ** The AsyncFile structure is a subclass of sqlite3_file used for 
       
   402 ** asynchronous IO. 
       
   403 **
       
   404 ** All of the actual data for the structure is stored in the structure
       
   405 ** pointed to by AsyncFile.pData, which is allocated as part of the
       
   406 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
       
   407 ** lifetime of the AsyncFile structure is ended by the caller after OsClose()
       
   408 ** is called, but the data in AsyncFileData may be required by the
       
   409 ** writer thread after that point.
       
   410 */
       
    411 struct AsyncFile {
       
    412   sqlite3_io_methods *pMethod;   /* Presumably the sqlite3_file method table (this is a
        **                              sqlite3_file subclass) - TODO confirm */
       
    413   AsyncFileData *pData;          /* All actual state; separately allocated so it can
        **                              outlive this structure for the writer thread */
       
    414 };
       
    415 struct AsyncFileData {
       
    416   char *zName;               /* Underlying OS filename - used for debugging */
       
    417   int nName;                 /* Number of characters in zName */
       
    418   sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
       
    419   sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
       
    420   AsyncFileLock lock;        /* Lock state for this handle */
       
    421   AsyncLock *pLock;          /* AsyncLock object for this file system entry */
       
    422   AsyncWrite close;          /* Embedded queue entry; presumably used for this
        **                          file's ASYNC_CLOSE operation - TODO confirm */
       
    423 };
       
   424 
       
   425 /*
       
   426 ** The following async_XXX functions are debugging wrappers around the
       
   427 ** corresponding pthread_XXX functions:
       
   428 **
       
   429 **     pthread_mutex_lock();
       
   430 **     pthread_mutex_unlock();
       
   431 **     pthread_mutex_trylock();
       
   432 **     pthread_cond_wait();
       
   433 **
       
   434 ** It is illegal to pass any mutex other than those stored in the
       
    435 ** following global variables to these functions.
       
   436 **
       
   437 **     async.queueMutex
       
   438 **     async.writerMutex
       
   439 **     async.lockMutex
       
   440 **
       
   441 ** If NDEBUG is defined, these wrappers do nothing except call the 
       
   442 ** corresponding pthreads function. If NDEBUG is not defined, then the
       
   443 ** following variables are used to store the thread-id (as returned
       
   444 ** by pthread_self()) currently holding the mutex, or 0 otherwise:
       
   445 **
       
   446 **     asyncdebug.queueMutexHolder
       
   447 **     asyncdebug.writerMutexHolder
       
   448 **     asyncdebug.lockMutexHolder
       
   449 **
       
   450 ** These variables are used by some assert() statements that verify
       
   451 ** the statements made in the "Deadlock Prevention" notes earlier
       
   452 ** in this file.
       
   453 */
       
   454 #ifndef NDEBUG
       
   455 
       
    456 static struct TestAsyncDebugData {
       
        /* Member order must mirror the first three members of the async
        ** structure: the debug wrappers index both as parallel arrays and
        ** assert this layout.  A value of 0 means "not held". */
    457   pthread_t lockMutexHolder;     /* Thread holding async.lockMutex, or 0 */
       
    458   pthread_t queueMutexHolder;    /* Thread holding async.queueMutex, or 0 */
       
    459   pthread_t writerMutexHolder;   /* Thread holding async.writerMutex, or 0 */
       
    460 } asyncdebug = {0, 0, 0};
       
   461 
       
   462 /*
       
   463 ** Wrapper around pthread_mutex_lock(). Checks that we have not violated
       
   464 ** the anti-deadlock rules (see "Deadlock prevention" above).
       
   465 */
       
   466 static int async_mutex_lock(pthread_mutex_t *pMutex){
       
   467   int iIdx;
       
   468   int rc;
       
   469   pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
       
   470   pthread_t *aHolder = (pthread_t *)(&asyncdebug);
       
   471 
       
   472   /* The code in this 'ifndef NDEBUG' block depends on a certain alignment
       
   473    * of the variables in TestAsyncStaticData and TestAsyncDebugData. The
       
   474    * following assert() statements check that this has not been changed.
       
   475    *
       
   476    * Really, these only need to be run once at startup time.
       
   477    */
       
   478   assert(&(aMutex[0])==&async.lockMutex);
       
   479   assert(&(aMutex[1])==&async.queueMutex);
       
   480   assert(&(aMutex[2])==&async.writerMutex);
       
   481   assert(&(aHolder[0])==&asyncdebug.lockMutexHolder);
       
   482   assert(&(aHolder[1])==&asyncdebug.queueMutexHolder);
       
   483   assert(&(aHolder[2])==&asyncdebug.writerMutexHolder);
       
   484 
       
   485   assert( pthread_self()!=0 );
       
   486 
       
   487   for(iIdx=0; iIdx<3; iIdx++){
       
   488     if( pMutex==&aMutex[iIdx] ) break;
       
   489 
       
   490     /* This is the key assert(). Here we are checking that if the caller
       
   491      * is trying to block on async.writerMutex, neither of the other two
       
   492      * mutex are held. If the caller is trying to block on async.queueMutex,
       
   493      * lockMutex is not held.
       
   494      */
       
   495     assert(!pthread_equal(aHolder[iIdx], pthread_self()));
       
   496   }
       
   497   assert(iIdx<3);
       
   498 
       
   499   rc = pthread_mutex_lock(pMutex);
       
   500   if( rc==0 ){
       
   501     assert(aHolder[iIdx]==0);
       
   502     aHolder[iIdx] = pthread_self();
       
   503   }
       
   504   return rc;
       
   505 }
       
   506 
       
   507 /*
       
   508 ** Wrapper around pthread_mutex_unlock().
       
   509 */
       
   510 static int async_mutex_unlock(pthread_mutex_t *pMutex){
       
   511   int iIdx;
       
   512   int rc;
       
   513   pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
       
   514   pthread_t *aHolder = (pthread_t *)(&asyncdebug);
       
   515 
       
   516   for(iIdx=0; iIdx<3; iIdx++){
       
   517     if( pMutex==&aMutex[iIdx] ) break;
       
   518   }
       
   519   assert(iIdx<3);
       
   520 
       
   521   assert(pthread_equal(aHolder[iIdx], pthread_self()));
       
   522   aHolder[iIdx] = 0;
       
   523   rc = pthread_mutex_unlock(pMutex);
       
   524   assert(rc==0);
       
   525 
       
   526   return 0;
       
   527 }
       
   528 
       
   529 /*
       
   530 ** Wrapper around pthread_mutex_trylock().
       
   531 */
       
   532 static int async_mutex_trylock(pthread_mutex_t *pMutex){
       
   533   int iIdx;
       
   534   int rc;
       
   535   pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
       
   536   pthread_t *aHolder = (pthread_t *)(&asyncdebug);
       
   537 
       
   538   for(iIdx=0; iIdx<3; iIdx++){
       
   539     if( pMutex==&aMutex[iIdx] ) break;
       
   540   }
       
   541   assert(iIdx<3);
       
   542 
       
   543   rc = pthread_mutex_trylock(pMutex);
       
   544   if( rc==0 ){
       
   545     assert(aHolder[iIdx]==0);
       
   546     aHolder[iIdx] = pthread_self();
       
   547   }
       
   548   return rc;
       
   549 }
       
   550 
       
   551 /*
       
   552 ** Wrapper around pthread_cond_wait().
       
   553 */
       
   554 static int async_cond_wait(pthread_cond_t *pCond, pthread_mutex_t *pMutex){
       
   555   int iIdx;
       
   556   int rc;
       
   557   pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
       
   558   pthread_t *aHolder = (pthread_t *)(&asyncdebug);
       
   559 
       
   560   for(iIdx=0; iIdx<3; iIdx++){
       
   561     if( pMutex==&aMutex[iIdx] ) break;
       
   562   }
       
   563   assert(iIdx<3);
       
   564 
       
   565   assert(pthread_equal(aHolder[iIdx],pthread_self()));
       
   566   aHolder[iIdx] = 0;
       
   567   rc = pthread_cond_wait(pCond, pMutex);
       
   568   if( rc==0 ){
       
   569     aHolder[iIdx] = pthread_self();
       
   570   }
       
   571   return rc;
       
   572 }
       
   573 
       
   574 /*
       
   575 ** Assert that the mutex is held by the current thread.
       
   576 */
       
   577 static void assert_mutex_is_held(pthread_mutex_t *pMutex){
       
   578   int iIdx;
       
   579   pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
       
   580   pthread_t *aHolder = (pthread_t *)(&asyncdebug);
       
   581 
       
   582   for(iIdx=0; iIdx<3; iIdx++){
       
   583     if( pMutex==&aMutex[iIdx] ) break;
       
   584   }
       
   585   assert(iIdx<3);
       
   586   assert( aHolder[iIdx]==pthread_self() );
       
   587 }
       
   588 
       
   589 /* Call our async_XX wrappers instead of selected pthread_XX functions */
       
   590 #define pthread_mutex_lock    async_mutex_lock
       
   591 #define pthread_mutex_unlock  async_mutex_unlock
       
   592 #define pthread_mutex_trylock async_mutex_trylock
       
   593 #define pthread_cond_wait     async_cond_wait
       
   594 
       
   595 #else    /* if defined(NDEBUG) */
       
   596 
       
   597 #define assert_mutex_is_held(X)    /* A no-op when not debugging */
       
   598 
       
   599 #endif   /* !defined(NDEBUG) */
       
   600 
       
   601 /*
       
   602 ** Add an entry to the end of the global write-op list. pWrite should point 
       
   603 ** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
       
   604 ** thread will call sqlite3_free() to free the structure after the specified
       
   605 ** operation has been completed.
       
   606 **
       
   607 ** Once an AsyncWrite structure has been added to the list, it becomes the
       
   608 ** property of the writer thread and must not be read or modified by the
       
   609 ** caller.  
       
   610 */
       
   611 static void addAsyncWrite(AsyncWrite *pWrite){
       
   612   /* We must hold the queue mutex in order to modify the queue pointers */
       
   613   pthread_mutex_lock(&async.queueMutex);
       
   614 
       
   615   /* Add the record to the end of the write-op queue */
       
   616   assert( !pWrite->pNext );
       
   617   if( async.pQueueLast ){
       
   618     assert( async.pQueueFirst );
       
   619     async.pQueueLast->pNext = pWrite;
       
   620   }else{
       
   621     async.pQueueFirst = pWrite;
       
   622   }
       
   623   async.pQueueLast = pWrite;
       
   624   ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op],
       
   625          pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
       
   626 
       
   627   if( pWrite->op==ASYNC_CLOSE ){
       
   628     async.nFile--;
       
   629   }
       
   630 
       
   631   /* Drop the queue mutex */
       
   632   pthread_mutex_unlock(&async.queueMutex);
       
   633 
       
   634   /* The writer thread might have been idle because there was nothing
       
   635   ** on the write-op queue for it to do.  So wake it up. */
       
   636   pthread_cond_signal(&async.queueSignal);
       
   637 }
       
   638 
       
   639 /*
       
   640 ** Increment async.nFile in a thread-safe manner.
       
   641 */
       
   642 static void incrOpenFileCount(){
       
   643   /* We must hold the queue mutex in order to modify async.nFile */
       
   644   pthread_mutex_lock(&async.queueMutex);
       
   645   if( async.nFile==0 ){
       
   646     async.ioError = SQLITE_OK;
       
   647   }
       
   648   async.nFile++;
       
   649   pthread_mutex_unlock(&async.queueMutex);
       
   650 }
       
   651 
       
   652 /*
       
   653 ** This is a utility function to allocate and populate a new AsyncWrite
       
   654 ** structure and insert it (via addAsyncWrite() ) into the global list.
       
   655 */
       
   656 static int addNewAsyncWrite(
       
   657   AsyncFileData *pFileData, 
       
   658   int op, 
       
   659   sqlite3_int64 iOffset, 
       
   660   int nByte,
       
   661   const char *zByte
       
   662 ){
       
   663   AsyncWrite *p;
       
   664   if( op!=ASYNC_CLOSE && async.ioError ){
       
   665     return async.ioError;
       
   666   }
       
   667   p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0));
       
   668   if( !p ){
       
   669     /* The upper layer does not expect operations like OsWrite() to
       
   670     ** return SQLITE_NOMEM. This is partly because under normal conditions
       
   671     ** SQLite is required to do rollback without calling malloc(). So
       
   672     ** if malloc() fails here, treat it as an I/O error. The above
       
   673     ** layer knows how to handle that.
       
   674     */
       
   675     return SQLITE_IOERR;
       
   676   }
       
   677   p->op = op;
       
   678   p->iOffset = iOffset;
       
   679   p->nByte = nByte;
       
   680   p->pFileData = pFileData;
       
   681   p->pNext = 0;
       
   682   if( zByte ){
       
   683     p->zBuf = (char *)&p[1];
       
   684     memcpy(p->zBuf, zByte, nByte);
       
   685   }else{
       
   686     p->zBuf = 0;
       
   687   }
       
   688   addAsyncWrite(p);
       
   689   return SQLITE_OK;
       
   690 }
       
   691 
       
   692 /*
       
   693 ** Close the file. This just adds an entry to the write-op list, the file is
       
   694 ** not actually closed.
       
   695 */
       
   696 static int asyncClose(sqlite3_file *pFile){
       
   697   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   698 
       
   699   /* Unlock the file, if it is locked */
       
   700   pthread_mutex_lock(&async.lockMutex);
       
   701   p->lock.eLock = 0;
       
   702   pthread_mutex_unlock(&async.lockMutex);
       
   703 
       
   704   addAsyncWrite(&p->close);
       
   705   return SQLITE_OK;
       
   706 }
       
   707 
       
   708 /*
       
   709 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of 
       
   710 ** writing to the underlying file, this function adds an entry to the end of
       
   711 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be
       
   712 ** returned.
       
   713 */
       
   714 static int asyncWrite(
       
   715   sqlite3_file *pFile, 
       
   716   const void *pBuf, 
       
   717   int amt, 
       
   718   sqlite3_int64 iOff
       
   719 ){
       
   720   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   721   return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf);
       
   722 }
       
   723 
       
   724 /*
       
   725 ** Read data from the file. First we read from the filesystem, then adjust 
       
   726 ** the contents of the buffer based on ASYNC_WRITE operations in the 
       
   727 ** write-op queue.
       
   728 **
       
   729 ** This method holds the mutex from start to finish.
       
   730 */
       
   731 static int asyncRead(
       
   732   sqlite3_file *pFile, 
       
   733   void *zOut, 
       
   734   int iAmt, 
       
   735   sqlite3_int64 iOffset
       
   736 ){
       
   737   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   738   int rc = SQLITE_OK;
       
   739   sqlite3_int64 filesize;
       
   740   int nRead;
       
   741   sqlite3_file *pBase = p->pBaseRead;
       
   742 
       
   743   /* Grab the write queue mutex for the duration of the call */
       
   744   pthread_mutex_lock(&async.queueMutex);
       
   745 
       
   746   /* If an I/O error has previously occurred in this virtual file 
       
   747   ** system, then all subsequent operations fail.
       
   748   */
       
   749   if( async.ioError!=SQLITE_OK ){
       
   750     rc = async.ioError;
       
   751     goto asyncread_out;
       
   752   }
       
   753 
       
   754   if( pBase->pMethods ){
       
   755     rc = pBase->pMethods->xFileSize(pBase, &filesize);
       
   756     if( rc!=SQLITE_OK ){
       
   757       goto asyncread_out;
       
   758     }
       
   759     nRead = MIN(filesize - iOffset, iAmt);
       
   760     if( nRead>0 ){
       
   761       rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset);
       
   762       ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset));
       
   763     }
       
   764   }
       
   765 
       
   766   if( rc==SQLITE_OK ){
       
   767     AsyncWrite *pWrite;
       
   768     char *zName = p->zName;
       
   769 
       
   770     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
       
   771       if( pWrite->op==ASYNC_WRITE && (
       
   772         (pWrite->pFileData==p) ||
       
   773         (zName && pWrite->pFileData->zName==zName)
       
   774       )){
       
   775         int iBeginOut = (pWrite->iOffset-iOffset);
       
   776         int iBeginIn = -iBeginOut;
       
   777         int nCopy;
       
   778 
       
   779         if( iBeginIn<0 ) iBeginIn = 0;
       
   780         if( iBeginOut<0 ) iBeginOut = 0;
       
   781         nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut);
       
   782 
       
   783         if( nCopy>0 ){
       
   784           memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy);
       
   785           ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset));
       
   786         }
       
   787       }
       
   788     }
       
   789   }
       
   790 
       
   791 asyncread_out:
       
   792   pthread_mutex_unlock(&async.queueMutex);
       
   793   return rc;
       
   794 }
       
   795 
       
   796 /*
       
   797 ** Truncate the file to nByte bytes in length. This just adds an entry to 
       
   798 ** the write-op list, no IO actually takes place.
       
   799 */
       
   800 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
       
   801   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   802   return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0);
       
   803 }
       
   804 
       
   805 /*
       
   806 ** Sync the file. This just adds an entry to the write-op list, the 
       
   807 ** sync() is done later by sqlite3_async_flush().
       
   808 */
       
   809 static int asyncSync(sqlite3_file *pFile, int flags){
       
   810   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   811   return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0);
       
   812 }
       
   813 
       
   814 /*
       
   815 ** Read the size of the file. First we read the size of the file system 
       
   816 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations 
       
   817 ** currently in the write-op list. 
       
   818 **
       
   819 ** This method holds the mutex from start to finish.
       
   820 */
       
   821 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
       
   822   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   823   int rc = SQLITE_OK;
       
   824   sqlite3_int64 s = 0;
       
   825   sqlite3_file *pBase;
       
   826 
       
   827   pthread_mutex_lock(&async.queueMutex);
       
   828 
       
   829   /* Read the filesystem size from the base file. If pBaseRead is NULL, this
       
   830   ** means the file hasn't been opened yet. In this case all relevant data 
       
   831   ** must be in the write-op queue anyway, so we can omit reading from the
       
   832   ** file-system.
       
   833   */
       
   834   pBase = p->pBaseRead;
       
   835   if( pBase->pMethods ){
       
   836     rc = pBase->pMethods->xFileSize(pBase, &s);
       
   837   }
       
   838 
       
   839   if( rc==SQLITE_OK ){
       
   840     AsyncWrite *pWrite;
       
   841     for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
       
   842       if( pWrite->op==ASYNC_DELETE 
       
   843        && p->zName 
       
   844        && strcmp(p->zName, pWrite->zBuf)==0 
       
   845       ){
       
   846         s = 0;
       
   847       }else if( pWrite->pFileData && (
       
   848           (pWrite->pFileData==p) 
       
   849        || (p->zName && pWrite->pFileData->zName==p->zName) 
       
   850       )){
       
   851         switch( pWrite->op ){
       
   852           case ASYNC_WRITE:
       
   853             s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s);
       
   854             break;
       
   855           case ASYNC_TRUNCATE:
       
   856             s = MIN(s, pWrite->iOffset);
       
   857             break;
       
   858         }
       
   859       }
       
   860     }
       
   861     *piSize = s;
       
   862   }
       
   863   pthread_mutex_unlock(&async.queueMutex);
       
   864   return rc;
       
   865 }
       
   866 
       
   867 /*
       
   868 ** Lock or unlock the actual file-system entry.
       
   869 */
       
   870 static int getFileLock(AsyncLock *pLock){
       
   871   int rc = SQLITE_OK;
       
   872   AsyncFileLock *pIter;
       
   873   int eRequired = 0;
       
   874 
       
   875   if( pLock->pFile ){
       
   876     for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
       
   877       assert(pIter->eAsyncLock>=pIter->eLock);
       
   878       if( pIter->eAsyncLock>eRequired ){
       
   879         eRequired = pIter->eAsyncLock;
       
   880         assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE);
       
   881       }
       
   882     }
       
   883 
       
   884     if( eRequired>pLock->eLock ){
       
   885       rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired);
       
   886       if( rc==SQLITE_OK ){
       
   887         pLock->eLock = eRequired;
       
   888       }
       
   889     }
       
   890     else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){
       
   891       rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired);
       
   892       if( rc==SQLITE_OK ){
       
   893         pLock->eLock = eRequired;
       
   894       }
       
   895     }
       
   896   }
       
   897 
       
   898   return rc;
       
   899 }
       
   900 
       
   901 /*
       
   902 ** Return the AsyncLock structure from the global async.pLock list 
       
   903 ** associated with the file-system entry identified by path zName 
       
   904 ** (a string of nName bytes). If no such structure exists, return 0.
       
   905 */
       
   906 static AsyncLock *findLock(const char *zName, int nName){
       
   907   AsyncLock *p = async.pLock;
       
   908   while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){
       
   909     p = p->pNext;
       
   910   }
       
   911   return p;
       
   912 }
       
   913 
       
   914 /*
       
   915 ** The following two methods - asyncLock() and asyncUnlock() - are used
       
   916 ** to obtain and release locks on database files opened with the
       
   917 ** asynchronous backend.
       
   918 */
       
   919 static int asyncLock(sqlite3_file *pFile, int eLock){
       
   920   int rc = SQLITE_OK;
       
   921   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   922 
       
   923   if( p->zName ){
       
   924     pthread_mutex_lock(&async.lockMutex);
       
   925     if( p->lock.eLock<eLock ){
       
   926       AsyncLock *pLock = p->pLock;
       
   927       AsyncFileLock *pIter;
       
   928       assert(pLock && pLock->pList);
       
   929       for(pIter=pLock->pList; pIter; pIter=pIter->pNext){
       
   930         if( pIter!=&p->lock && (
       
   931           (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) ||
       
   932           (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
       
   933           (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) ||
       
   934           (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING)
       
   935         )){
       
   936           rc = SQLITE_BUSY;
       
   937         }
       
   938       }
       
   939       if( rc==SQLITE_OK ){
       
   940         p->lock.eLock = eLock;
       
   941         p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock);
       
   942       }
       
   943       assert(p->lock.eAsyncLock>=p->lock.eLock);
       
   944       if( rc==SQLITE_OK ){
       
   945         rc = getFileLock(pLock);
       
   946       }
       
   947     }
       
   948     pthread_mutex_unlock(&async.lockMutex);
       
   949   }
       
   950 
       
   951   ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc));
       
   952   return rc;
       
   953 }
       
   954 static int asyncUnlock(sqlite3_file *pFile, int eLock){
       
   955   int rc = SQLITE_OK;
       
   956   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   957   if( p->zName ){
       
   958     AsyncFileLock *pLock = &p->lock;
       
   959     pthread_mutex_lock(&async.lockMutex);
       
   960     pLock->eLock = MIN(pLock->eLock, eLock);
       
   961     pthread_mutex_unlock(&async.lockMutex);
       
   962     rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
       
   963   }
       
   964   return rc;
       
   965 }
       
   966 
       
   967 /*
       
   968 ** This function is called when the pager layer first opens a database file
       
   969 ** and is checking for a hot-journal.
       
   970 */
       
   971 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
       
   972   int ret = 0;
       
   973   AsyncFileLock *pIter;
       
   974   AsyncFileData *p = ((AsyncFile *)pFile)->pData;
       
   975 
       
   976   pthread_mutex_lock(&async.lockMutex);
       
   977   for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
       
   978     if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
       
   979       ret = 1;
       
   980     }
       
   981   }
       
   982   pthread_mutex_unlock(&async.lockMutex);
       
   983 
       
   984   ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
       
   985   *pResOut = ret;
       
   986   return SQLITE_OK;
       
   987 }
       
   988 
       
   989 /* 
       
   990 ** sqlite3_file_control() implementation.
       
   991 */
       
   992 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
       
   993   switch( op ){
       
   994     case SQLITE_FCNTL_LOCKSTATE: {
       
   995       pthread_mutex_lock(&async.lockMutex);
       
   996       *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock;
       
   997       pthread_mutex_unlock(&async.lockMutex);
       
   998       return SQLITE_OK;
       
   999     }
       
  1000   }
       
  1001   return SQLITE_ERROR;
       
  1002 }
       
  1003 
       
  1004 /* 
       
  1005 ** Return the device characteristics and sector-size of the device. It
       
  1006 ** is not tricky to implement these correctly, as this backend might 
       
  1007 ** not have an open file handle at this point.
       
  1008 */
       
  1009 static int asyncSectorSize(sqlite3_file *pFile){
       
  1010   return 512;
       
  1011 }
       
  1012 static int asyncDeviceCharacteristics(sqlite3_file *pFile){
       
  1013   return 0;
       
  1014 }
       
  1015 
       
  1016 static int unlinkAsyncFile(AsyncFileData *pData){
       
  1017   AsyncFileLock **ppIter;
       
  1018   int rc = SQLITE_OK;
       
  1019 
       
  1020   if( pData->zName ){
       
  1021     AsyncLock *pLock = pData->pLock;
       
  1022     for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){
       
  1023       if( (*ppIter)==&pData->lock ){
       
  1024         *ppIter = pData->lock.pNext;
       
  1025         break;
       
  1026       }
       
  1027     }
       
  1028     if( !pLock->pList ){
       
  1029       AsyncLock **pp;
       
  1030       if( pLock->pFile ){
       
  1031         pLock->pFile->pMethods->xClose(pLock->pFile);
       
  1032       }
       
  1033       for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext));
       
  1034       *pp = pLock->pNext;
       
  1035       sqlite3_free(pLock);
       
  1036     }else{
       
  1037       rc = getFileLock(pLock);
       
  1038     }
       
  1039   }
       
  1040 
       
  1041   return rc;
       
  1042 }
       
  1043 
       
  1044 /*
       
  1045 ** Open a file.
       
  1046 */
       
  1047 static int asyncOpen(
       
  1048   sqlite3_vfs *pAsyncVfs,
       
  1049   const char *zName,
       
  1050   sqlite3_file *pFile,
       
  1051   int flags,
       
  1052   int *pOutFlags
       
  1053 ){
       
  1054   static sqlite3_io_methods async_methods = {
       
  1055     1,                               /* iVersion */
       
  1056     asyncClose,                      /* xClose */
       
  1057     asyncRead,                       /* xRead */
       
  1058     asyncWrite,                      /* xWrite */
       
  1059     asyncTruncate,                   /* xTruncate */
       
  1060     asyncSync,                       /* xSync */
       
  1061     asyncFileSize,                   /* xFileSize */
       
  1062     asyncLock,                       /* xLock */
       
  1063     asyncUnlock,                     /* xUnlock */
       
  1064     asyncCheckReservedLock,          /* xCheckReservedLock */
       
  1065     asyncFileControl,                /* xFileControl */
       
  1066     asyncSectorSize,                 /* xSectorSize */
       
  1067     asyncDeviceCharacteristics       /* xDeviceCharacteristics */
       
  1068   };
       
  1069 
       
  1070   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1071   AsyncFile *p = (AsyncFile *)pFile;
       
  1072   int nName = 0;
       
  1073   int rc = SQLITE_OK;
       
  1074   int nByte;
       
  1075   AsyncFileData *pData;
       
  1076   AsyncLock *pLock = 0;
       
  1077   char *z;
       
  1078   int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE);
       
  1079 
       
  1080   /* If zName is NULL, then the upper layer is requesting an anonymous file */
       
  1081   if( zName ){
       
  1082     nName = strlen(zName)+1;
       
  1083   }
       
  1084 
       
  1085   nByte = (
       
  1086     sizeof(AsyncFileData) +        /* AsyncFileData structure */
       
  1087     2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
       
  1088     nName                          /* AsyncFileData.zName */
       
  1089   ); 
       
  1090   z = sqlite3_malloc(nByte);
       
  1091   if( !z ){
       
  1092     return SQLITE_NOMEM;
       
  1093   }
       
  1094   memset(z, 0, nByte);
       
  1095   pData = (AsyncFileData*)z;
       
  1096   z += sizeof(pData[0]);
       
  1097   pData->pBaseRead = (sqlite3_file*)z;
       
  1098   z += pVfs->szOsFile;
       
  1099   pData->pBaseWrite = (sqlite3_file*)z;
       
  1100   pData->close.pFileData = pData;
       
  1101   pData->close.op = ASYNC_CLOSE;
       
  1102 
       
  1103   if( zName ){
       
  1104     z += pVfs->szOsFile;
       
  1105     pData->zName = z;
       
  1106     pData->nName = nName;
       
  1107     memcpy(pData->zName, zName, nName);
       
  1108   }
       
  1109 
       
  1110   if( !isExclusive ){
       
  1111     rc = pVfs->xOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags);
       
  1112     if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){
       
  1113       rc = pVfs->xOpen(pVfs, zName, pData->pBaseWrite, flags, 0);
       
  1114     }
       
  1115   }
       
  1116 
       
  1117   pthread_mutex_lock(&async.lockMutex);
       
  1118 
       
  1119   if( zName && rc==SQLITE_OK ){
       
  1120     pLock = findLock(pData->zName, pData->nName);
       
  1121     if( !pLock ){
       
  1122       int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; 
       
  1123       pLock = (AsyncLock *)sqlite3_malloc(nByte);
       
  1124       if( pLock ){
       
  1125         memset(pLock, 0, nByte);
       
  1126 #ifdef ENABLE_FILE_LOCKING
       
  1127         if( flags&SQLITE_OPEN_MAIN_DB ){
       
  1128           pLock->pFile = (sqlite3_file *)&pLock[1];
       
  1129           rc = pVfs->xOpen(pVfs, zName, pLock->pFile, flags, 0);
       
  1130           if( rc!=SQLITE_OK ){
       
  1131             sqlite3_free(pLock);
       
  1132             pLock = 0;
       
  1133           }
       
  1134         }
       
  1135 #endif
       
  1136         if( pLock ){
       
  1137           pLock->nFile = pData->nName;
       
  1138           pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
       
  1139           memcpy(pLock->zFile, pData->zName, pLock->nFile);
       
  1140           pLock->pNext = async.pLock;
       
  1141           async.pLock = pLock;
       
  1142         }
       
  1143       }else{
       
  1144         rc = SQLITE_NOMEM;
       
  1145       }
       
  1146     }
       
  1147   }
       
  1148 
       
  1149   if( rc==SQLITE_OK ){
       
  1150     p->pMethod = &async_methods;
       
  1151     p->pData = pData;
       
  1152 
       
  1153     /* Link AsyncFileData.lock into the linked list of 
       
  1154     ** AsyncFileLock structures for this file.
       
  1155     */
       
  1156     if( zName ){
       
  1157       pData->lock.pNext = pLock->pList;
       
  1158       pLock->pList = &pData->lock;
       
  1159       pData->zName = pLock->zFile;
       
  1160     }
       
  1161   }else{
       
  1162     if( pData->pBaseRead->pMethods ){
       
  1163       pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
       
  1164     }
       
  1165     if( pData->pBaseWrite->pMethods ){
       
  1166       pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
       
  1167     }
       
  1168     sqlite3_free(pData);
       
  1169   }
       
  1170 
       
  1171   pthread_mutex_unlock(&async.lockMutex);
       
  1172 
       
  1173   if( rc==SQLITE_OK ){
       
  1174     incrOpenFileCount();
       
  1175     pData->pLock = pLock;
       
  1176   }
       
  1177 
       
  1178   if( rc==SQLITE_OK && isExclusive ){
       
  1179     rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
       
  1180     if( rc==SQLITE_OK ){
       
  1181       if( pOutFlags ) *pOutFlags = flags;
       
  1182     }else{
       
  1183       pthread_mutex_lock(&async.lockMutex);
       
  1184       unlinkAsyncFile(pData);
       
  1185       pthread_mutex_unlock(&async.lockMutex);
       
  1186       sqlite3_free(pData);
       
  1187     }
       
  1188   }
       
  1189   return rc;
       
  1190 }
       
  1191 
       
  1192 /*
       
  1193 ** Implementation of sqlite3OsDelete. Add an entry to the end of the 
       
  1194 ** write-op queue to perform the delete.
       
  1195 */
       
  1196 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
       
  1197   return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
       
  1198 }
       
  1199 
       
  1200 /*
       
  1201 ** Implementation of sqlite3OsAccess. This method holds the mutex from
       
  1202 ** start to finish.
       
  1203 */
       
  1204 static int asyncAccess(
       
  1205   sqlite3_vfs *pAsyncVfs, 
       
  1206   const char *zName, 
       
  1207   int flags,
       
  1208   int *pResOut
       
  1209 ){
       
  1210   int rc;
       
  1211   int ret;
       
  1212   AsyncWrite *p;
       
  1213   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1214 
       
  1215   assert(flags==SQLITE_ACCESS_READWRITE 
       
  1216       || flags==SQLITE_ACCESS_READ 
       
  1217       || flags==SQLITE_ACCESS_EXISTS 
       
  1218   );
       
  1219 
       
  1220   pthread_mutex_lock(&async.queueMutex);
       
  1221   rc = pVfs->xAccess(pVfs, zName, flags, &ret);
       
  1222   if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
       
  1223     for(p=async.pQueueFirst; p; p = p->pNext){
       
  1224       if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
       
  1225         ret = 0;
       
  1226       }else if( p->op==ASYNC_OPENEXCLUSIVE 
       
  1227              && p->pFileData->zName
       
  1228              && 0==strcmp(p->pFileData->zName, zName) 
       
  1229       ){
       
  1230         ret = 1;
       
  1231       }
       
  1232     }
       
  1233   }
       
  1234   ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 
       
  1235     flags==SQLITE_ACCESS_READWRITE?"read-write":
       
  1236     flags==SQLITE_ACCESS_READ?"read":"exists"
       
  1237     , zName, ret)
       
  1238   );
       
  1239   pthread_mutex_unlock(&async.queueMutex);
       
  1240   *pResOut = ret;
       
  1241   return rc;
       
  1242 }
       
  1243 
       
  1244 /*
       
  1245 ** Fill in zPathOut with the full path to the file identified by zPath.
       
  1246 */
       
  1247 static int asyncFullPathname(
       
  1248   sqlite3_vfs *pAsyncVfs, 
       
  1249   const char *zPath, 
       
  1250   int nPathOut,
       
  1251   char *zPathOut
       
  1252 ){
       
  1253   int rc;
       
  1254   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1255   rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
       
  1256 
       
  1257   /* Because of the way intra-process file locking works, this backend
       
  1258   ** needs to return a canonical path. The following block assumes the
       
  1259   ** file-system uses unix style paths. 
       
  1260   */
       
  1261   if( rc==SQLITE_OK ){
       
  1262     int iIn;
       
  1263     int iOut = 0;
       
  1264     int nPathOut = strlen(zPathOut);
       
  1265 
       
  1266     for(iIn=0; iIn<nPathOut; iIn++){
       
  1267 
       
  1268       /* Replace any occurences of "//" with "/" */
       
  1269       if( iIn<=(nPathOut-2) && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='/'
       
  1270       ){
       
  1271         continue;
       
  1272       }
       
  1273 
       
  1274       /* Replace any occurences of "/./" with "/" */
       
  1275       if( iIn<=(nPathOut-3) 
       
  1276        && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' && zPathOut[iIn+2]=='/'
       
  1277       ){
       
  1278         iIn++;
       
  1279         continue;
       
  1280       }
       
  1281 
       
  1282       /* Replace any occurences of "<path-component>/../" with "" */
       
  1283       if( iOut>0 && iIn<=(nPathOut-4) 
       
  1284        && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' 
       
  1285        && zPathOut[iIn+2]=='.' && zPathOut[iIn+3]=='/'
       
  1286       ){
       
  1287         iIn += 3;
       
  1288         iOut--;
       
  1289         for( ; iOut>0 && zPathOut[iOut-1]!='/'; iOut--);
       
  1290         continue;
       
  1291       }
       
  1292 
       
  1293       zPathOut[iOut++] = zPathOut[iIn];
       
  1294     }
       
  1295     zPathOut[iOut] = '\0';
       
  1296   }
       
  1297 
       
  1298   return rc;
       
  1299 }
       
  1300 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
       
  1301   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1302   return pVfs->xDlOpen(pVfs, zPath);
       
  1303 }
       
  1304 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
       
  1305   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1306   pVfs->xDlError(pVfs, nByte, zErrMsg);
       
  1307 }
       
  1308 static void *asyncDlSym(
       
  1309   sqlite3_vfs *pAsyncVfs, 
       
  1310   void *pHandle, 
       
  1311   const char *zSymbol
       
  1312 ){
       
  1313   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1314   return pVfs->xDlSym(pVfs, pHandle, zSymbol);
       
  1315 }
       
  1316 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
       
  1317   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1318   pVfs->xDlClose(pVfs, pHandle);
       
  1319 }
       
  1320 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
       
  1321   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1322   return pVfs->xRandomness(pVfs, nByte, zBufOut);
       
  1323 }
       
  1324 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
       
  1325   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1326   return pVfs->xSleep(pVfs, nMicro);
       
  1327 }
       
  1328 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
       
  1329   sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
       
  1330   return pVfs->xCurrentTime(pVfs, pTimeOut);
       
  1331 }
       
  1332 
       
  1333 static sqlite3_vfs async_vfs = {
       
  1334   1,                    /* iVersion */
       
  1335   sizeof(AsyncFile),    /* szOsFile */
       
  1336   0,                    /* mxPathname */
       
  1337   0,                    /* pNext */
       
  1338   "async",              /* zName */
       
  1339   0,                    /* pAppData */
       
  1340   asyncOpen,            /* xOpen */
       
  1341   asyncDelete,          /* xDelete */
       
  1342   asyncAccess,          /* xAccess */
       
  1343   asyncFullPathname,    /* xFullPathname */
       
  1344   asyncDlOpen,          /* xDlOpen */
       
  1345   asyncDlError,         /* xDlError */
       
  1346   asyncDlSym,           /* xDlSym */
       
  1347   asyncDlClose,         /* xDlClose */
       
  1348   asyncRandomness,      /* xDlError */
       
  1349   asyncSleep,           /* xDlSym */
       
  1350   asyncCurrentTime      /* xDlClose */
       
  1351 };
       
  1352 
       
  1353 /*
       
  1354 ** Call this routine to enable or disable the
       
  1355 ** asynchronous IO features implemented in this file. 
       
  1356 **
       
  1357 ** This routine is not even remotely threadsafe.  Do not call
       
  1358 ** this routine while any SQLite database connections are open.
       
  1359 */
       
  1360 static void asyncEnable(int enable){
       
  1361   if( enable ){
       
  1362     if( !async_vfs.pAppData ){
       
  1363       async_vfs.pAppData = (void *)sqlite3_vfs_find(0);
       
  1364       async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname;
       
  1365       sqlite3_vfs_register(&async_vfs, 1);
       
  1366     }
       
  1367   }else{
       
  1368     if( async_vfs.pAppData ){
       
  1369       sqlite3_vfs_unregister(&async_vfs);
       
  1370       async_vfs.pAppData = 0;
       
  1371     }
       
  1372   }
       
  1373 }
       
  1374 
       
  1375 /* 
       
  1376 ** This procedure runs in a separate thread, reading messages off of the
       
  1377 ** write queue and processing them one by one.  
       
  1378 **
       
  1379 ** If async.writerHaltNow is true, then this procedure exits
       
  1380 ** after processing a single message.
       
  1381 **
       
  1382 ** If async.writerHaltWhenIdle is true, then this procedure exits when
       
  1383 ** the write queue is empty.
       
  1384 **
       
  1385 ** If both of the above variables are false, this procedure runs
       
  1386 ** indefinately, waiting for operations to be added to the write queue
       
  1387 ** and processing them in the order in which they arrive.
       
  1388 **
       
  1389 ** An artifical delay of async.ioDelay milliseconds is inserted before
       
  1390 ** each write operation in order to simulate the effect of a slow disk.
       
  1391 **
       
  1392 ** Only one instance of this procedure may be running at a time.
       
  1393 */
       
  1394 static void *asyncWriterThread(void *pIsStarted){
       
  1395   sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
       
  1396   AsyncWrite *p = 0;
       
  1397   int rc = SQLITE_OK;
       
  1398   int holdingMutex = 0;
       
  1399 
       
  1400   if( pthread_mutex_trylock(&async.writerMutex) ){
       
  1401     return 0;
       
  1402   }
       
  1403   (*(int *)pIsStarted) = 1;
       
  1404   while( async.writerHaltNow==0 ){
       
  1405     int doNotFree = 0;
       
  1406     sqlite3_file *pBase = 0;
       
  1407 
       
  1408     if( !holdingMutex ){
       
  1409       pthread_mutex_lock(&async.queueMutex);
       
  1410     }
       
  1411     while( (p = async.pQueueFirst)==0 ){
       
  1412       pthread_cond_broadcast(&async.emptySignal);
       
  1413       if( async.writerHaltWhenIdle ){
       
  1414         pthread_mutex_unlock(&async.queueMutex);
       
  1415         break;
       
  1416       }else{
       
  1417         ASYNC_TRACE(("IDLE\n"));
       
  1418         pthread_cond_wait(&async.queueSignal, &async.queueMutex);
       
  1419         ASYNC_TRACE(("WAKEUP\n"));
       
  1420       }
       
  1421     }
       
  1422     if( p==0 ) break;
       
  1423     holdingMutex = 1;
       
  1424 
       
  1425     /* Right now this thread is holding the mutex on the write-op queue.
       
  1426     ** Variable 'p' points to the first entry in the write-op queue. In
       
  1427     ** the general case, we hold on to the mutex for the entire body of
       
  1428     ** the loop. 
       
  1429     **
       
  1430     ** However in the cases enumerated below, we relinquish the mutex,
       
  1431     ** perform the IO, and then re-request the mutex before removing 'p' from
       
  1432     ** the head of the write-op queue. The idea is to increase concurrency with
       
  1433     ** sqlite threads.
       
  1434     **
       
  1435     **     * An ASYNC_CLOSE operation.
       
  1436     **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish 
       
  1437     **       the mutex, call the underlying xOpenExclusive() function, then
       
  1438     **       re-aquire the mutex before seting the AsyncFile.pBaseRead 
       
  1439     **       variable.
       
  1440     **     * ASYNC_SYNC and ASYNC_WRITE operations, if 
       
  1441     **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
       
  1442     **       file-handles are open for the particular file being "synced".
       
  1443     */
       
  1444     if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
       
  1445       p->op = ASYNC_NOOP;
       
  1446     }
       
  1447     if( p->pFileData ){
       
  1448       pBase = p->pFileData->pBaseWrite;
       
  1449       if( 
       
  1450         p->op==ASYNC_CLOSE || 
       
  1451         p->op==ASYNC_OPENEXCLUSIVE ||
       
  1452         (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 
       
  1453       ){
       
  1454         pthread_mutex_unlock(&async.queueMutex);
       
  1455         holdingMutex = 0;
       
  1456       }
       
  1457       if( !pBase->pMethods ){
       
  1458         pBase = p->pFileData->pBaseRead;
       
  1459       }
       
  1460     }
       
  1461 
       
  1462     switch( p->op ){
       
  1463       case ASYNC_NOOP:
       
  1464         break;
       
  1465 
       
  1466       case ASYNC_WRITE:
       
  1467         assert( pBase );
       
  1468         ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
       
  1469                 p->pFileData->zName, p->nByte, p->iOffset));
       
  1470         rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
       
  1471         break;
       
  1472 
       
  1473       case ASYNC_SYNC:
       
  1474         assert( pBase );
       
  1475         ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
       
  1476         rc = pBase->pMethods->xSync(pBase, p->nByte);
       
  1477         break;
       
  1478 
       
  1479       case ASYNC_TRUNCATE:
       
  1480         assert( pBase );
       
  1481         ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 
       
  1482                 p->pFileData->zName, p->iOffset));
       
  1483         rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
       
  1484         break;
       
  1485 
       
  1486       case ASYNC_CLOSE: {
       
  1487         AsyncFileData *pData = p->pFileData;
       
  1488         ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
       
  1489         if( pData->pBaseWrite->pMethods ){
       
  1490           pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
       
  1491         }
       
  1492         if( pData->pBaseRead->pMethods ){
       
  1493           pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
       
  1494         }
       
  1495 
       
  1496         /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 
       
  1497         ** structures for this file. Obtain the async.lockMutex mutex 
       
  1498         ** before doing so.
       
  1499         */
       
  1500         pthread_mutex_lock(&async.lockMutex);
       
  1501         rc = unlinkAsyncFile(pData);
       
  1502         pthread_mutex_unlock(&async.lockMutex);
       
  1503 
       
  1504         if( !holdingMutex ){
       
  1505           pthread_mutex_lock(&async.queueMutex);
       
  1506           holdingMutex = 1;
       
  1507         }
       
  1508         assert_mutex_is_held(&async.queueMutex);
       
  1509         async.pQueueFirst = p->pNext;
       
  1510         sqlite3_free(pData);
       
  1511         doNotFree = 1;
       
  1512         break;
       
  1513       }
       
  1514 
       
  1515       case ASYNC_UNLOCK: {
       
  1516         AsyncFileData *pData = p->pFileData;
       
  1517         int eLock = p->nByte;
       
  1518         pthread_mutex_lock(&async.lockMutex);
       
  1519         pData->lock.eAsyncLock = MIN(
       
  1520             pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
       
  1521         );
       
  1522         assert(pData->lock.eAsyncLock>=pData->lock.eLock);
       
  1523         rc = getFileLock(pData->pLock);
       
  1524         pthread_mutex_unlock(&async.lockMutex);
       
  1525         break;
       
  1526       }
       
  1527 
       
  1528       case ASYNC_DELETE:
       
  1529         ASYNC_TRACE(("DELETE %s\n", p->zBuf));
       
  1530         rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
       
  1531         break;
       
  1532 
       
  1533       case ASYNC_OPENEXCLUSIVE: {
       
  1534         int flags = (int)p->iOffset;
       
  1535         AsyncFileData *pData = p->pFileData;
       
  1536         ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
       
  1537         assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
       
  1538         rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
       
  1539         assert( holdingMutex==0 );
       
  1540         pthread_mutex_lock(&async.queueMutex);
       
  1541         holdingMutex = 1;
       
  1542         break;
       
  1543       }
       
  1544 
       
  1545       default: assert(!"Illegal value for AsyncWrite.op");
       
  1546     }
       
  1547 
       
  1548     /* If we didn't hang on to the mutex during the IO op, obtain it now
       
  1549     ** so that the AsyncWrite structure can be safely removed from the 
       
  1550     ** global write-op queue.
       
  1551     */
       
  1552     if( !holdingMutex ){
       
  1553       pthread_mutex_lock(&async.queueMutex);
       
  1554       holdingMutex = 1;
       
  1555     }
       
  1556     /* ASYNC_TRACE(("UNLINK %p\n", p)); */
       
  1557     if( p==async.pQueueLast ){
       
  1558       async.pQueueLast = 0;
       
  1559     }
       
  1560     if( !doNotFree ){
       
  1561       assert_mutex_is_held(&async.queueMutex);
       
  1562       async.pQueueFirst = p->pNext;
       
  1563       sqlite3_free(p);
       
  1564     }
       
  1565     assert( holdingMutex );
       
  1566 
       
  1567     /* An IO error has occured. We cannot report the error back to the
       
  1568     ** connection that requested the I/O since the error happened 
       
  1569     ** asynchronously.  The connection has already moved on.  There 
       
  1570     ** really is nobody to report the error to.
       
  1571     **
       
  1572     ** The file for which the error occured may have been a database or
       
  1573     ** journal file. Regardless, none of the currently queued operations
       
  1574     ** associated with the same database should now be performed. Nor should
       
  1575     ** any subsequently requested IO on either a database or journal file 
       
  1576     ** handle for the same database be accepted until the main database
       
  1577     ** file handle has been closed and reopened.
       
  1578     **
       
  1579     ** Furthermore, no further IO should be queued or performed on any file
       
  1580     ** handle associated with a database that may have been part of a 
       
  1581     ** multi-file transaction that included the database associated with 
       
  1582     ** the IO error (i.e. a database ATTACHed to the same handle at some 
       
  1583     ** point in time).
       
  1584     */
       
  1585     if( rc!=SQLITE_OK ){
       
  1586       async.ioError = rc;
       
  1587     }
       
  1588 
       
  1589     if( async.ioError && !async.pQueueFirst ){
       
  1590       pthread_mutex_lock(&async.lockMutex);
       
  1591       if( 0==async.pLock ){
       
  1592         async.ioError = SQLITE_OK;
       
  1593       }
       
  1594       pthread_mutex_unlock(&async.lockMutex);
       
  1595     }
       
  1596 
       
  1597     /* Drop the queue mutex before continuing to the next write operation
       
  1598     ** in order to give other threads a chance to work with the write queue.
       
  1599     */
       
  1600     if( !async.pQueueFirst || !async.ioError ){
       
  1601       pthread_mutex_unlock(&async.queueMutex);
       
  1602       holdingMutex = 0;
       
  1603       if( async.ioDelay>0 ){
       
  1604         pVfs->xSleep(pVfs, async.ioDelay);
       
  1605       }else{
       
  1606         sched_yield();
       
  1607       }
       
  1608     }
       
  1609   }
       
  1610   
       
  1611   pthread_mutex_unlock(&async.writerMutex);
       
  1612   return 0;
       
  1613 }
       
  1614 
       
  1615 /**************************************************************************
       
  1616 ** The remaining code defines a Tcl interface for testing the asynchronous
       
  1617 ** IO implementation in this file.
       
  1618 **
       
  1619 ** To adapt the code to a non-TCL environment, delete or comment out
       
  1620 ** the code that follows.
       
  1621 */
       
  1622 
       
  1623 /*
       
  1624 ** sqlite3async_enable ?YES/NO?
       
  1625 **
       
  1626 ** Enable or disable the asynchronous I/O backend.  This command is
       
  1627 ** not thread-safe.  Do not call it while any database connections
       
  1628 ** are open.
       
  1629 */
       
  1630 static int testAsyncEnable(
       
  1631   void * clientData,
       
  1632   Tcl_Interp *interp,
       
  1633   int objc,
       
  1634   Tcl_Obj *CONST objv[]
       
  1635 ){
       
  1636   if( objc!=1 && objc!=2 ){
       
  1637     Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?");
       
  1638     return TCL_ERROR;
       
  1639   }
       
  1640   if( objc==1 ){
       
  1641     Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0));
       
  1642   }else{
       
  1643     int en;
       
  1644     if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR;
       
  1645     asyncEnable(en);
       
  1646   }
       
  1647   return TCL_OK;
       
  1648 }
       
  1649 
       
  1650 /*
       
  1651 ** sqlite3async_halt  "now"|"idle"|"never"
       
  1652 **
       
  1653 ** Set the conditions at which the writer thread will halt.
       
  1654 */
       
  1655 static int testAsyncHalt(
       
  1656   void * clientData,
       
  1657   Tcl_Interp *interp,
       
  1658   int objc,
       
  1659   Tcl_Obj *CONST objv[]
       
  1660 ){
       
  1661   const char *zCond;
       
  1662   if( objc!=2 ){
       
  1663     Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\"");
       
  1664     return TCL_ERROR;
       
  1665   }
       
  1666   zCond = Tcl_GetString(objv[1]);
       
  1667   if( strcmp(zCond, "now")==0 ){
       
  1668     async.writerHaltNow = 1;
       
  1669     pthread_cond_broadcast(&async.queueSignal);
       
  1670   }else if( strcmp(zCond, "idle")==0 ){
       
  1671     async.writerHaltWhenIdle = 1;
       
  1672     async.writerHaltNow = 0;
       
  1673     pthread_cond_broadcast(&async.queueSignal);
       
  1674   }else if( strcmp(zCond, "never")==0 ){
       
  1675     async.writerHaltWhenIdle = 0;
       
  1676     async.writerHaltNow = 0;
       
  1677   }else{
       
  1678     Tcl_AppendResult(interp, 
       
  1679       "should be one of: \"now\", \"idle\", or \"never\"", (char*)0);
       
  1680     return TCL_ERROR;
       
  1681   }
       
  1682   return TCL_OK;
       
  1683 }
       
  1684 
       
  1685 /*
       
  1686 ** sqlite3async_delay ?MS?
       
  1687 **
       
  1688 ** Query or set the number of milliseconds of delay in the writer
       
  1689 ** thread after each write operation.  The default is 0.  By increasing
       
  1690 ** the memory delay we can simulate the effect of slow disk I/O.
       
  1691 */
       
  1692 static int testAsyncDelay(
       
  1693   void * clientData,
       
  1694   Tcl_Interp *interp,
       
  1695   int objc,
       
  1696   Tcl_Obj *CONST objv[]
       
  1697 ){
       
  1698   if( objc!=1 && objc!=2 ){
       
  1699     Tcl_WrongNumArgs(interp, 1, objv, "?MS?");
       
  1700     return TCL_ERROR;
       
  1701   }
       
  1702   if( objc==1 ){
       
  1703     Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay));
       
  1704   }else{
       
  1705     int ioDelay;
       
  1706     if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR;
       
  1707     async.ioDelay = ioDelay;
       
  1708   }
       
  1709   return TCL_OK;
       
  1710 }
       
  1711 
       
  1712 /*
       
  1713 ** sqlite3async_start
       
  1714 **
       
  1715 ** Start a new writer thread.
       
  1716 */
       
  1717 static int testAsyncStart(
       
  1718   void * clientData,
       
  1719   Tcl_Interp *interp,
       
  1720   int objc,
       
  1721   Tcl_Obj *CONST objv[]
       
  1722 ){
       
  1723   pthread_t x;
       
  1724   int rc;
       
  1725   volatile int isStarted = 0;
       
  1726   rc = pthread_create(&x, 0, asyncWriterThread, (void *)&isStarted);
       
  1727   if( rc ){
       
  1728     Tcl_AppendResult(interp, "failed to create the thread", 0);
       
  1729     return TCL_ERROR;
       
  1730   }
       
  1731   pthread_detach(x);
       
  1732   while( isStarted==0 ){
       
  1733     sched_yield();
       
  1734   }
       
  1735   return TCL_OK;
       
  1736 }
       
  1737 
       
  1738 /*
       
  1739 ** sqlite3async_wait
       
  1740 **
       
  1741 ** Wait for the current writer thread to terminate.
       
  1742 **
       
  1743 ** If the current writer thread is set to run forever then this
       
  1744 ** command would block forever.  To prevent that, an error is returned. 
       
  1745 */
       
  1746 static int testAsyncWait(
       
  1747   void * clientData,
       
  1748   Tcl_Interp *interp,
       
  1749   int objc,
       
  1750   Tcl_Obj *CONST objv[]
       
  1751 ){
       
  1752   int cnt = 10;
       
  1753   if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){
       
  1754     Tcl_AppendResult(interp, "would block forever", (char*)0);
       
  1755     return TCL_ERROR;
       
  1756   }
       
  1757 
       
  1758   while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){
       
  1759     pthread_mutex_unlock(&async.writerMutex);
       
  1760     sched_yield();
       
  1761   }
       
  1762   if( cnt>=0 ){
       
  1763     ASYNC_TRACE(("WAIT\n"));
       
  1764     pthread_mutex_lock(&async.queueMutex);
       
  1765     pthread_cond_broadcast(&async.queueSignal);
       
  1766     pthread_mutex_unlock(&async.queueMutex);
       
  1767     pthread_mutex_lock(&async.writerMutex);
       
  1768     pthread_mutex_unlock(&async.writerMutex);
       
  1769   }else{
       
  1770     ASYNC_TRACE(("NO-WAIT\n"));
       
  1771   }
       
  1772   return TCL_OK;
       
  1773 }
       
  1774 
       
  1775 
       
  1776 #endif  /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */
       
  1777 
       
  1778 /*
       
  1779 ** This routine registers the custom TCL commands defined in this
       
  1780 ** module.  This should be the only procedure visible from outside
       
  1781 ** of this module.
       
  1782 */
       
  1783 int Sqlitetestasync_Init(Tcl_Interp *interp){
       
  1784 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE
       
  1785   Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0);
       
  1786   Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0);
       
  1787   Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0);
       
  1788   Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0);
       
  1789   Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0);
       
  1790   Tcl_LinkVar(interp, "sqlite3async_trace",
       
  1791       (char*)&sqlite3async_trace, TCL_LINK_INT);
       
  1792 #endif  /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */
       
  1793   return TCL_OK;
       
  1794 }