persistentstorage/sqlite3api/TEST/SRC/test_async.c
changeset 0 08ec8eefde2f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/persistentstorage/sqlite3api/TEST/SRC/test_async.c	Fri Jan 22 11:06:30 2010 +0200
@@ -0,0 +1,1794 @@
+/*
+** 2005 December 14
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+**
+** $Id: test_async.c,v 1.48 2008/09/26 20:02:50 drh Exp $
+**
+** This file contains an example implementation of an asynchronous IO 
+** backend for SQLite.
+**
+** WHAT IS ASYNCHRONOUS I/O?
+**
+** With asynchronous I/O, write requests are handled by a separate thread
+** running in the background.  This means that the thread that initiates
+** a database write does not have to wait for (sometimes slow) disk I/O
+** to occur.  The write seems to happen very quickly, though in reality
+** it is happening at its usual slow pace in the background.
+**
+** Asynchronous I/O appears to give better responsiveness, but at a price.
+** You lose the Durable property.  With the default I/O backend of SQLite,
+** once a write completes, you know that the information you wrote is
+** safely on disk.  With the asynchronous I/O, this is not the case.  If
+** your program crashes or if a power loss occurs after the database
+** write but before the asynchronous write thread has completed, then the
+** database change might never make it to disk and the next user of the
+** database might not see your change.
+**
+** You lose Durability with asynchronous I/O, but you still retain the
+** other parts of ACID:  Atomic,  Consistent, and Isolated.  Many
+** applications get along fine without the Durability.
+**
+** HOW IT WORKS
+**
+** Asynchronous I/O works by creating a special SQLite "vfs" structure
+** and registering it with sqlite3_vfs_register(). When files opened via 
+** this vfs are written to (using sqlite3OsWrite()), the data is not 
+** written directly to disk, but is placed in the "write-queue" to be
+** handled by the background thread.
+**
+** When files opened with the asynchronous vfs are read from 
+** (using sqlite3OsRead()), the data is read from the file on 
+** disk and the write-queue, so that from the point of view of
+** the vfs reader the OsWrite() appears to have already completed.
+**
+** The special vfs is registered (and unregistered) by calls to 
+** function asyncEnable() (see below).
+**
+** LIMITATIONS
+**
+** This demonstration code is deliberately kept simple in order to keep
+** the main ideas clear and easy to understand.  Real applications that
+** want to do asynchronous I/O might want to add additional capabilities.
+** For example, in this demonstration if writes are happening at a steady
+** stream that exceeds the I/O capability of the background writer thread,
+** the queue of pending write operations will grow without bound until we
+** run out of memory.  Users of this technique may want to keep track of
+** the quantity of pending writes and stop accepting new write requests
+** when the buffer gets to be too big.
+**
+** LOCKING + CONCURRENCY
+**
+** Multiple connections from within a single process that use this
+** implementation of asynchronous IO may access a single database
+** file concurrently. From the point of view of the user, if all
+** connections are from within a single process, there is no difference
+** between the concurrency offered by "normal" SQLite and SQLite
+** using the asynchronous backend.
+**
+** If connections from within multiple processes may access the
+** database file, the ENABLE_FILE_LOCKING symbol (see below) must be
+** defined. If it is not defined, then no locks are established on 
+** the database file. In this case, if multiple processes access 
+** the database file, corruption will quickly result.
+**
+** If ENABLE_FILE_LOCKING is defined (the default), then connections 
+** from within multiple processes may access a single database file 
+** without risking corruption. However concurrency is reduced as
+** follows:
+**
+**   * When a connection using asynchronous IO begins a database
+**     transaction, the database is locked immediately. However the
+**     lock is not released until after all relevant operations
+**     in the write-queue have been flushed to disk. This means
+**     (for example) that the database may remain locked for some 
+**     time after a "COMMIT" or "ROLLBACK" is issued.
+**
+**   * If an application using asynchronous IO executes transactions
+**     in quick succession, other database users may be effectively
+**     locked out of the database. This is because when a BEGIN
+**     is executed, a database lock is established immediately. But
+**     when the corresponding COMMIT or ROLLBACK occurs, the lock
+**     is not released until the relevant part of the write-queue 
+**     has been flushed through. As a result, if a COMMIT is followed
+**     by a BEGIN before the write-queue is flushed through, the database 
+**     is never unlocked, preventing other processes from accessing 
+**     the database.
+**
+** Defining ENABLE_FILE_LOCKING when using an NFS or other remote 
+** file-system may slow things down, as synchronous round-trips to the 
+** server may be required to establish database file locks.
+*/
+#define ENABLE_FILE_LOCKING
+
+#ifndef SQLITE_AMALGAMATION
+# include "sqlite3.h"
+# include <assert.h>
+# include <string.h>
+#endif
+#include "tcl.h"
+
+/*
+** This test uses pthreads and hence only works on unix and with
+** a threadsafe build of SQLite.
+*/
+#if SQLITE_OS_UNIX && SQLITE_THREADSAFE
+
+/*
+** This demo uses pthreads.  If you do not have a pthreads implementation
+** for your operating system, you will need to recode the threading 
+** logic.
+*/
+#include <pthread.h>
+#include <sched.h>
+
+/* Useful macros used in several places. An argument may be evaluated twice */
+#define MIN(x,y) ((x)<(y)?(x):(y))
+#define MAX(x,y) ((x)>(y)?(x):(y))
+
+/* Forward references */
+typedef struct AsyncWrite AsyncWrite;       /* Entry on the write-op queue */
+typedef struct AsyncFile AsyncFile;         /* sqlite3_file subclass */
+typedef struct AsyncFileData AsyncFileData; /* State behind an AsyncFile */
+typedef struct AsyncFileLock AsyncFileLock; /* Lock state of one handle */
+typedef struct AsyncLock AsyncLock;         /* Entry on the async.pLock list */
+
+/* Debug tracing: when sqlite3async_trace is non-zero, log to stderr. */
+static int sqlite3async_trace = 0;
+# define ASYNC_TRACE(X) if( sqlite3async_trace ) asyncTrace X
+static void asyncTrace(const char *zFormat, ...){
+  char *z;                       /* Formatted text; NULL if malloc fails */
+  va_list ap;
+  va_start(ap, zFormat);
+  z = sqlite3_vmprintf(zFormat, ap);
+  va_end(ap);
+  if( z ) fprintf(stderr, "[%d] %s", (int)pthread_self(), z);
+  sqlite3_free(z);               /* Harmless no-op when z is NULL */
+}
+
+/*
+** THREAD SAFETY NOTES
+**
+** Basic rules:
+**
+**     * Both read and write access to the global write-op queue must be 
+**       protected by the async.queueMutex. As are the async.ioError and
+**       async.nFile variables.
+**
+**     * The async.pLock list and all AsyncLock and AsyncFileLock
+**       structures must be protected by the async.lockMutex mutex.
+**
+**     * The file handles from the underlying system are not assumed to 
+**       be thread safe.
+**
+**     * See the last two paragraphs under "The Writer Thread" for
+**       an assumption to do with file-handle synchronization by the Os.
+**
+** Deadlock prevention:
+**
+**     There are three mutexes used by the system: the "writer" mutex,
+**     the "queue" mutex and the "lock" mutex. Rules are:
+**
+**     * It is illegal to block on the writer mutex when any other mutex
+**       is held, and
+**
+**     * It is illegal to block on the queue mutex when the lock mutex
+**       is held.
+**
+**     i.e. mutexes must be grabbed in the order "writer", "queue", "lock".
+**
+** File system operations (invoked by SQLite thread):
+**
+**     xOpen
+**     xDelete
+**     xFileExists
+**
+** File handle operations (invoked by SQLite thread):
+**
+**         asyncWrite, asyncClose, asyncTruncate, asyncSync 
+**    
+**     The operations above add an entry to the global write-op list. They
+**     prepare the entry, acquire the async.queueMutex momentarily while
+**     list pointers are  manipulated to insert the new entry, then release
+**     the mutex and signal the writer thread to wake up in case it happens
+**     to be asleep.
+**
+**    
+**         asyncRead, asyncFileSize.
+**
+**     Read operations. Both of these read from the underlying file
+**     first, then adjust their result based on pending writes in the
+**     write-op queue.  So async.queueMutex is held for the duration
+**     of these operations to prevent other threads from changing the
+**     queue in mid operation.
+**    
+**
+**         asyncLock, asyncUnlock, asyncCheckReservedLock
+**    
+**     These primitives implement in-process locking using the async.pLock
+**     list, searched by file name.  Files are locked correctly for
+**     connections coming from the same process.  But other processes cannot
+**     see these in-process locks and will therefore not honor them.
+**
+**
+** The writer thread:
+**
+**     The async.writerMutex is used to make sure that there is only
+**     a single writer thread running at a time.
+**
+**     Inside the writer thread is a loop that works like this:
+**
+**         WHILE (write-op list is not empty)
+**             Do IO operation at head of write-op list
+**             Remove entry from head of write-op list
+**         END WHILE
+**
+**     The async.queueMutex is always held during the <write-op list is 
+**     not empty> test, and when the entry is removed from the head
+**     of the write-op list. Sometimes it is held for the interim
+**     period (while the IO is performed), and sometimes it is
+**     relinquished. It is relinquished if (a) the IO op is an
+**     ASYNC_CLOSE or (b) when the file handle was opened, two of
+**     the underlying systems handles were opened on the same
+**     file-system entry.
+**
+**     If condition (b) above is true, then one file-handle 
+**     (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the
+**     file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush() 
+**     threads to perform write() operations. This means that read 
+**     operations are not blocked by asynchronous writes (although 
+**     asynchronous writes may still be blocked by reads).
+**
+**     This assumes that the OS keeps two handles open on the same file
+**     properly in sync. That is, any read operation that starts after a
+**     write operation on the same file system entry has completed returns
+**     data consistent with the write. We also assume that if one thread 
+**     reads a file while another is writing it all bytes other than the
+**     ones actually being written contain valid data.
+**
+**     If the above assumptions are not true, set the preprocessor symbol
+**     SQLITE_ASYNC_TWO_FILEHANDLES to 0.
+*/
+
+#ifndef SQLITE_ASYNC_TWO_FILEHANDLES
+/* #define SQLITE_ASYNC_TWO_FILEHANDLES 0 */
+#define SQLITE_ASYNC_TWO_FILEHANDLES 1
+#endif
+
+/*
+** State information is held in the static variable "async" defined
+** as the following structure.
+**
+** Both async.ioError and async.nFile are protected by async.queueMutex.
+*/
+static struct TestAsyncStaticData {  /* Order of the 3 mutexes is relied on */
+  pthread_mutex_t lockMutex;   /* For access to the async.pLock list */
+  pthread_mutex_t queueMutex;  /* Mutex for access to write operation queue */
+  pthread_mutex_t writerMutex; /* Prevents multiple writer threads */
+  pthread_cond_t queueSignal;  /* For waking up sleeping writer thread */
+  pthread_cond_t emptySignal;  /* Notify when the write queue is empty */
+  AsyncWrite *pQueueFirst;     /* Next write operation to be processed */
+  AsyncWrite *pQueueLast;      /* Last write operation on the list */
+  AsyncLock *pLock;            /* Linked list of all AsyncLock structures */
+  volatile int ioDelay;             /* Extra delay between write operations */
+  volatile int writerHaltWhenIdle;  /* Writer thread halts when queue empty */
+  volatile int writerHaltNow;       /* Writer thread halts after next op */
+  int ioError;                 /* SQLITE_OK, or code of an earlier IO error */
+  int nFile;                   /* Number of open files (from sqlite pov) */
+} async = {
+  PTHREAD_MUTEX_INITIALIZER,   /* lockMutex */
+  PTHREAD_MUTEX_INITIALIZER,   /* queueMutex */
+  PTHREAD_MUTEX_INITIALIZER,   /* writerMutex */
+  PTHREAD_COND_INITIALIZER,    /* queueSignal */
+  PTHREAD_COND_INITIALIZER,    /* emptySignal; later members start as zero */
+};
+
+/* Possible values of AsyncWrite.op */
+#define ASYNC_NOOP          0   /* No operation */
+#define ASYNC_WRITE         1   /* Write nByte bytes of zBuf at iOffset */
+#define ASYNC_SYNC          2   /* Sync the file; nByte holds the flags */
+#define ASYNC_TRUNCATE      3   /* Truncate the file to iOffset bytes */
+#define ASYNC_CLOSE         4   /* Close the file handle */
+#define ASYNC_DELETE        5   /* Delete request; zBuf is the file name */
+#define ASYNC_OPENEXCLUSIVE 6   /* Open-exclusive request; zBuf is the name */
+#define ASYNC_UNLOCK        7   /* Unlock; nByte is the unlock argument */
+
+/* Names of opcodes, indexed by the ASYNC_xxx value.  Used for debugging
+** only.  Make sure these stay in sync with the macros above!
+*/
+static const char *azOpcodeName[] = {
+  "NOOP", "WRITE", "SYNC", "TRUNCATE", "CLOSE", "DELETE", "OPENEX", "UNLOCK"
+};
+
+/*
+** Entries on the write-op queue are instances of the AsyncWrite
+** structure, defined here.
+**
+** The interpretation of the iOffset and nByte variables varies depending 
+** on the value of AsyncWrite.op:
+**
+** ASYNC_NOOP:
+**     No values used.
+**
+** ASYNC_WRITE:
+**     iOffset -> Offset in file to write to.
+**     nByte   -> Number of bytes of data to write (pointed to by zBuf).
+**
+** ASYNC_SYNC:
+**     nByte   -> flags to pass to sqlite3OsSync().
+**
+** ASYNC_TRUNCATE:
+**     iOffset -> Size to truncate file to.
+**     nByte   -> Unused.
+**
+** ASYNC_CLOSE:
+**     iOffset -> Unused.
+**     nByte   -> Unused.
+**
+** ASYNC_DELETE:
+**     iOffset -> Contains the "syncDir" flag.
+**     nByte   -> Number of bytes of zBuf points to (file name).
+**
+** ASYNC_OPENEXCLUSIVE:
+**     iOffset -> Value of "delflag".
+**     nByte   -> Number of bytes of zBuf points to (file name).
+**
+** ASYNC_UNLOCK:
+**     nByte   -> Argument to sqlite3OsUnlock().
+**
+**
+** For an ASYNC_WRITE operation, zBuf points to the data to write to the file. 
+** This space is sqlite3_malloc()d along with the AsyncWrite structure in a
+** single blob, so is deleted when sqlite3_free() is called on the parent 
+** structure.
+*/
+struct AsyncWrite {
+  AsyncFileData *pFileData;    /* File to write data to or sync (may be NULL) */
+  int op;                      /* One of ASYNC_xxx etc. */
+  sqlite_int64 iOffset;        /* See above */
+  int nByte;          /* See above */
+  char *zBuf;         /* Data to write, or a file name (see above), or NULL */
+  AsyncWrite *pNext;  /* Next write operation (to any file) */
+};
+
+/*
+** An instance of this structure is created for each distinct open file 
+** (i.e. if two handles are opened on the one file, only one of these
+** structures is allocated) and stored in the async.pLock linked list.
+** Entries are looked up by the full pathname of the opened file.
+**
+** AsyncLock.pList points to the head of a linked list of AsyncFileLock
+** structures, one for each handle currently open on the file.
+**
+** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is
+** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is 
+** not defined at compile time, variables AsyncLock.pFile and 
+** AsyncLock.eLock are never used. Otherwise, pFile is a file handle
+** opened on the file in question and used to obtain the file-system 
+** locks required by database connections within this process.
+**
+** See comments above the asyncLock() function for more details on 
+** the implementation of database locking used by this backend.
+*/
+struct AsyncLock {
+  char *zFile;                /* Name of the file; compared by findLock() */
+  int nFile;                  /* Number of bytes in zFile */
+  sqlite3_file *pFile;        /* Handle used to take file-system locks */
+  int eLock;                  /* Lock level currently held on pFile */
+  AsyncFileLock *pList;       /* AsyncFileLock of each handle on the file */
+  AsyncLock *pNext;           /* Next in linked list headed by async.pLock */
+};
+
+/*
+** An instance of the following structure is allocated along with each
+** AsyncFileData structure (see AsyncFileData.lock), but is only used if the
+** file was opened with the SQLITE_OPEN_MAIN_DB.
+*/
+struct AsyncFileLock {
+  int eLock;                /* Internally visible lock state (sqlite pov) */
+  int eAsyncLock;           /* Lock-state with write-queue unlock; >=eLock */
+  AsyncFileLock *pNext;     /* Next entry on the AsyncLock.pList list */
+};
+
+/* 
+** The AsyncFile structure is a subclass of sqlite3_file used for 
+** asynchronous IO. 
+**
+** All of the actual data for the structure is stored in the structure
+** pointed to by AsyncFile.pData, which is allocated as part of the
+** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the
+** lifetime of the AsyncFile structure is ended by the caller after OsClose()
+** is called, but the data in AsyncFileData may be required by the
+** writer thread after that point.
+*/
+struct AsyncFile {
+  sqlite3_io_methods *pMethod;  /* Method table, as in the sqlite3_file base */
+  AsyncFileData *pData;         /* All other state for this handle */
+};
+struct AsyncFileData {
+  char *zName;               /* Underlying OS filename (may be NULL) */
+  int nName;                 /* Number of characters in zName */
+  sqlite3_file *pBaseRead;   /* Read handle to the underlying Os file */
+  sqlite3_file *pBaseWrite;  /* Write handle to the underlying Os file */
+  AsyncFileLock lock;        /* Lock state for this handle */
+  AsyncLock *pLock;          /* AsyncLock object for this file system entry */
+  AsyncWrite close;          /* Queue entry that asyncClose() submits */
+};
+
+/*
+** The following async_XXX functions are debugging wrappers around the
+** corresponding pthread_XXX functions:
+**
+**     pthread_mutex_lock();
+**     pthread_mutex_unlock();
+**     pthread_mutex_trylock();
+**     pthread_cond_wait();
+**
+** It is illegal to pass any mutex other than those stored in the
+** following global variables to these functions.
+**
+**     async.queueMutex
+**     async.writerMutex
+**     async.lockMutex
+**
+** If NDEBUG is defined, these wrappers do nothing except call the 
+** corresponding pthreads function. If NDEBUG is not defined, then the
+** following variables are used to store the thread-id (as returned
+** by pthread_self()) currently holding the mutex, or 0 otherwise:
+**
+**     asyncdebug.queueMutexHolder
+**     asyncdebug.writerMutexHolder
+**     asyncdebug.lockMutexHolder
+**
+** These variables are used by some assert() statements that verify
+** the statements made in the "Deadlock Prevention" notes earlier
+** in this file.
+*/
+#ifndef NDEBUG
+
+static struct TestAsyncDebugData {  /* Same order as the mutexes in "async" */
+  pthread_t lockMutexHolder;    /* Thread holding async.lockMutex, or 0 */
+  pthread_t queueMutexHolder;   /* Thread holding async.queueMutex, or 0 */
+  pthread_t writerMutexHolder;  /* Thread holding async.writerMutex, or 0 */
+} asyncdebug = {0, 0, 0};
+
+/*
+** Wrapper around pthread_mutex_lock(). Checks that we have not violated
+** the anti-deadlock rules (see "Deadlock prevention" above). pMutex must
+** be one of the three mutexes in "async". Returns the pthread result code.
+*/
+static int async_mutex_lock(pthread_mutex_t *pMutex){
+  int iIdx;       /* Index of pMutex: 0 is lock, 1 is queue, 2 is writer */
+  int rc;         /* Result of the real pthread_mutex_lock() call */
+  pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
+  pthread_t *aHolder = (pthread_t *)(&asyncdebug);
+
+  /* The code in this 'ifndef NDEBUG' block depends on a certain alignment
+   * of the variables in TestAsyncStaticData and TestAsyncDebugData. The
+   * following assert() statements check that this has not been changed.
+   * Really, these only need to be run once at startup time.
+   */
+  assert(&(aMutex[0])==&async.lockMutex);
+  assert(&(aMutex[1])==&async.queueMutex);
+  assert(&(aMutex[2])==&async.writerMutex);
+  assert(&(aHolder[0])==&asyncdebug.lockMutexHolder);
+  assert(&(aHolder[1])==&asyncdebug.queueMutexHolder);
+  assert(&(aHolder[2])==&asyncdebug.writerMutexHolder);
+
+  assert( pthread_self()!=0 );   /* 0 is used to mean "no holder" */
+
+  for(iIdx=0; iIdx<3; iIdx++){
+    if( pMutex==&aMutex[iIdx] ) break;
+
+    /* This is the key assert(). Here we are checking that if the caller
+     * is trying to block on async.writerMutex, neither of the other two
+     * mutex are held. If the caller is trying to block on async.queueMutex,
+     * lockMutex is not held.
+     */
+    assert(!pthread_equal(aHolder[iIdx], pthread_self()));
+  }
+  assert(iIdx<3);                /* pMutex must be one of the three */
+
+  rc = pthread_mutex_lock(pMutex);
+  if( rc==0 ){
+    assert(aHolder[iIdx]==0);
+    aHolder[iIdx] = pthread_self();   /* Record the new holder */
+  }
+  return rc;
+}
+
+/*
+** Wrapper around pthread_mutex_unlock(). The caller must hold pMutex.
+** Always returns 0; the pthread result is only checked by an assert().
+*/
+static int async_mutex_unlock(pthread_mutex_t *pMutex){
+  int iIdx;
+  int rc;
+  pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
+  pthread_t *aHolder = (pthread_t *)(&asyncdebug);
+
+  for(iIdx=0; iIdx<3; iIdx++){
+    if( pMutex==&aMutex[iIdx] ) break;
+  }
+  assert(iIdx<3);
+
+  assert(pthread_equal(aHolder[iIdx], pthread_self()));
+  aHolder[iIdx] = 0;         /* Clear the holder before releasing the mutex */
+  rc = pthread_mutex_unlock(pMutex);
+  assert(rc==0);
+  return 0;
+}
+
+/*
+** Wrapper around pthread_mutex_trylock(). No lock-order check is made
+** here; the holder is recorded only if the mutex was obtained (rc==0).
+*/
+static int async_mutex_trylock(pthread_mutex_t *pMutex){
+  int iIdx;
+  int rc;
+  pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
+  pthread_t *aHolder = (pthread_t *)(&asyncdebug);
+
+  for(iIdx=0; iIdx<3; iIdx++){
+    if( pMutex==&aMutex[iIdx] ) break;
+  }
+  assert(iIdx<3);
+  rc = pthread_mutex_trylock(pMutex);
+  if( rc==0 ){
+    assert(aHolder[iIdx]==0);
+    aHolder[iIdx] = pthread_self();
+  }
+  return rc;
+}
+
+/*
+** Wrapper around pthread_cond_wait(). The recorded holder of pMutex is
+** cleared before waiting and set again if the wait returns 0.
+*/
+static int async_cond_wait(pthread_cond_t *pCond, pthread_mutex_t *pMutex){
+  int iIdx;
+  int rc;
+  pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
+  pthread_t *aHolder = (pthread_t *)(&asyncdebug);
+
+  for(iIdx=0; iIdx<3; iIdx++){
+    if( pMutex==&aMutex[iIdx] ) break;
+  }
+  assert(iIdx<3);
+  assert(pthread_equal(aHolder[iIdx],pthread_self()));
+  aHolder[iIdx] = 0;
+  rc = pthread_cond_wait(pCond, pMutex);
+  if( rc==0 ){
+    aHolder[iIdx] = pthread_self();
+  }
+  return rc;
+}
+
+/*
+** Assert that the mutex is held by the current thread.
+*/
+static void assert_mutex_is_held(pthread_mutex_t *pMutex){
+  int iIdx;                    /* Index of pMutex among the three mutexes */
+  pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async);
+  pthread_t *aHolder = (pthread_t *)(&asyncdebug);
+
+  for(iIdx=0; iIdx<3; iIdx++){
+    if( pMutex==&aMutex[iIdx] ) break;
+  }
+  assert(iIdx<3);              /* pMutex must be one of the three */
+  assert( aHolder[iIdx]==pthread_self() );
+}
+
+/* In debug builds, calls to these pthread_XX functions made later in this
+** file are redirected to the async_XX wrappers. */
+#define pthread_mutex_lock    async_mutex_lock
+#define pthread_mutex_unlock  async_mutex_unlock
+#define pthread_mutex_trylock async_mutex_trylock
+#define pthread_cond_wait     async_cond_wait
+#else    /* if defined(NDEBUG) */
+
+#define assert_mutex_is_held(X)    /* A no-op when not debugging */
+
+#endif   /* !defined(NDEBUG) */
+
+/*
+** Add an entry to the end of the global write-op list. pWrite should point 
+** to an AsyncWrite structure allocated using sqlite3_malloc().  The writer
+** thread will call sqlite3_free() to free the structure after the specified
+** operation has been completed.
+**
+** Once an AsyncWrite structure has been added to the list, it becomes the
+** property of the writer thread and must not be read or modified by the
+** caller.  
+*/
+static void addAsyncWrite(AsyncWrite *pWrite){
+  /* We must hold the queue mutex in order to modify the queue pointers */
+  pthread_mutex_lock(&async.queueMutex);
+
+  /* Add the record to the end of the write-op queue */
+  assert( !pWrite->pNext );
+  if( async.pQueueLast ){
+    assert( async.pQueueFirst );
+    async.pQueueLast->pNext = pWrite;
+  }else{
+    async.pQueueFirst = pWrite;
+  }
+  async.pQueueLast = pWrite;
+  /* iOffset is a 64-bit integer, so it is formatted with %lld, not %d */
+  ASYNC_TRACE(("PUSH %p (%s %s %lld)\n", pWrite, azOpcodeName[pWrite->op],
+         pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset));
+  if( pWrite->op==ASYNC_CLOSE ){
+    async.nFile--;           /* One fewer open file, from sqlite's pov */
+  }
+
+  /* Drop the queue mutex */
+  pthread_mutex_unlock(&async.queueMutex);
+
+  /* The writer thread might have been idle because there was nothing
+  ** on the write-op queue for it to do.  So wake it up. */
+  pthread_cond_signal(&async.queueSignal);
+}
+
+/*
+** Increment async.nFile in a thread-safe manner. When the count was zero,
+** any IO error recorded earlier in async.ioError is cleared first.
+*/
+static void incrOpenFileCount(void){
+  /* We must hold the queue mutex in order to modify async.nFile */
+  pthread_mutex_lock(&async.queueMutex);
+  /* First open file: forget any earlier IO error */
+  if( async.nFile==0 ) async.ioError = SQLITE_OK;
+  async.nFile++;
+  pthread_mutex_unlock(&async.queueMutex);
+}
+
+/*
+** Allocate a new AsyncWrite structure, fill it in (copying the nByte bytes
+** at zByte, if zByte is not NULL) and append it to the global list via
+** addAsyncWrite(). Returns SQLITE_OK, SQLITE_IOERR or a prior async.ioError.
+*/
+static int addNewAsyncWrite(
+  AsyncFileData *pFileData, 
+  int op, 
+  sqlite3_int64 iOffset, 
+  int nByte,
+  const char *zByte
+){
+  AsyncWrite *pNew;
+  int nExtra = (zByte ? nByte : 0);    /* Payload bytes stored after *pNew */
+
+  /* Once an IO error has been recorded only ASYNC_CLOSE is still queued.
+  ** NOTE(review): async.ioError is read here without holding queueMutex. */
+  if( async.ioError && op!=ASYNC_CLOSE ){
+    return async.ioError;
+  }
+  pNew = sqlite3_malloc(sizeof(AsyncWrite) + nExtra);
+  if( pNew==0 ){
+    /* The upper layer does not expect SQLITE_NOMEM from operations like
+    ** OsWrite(), so a failed malloc() is reported as an I/O error. */
+    return SQLITE_IOERR;
+  }
+  pNew->pFileData = pFileData;
+  pNew->op = op;
+  pNew->iOffset = iOffset;
+  pNew->nByte = nByte;
+  pNew->pNext = 0;
+  pNew->zBuf = 0;
+  if( zByte ){
+    pNew->zBuf = (char *)&pNew[1];
+    memcpy(pNew->zBuf, zByte, nByte);
+  }
+  addAsyncWrite(pNew);
+  return SQLITE_OK;
+}
+
+/*
+** Close the file. The handle's lock state is reset and the AsyncWrite
+** entry embedded in its AsyncFileData is queued; always returns SQLITE_OK.
+*/
+static int asyncClose(sqlite3_file *pFile){
+  AsyncFile *pAsync = (AsyncFile *)pFile;
+  AsyncFileData *pData = pAsync->pData;
+  /* Unlock the file, if it is locked */
+  pthread_mutex_lock(&async.lockMutex);
+  pData->lock.eLock = 0;
+  pthread_mutex_unlock(&async.lockMutex);
+
+  addAsyncWrite(&pData->close);
+  return SQLITE_OK;
+}
+
+/*
+** Implementation of sqlite3OsWrite() for asynchronous files. Instead of 
+** writing to the underlying file, this function queues an ASYNC_WRITE
+** entry holding a copy of the data. Returns whatever addNewAsyncWrite()
+** returns: SQLITE_OK, SQLITE_IOERR on malloc failure, or a prior IO error.
+*/
+static int asyncWrite(
+  sqlite3_file *pFile, 
+  const void *pBuf, 
+  int amt, 
+  sqlite3_int64 iOff
+){
+  AsyncFileData *pData = ((AsyncFile *)pFile)->pData;
+  return addNewAsyncWrite(pData, ASYNC_WRITE, iOff, amt, pBuf);
+}
+
+/*
+** Read data from the file. First we read from the filesystem, then adjust 
+** the contents of the buffer based on ASYNC_WRITE operations in the 
+** write-op queue. Returns SQLITE_OK, a prior async.ioError, or the error
+** from the underlying xFileSize()/xRead() call.
+** This method holds the queue mutex from start to finish.
+*/
+static int asyncRead(
+  sqlite3_file *pFile, 
+  void *zOut, 
+  int iAmt, 
+  sqlite3_int64 iOffset
+){
+  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
+  int rc = SQLITE_OK;
+  sqlite3_int64 filesize;
+  sqlite3_int64 nRead;         /* 64-bit: filesize-iOffset may exceed an int */
+  sqlite3_file *pBase = p->pBaseRead;
+
+  /* Grab the write queue mutex for the duration of the call */
+  pthread_mutex_lock(&async.queueMutex);
+
+  /* If an I/O error has previously occurred in this virtual file 
+  ** system, then all subsequent operations fail.
+  */
+  if( async.ioError!=SQLITE_OK ){
+    rc = async.ioError;
+    goto asyncread_out;
+  }
+
+  if( pBase->pMethods ){
+    rc = pBase->pMethods->xFileSize(pBase, &filesize);
+    if( rc!=SQLITE_OK ){
+      goto asyncread_out;
+    }
+    nRead = MIN(filesize - iOffset, (sqlite3_int64)iAmt);
+    if( nRead>0 ){
+      rc = pBase->pMethods->xRead(pBase, zOut, (int)nRead, iOffset);
+      ASYNC_TRACE(("READ %s %lld bytes at %lld\n", p->zName, nRead, iOffset));
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    AsyncWrite *pWrite;
+    char *zName = p->zName;
+
+    for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){
+      if( pWrite->op==ASYNC_WRITE && (
+        (pWrite->pFileData==p) ||
+        (zName && pWrite->pFileData->zName==zName)
+      )){
+        /* 64-bit math: an int difference could wrap and fake an overlap */
+        sqlite3_int64 iBeginOut = (pWrite->iOffset-iOffset);
+        sqlite3_int64 iBeginIn = -iBeginOut;
+        sqlite3_int64 nCopy;
+        if( iBeginIn<0 ) iBeginIn = 0;
+        if( iBeginOut<0 ) iBeginOut = 0;
+        nCopy = MIN((sqlite3_int64)pWrite->nByte-iBeginIn, iAmt-iBeginOut);
+        if( nCopy>0 ){
+          memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn],
+                 (size_t)nCopy);
+          ASYNC_TRACE(("OVERREAD %lld bytes at %lld\n", nCopy, iBeginOut+iOffset));
+        }
+      }
+    }
+  }
+
+asyncread_out:
+  pthread_mutex_unlock(&async.queueMutex);
+  return rc;
+}
+
+/*
+** Truncate the file to nByte bytes in length. Only an ASYNC_TRUNCATE entry
+** is queued here; no IO actually takes place in this call.
+*/
+static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){
+  AsyncFile *pAsync = (AsyncFile *)pFile;
+  return addNewAsyncWrite(pAsync->pData, ASYNC_TRUNCATE, nByte, 0, 0);
+}
+
+/*
+** Sync the file. Only an ASYNC_SYNC entry carrying the flags is queued
+** here; the actual sync happens later, when the queue is processed.
+*/
+static int asyncSync(sqlite3_file *pFile, int flags){
+  AsyncFile *pAsync = (AsyncFile *)pFile;
+  return addNewAsyncWrite(pAsync->pData, ASYNC_SYNC, 0, flags, 0);
+}
+
+/*
+** Report the size of the file in *piSize. The size of the file system entry
+** is read first and then adjusted for the ASYNC_WRITE, ASYNC_TRUNCATE and
+** ASYNC_DELETE operations still in the write-op list. *piSize is left
+** untouched on error. The queue mutex is held from start to finish.
+*/
+int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){
+  AsyncFileData *pData = ((AsyncFile *)pFile)->pData;
+  sqlite3_file *pBase;
+  sqlite3_int64 nSize = 0;
+  int rc = SQLITE_OK;
+
+  pthread_mutex_lock(&async.queueMutex);
+
+  /* Read the filesystem size from the base file. If the base handle has no
+  ** methods, this means the file hasn't been opened yet. In this case all
+  ** relevant data must be in the write-op queue anyway, so we can omit
+  ** reading from the file-system. */
+  pBase = pData->pBaseRead;
+  if( pBase->pMethods ){
+    rc = pBase->pMethods->xFileSize(pBase, &nSize);
+  }
+
+  if( rc==SQLITE_OK ){
+    AsyncWrite *pOp;
+    for(pOp=async.pQueueFirst; pOp; pOp=pOp->pNext){
+      AsyncFileData *pOther = pOp->pFileData;
+      /* A pending delete of this file empties it */
+      if( pOp->op==ASYNC_DELETE 
+       && pData->zName 
+       && strcmp(pData->zName, pOp->zBuf)==0 
+      ){
+        nSize = 0;
+        continue;
+      }
+      /* Skip operations that belong to other files */
+      if( pOther==0 ) continue;
+      if( pOther!=pData && !(pData->zName && pOther->zName==pData->zName) ){
+        continue;
+      }
+      if( pOp->op==ASYNC_WRITE ){
+        sqlite3_int64 iEnd = pOp->iOffset + (sqlite3_int64)(pOp->nByte);
+        if( iEnd>nSize ) nSize = iEnd;
+      }else if( pOp->op==ASYNC_TRUNCATE ){
+        if( pOp->iOffset<nSize ) nSize = pOp->iOffset;
+      }
+    }
+    *piSize = nSize;
+  }
+  pthread_mutex_unlock(&async.queueMutex);
+  return rc;
+}
+
+/*
+** Lock or unlock the actual file-system entry so that the lock held on
+** pLock->pFile matches the strongest eAsyncLock of any handle on the file.
+** Does nothing when pLock->pFile is not set. Returns an SQLite error code.
+*/
+static int getFileLock(AsyncLock *pLock){
+  AsyncFileLock *pHandle;
+  int eNeed = 0;                /* Strongest eAsyncLock of any handle */
+  int rc;
+
+  if( !pLock->pFile ) return SQLITE_OK;
+
+  for(pHandle=pLock->pList; pHandle; pHandle=pHandle->pNext){
+    assert(pHandle->eAsyncLock>=pHandle->eLock);
+    if( pHandle->eAsyncLock>eNeed ){
+      eNeed = pHandle->eAsyncLock;
+      assert(eNeed>=0 && eNeed<=SQLITE_LOCK_EXCLUSIVE);
+    }
+  }
+
+  /* Upgrade whenever needed; downgrade only to SHARED or below */
+  if( eNeed>pLock->eLock ){
+    rc = pLock->pFile->pMethods->xLock(pLock->pFile, eNeed);
+  }else if( eNeed<pLock->eLock && eNeed<=SQLITE_LOCK_SHARED ){
+    rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eNeed);
+  }else{
+    return SQLITE_OK;           /* Already at a suitable level */
+  }
+  if( rc==SQLITE_OK ){
+    pLock->eLock = eNeed;       /* Remember the level now held on pFile */
+  }
+  return rc;
+}
+
+/*
+** Search the global async.pLock list for the entry whose name is the nName
+** bytes at zName. Returns 0 if there is no such entry.
+*/
+static AsyncLock *findLock(const char *zName, int nName){
+  AsyncLock *pEntry;
+  for(pEntry=async.pLock; pEntry; pEntry=pEntry->pNext){
+    if( pEntry->nFile!=nName ) continue;
+    if( memcmp(pEntry->zFile, zName, nName)==0 ) break;
+  }
+  return pEntry;
+}
+
+/*
+** The following two methods - asyncLock() and asyncUnlock() - are used
+** to obtain and release locks on database files opened with the
+** asynchronous backend. asyncLock() returns SQLITE_BUSY on a conflict.
+*/
+static int asyncLock(sqlite3_file *pFile, int eLock){
+  AsyncFileData *pData = ((AsyncFile *)pFile)->pData;
+  int rc = SQLITE_OK;
+
+  if( pData->zName ){
+    pthread_mutex_lock(&async.lockMutex);
+    if( eLock>pData->lock.eLock ){
+      AsyncLock *pShared = pData->pLock;
+      AsyncFileLock *pOther;
+      int eBlock;             /* Lowest level held elsewhere that blocks us */
+      switch( eLock ){
+        case SQLITE_LOCK_EXCLUSIVE: eBlock = SQLITE_LOCK_SHARED;   break;
+        case SQLITE_LOCK_PENDING:   eBlock = SQLITE_LOCK_RESERVED; break;
+        case SQLITE_LOCK_RESERVED:  eBlock = SQLITE_LOCK_RESERVED; break;
+        case SQLITE_LOCK_SHARED:    eBlock = SQLITE_LOCK_PENDING;  break;
+        default:                    eBlock = -1;                   break;
+      }
+      assert(pShared && pShared->pList);
+      for(pOther=pShared->pList; pOther; pOther=pOther->pNext){
+        if( pOther==&pData->lock || eBlock<0 ) continue;
+        if( pOther->eLock>=eBlock ) rc = SQLITE_BUSY;
+      }
+      if( rc==SQLITE_OK ){
+        pData->lock.eLock = eLock;
+        if( pData->lock.eAsyncLock<eLock ) pData->lock.eAsyncLock = eLock;
+      }
+      assert(pData->lock.eAsyncLock>=pData->lock.eLock);
+      if( rc==SQLITE_OK ) rc = getFileLock(pShared);
+    }
+    pthread_mutex_unlock(&async.lockMutex);
+  }
+
+  ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, pData->zName, rc));
+  return rc;
+}
+/*
+** The xUnlock method. The level recorded in this handle's lock record is
+** lowered to eLock at once (under async.lockMutex) if it was higher, and
+** an ASYNC_UNLOCK operation is then appended to the write queue. The
+** return value is that of addNewAsyncWrite().
+**
+** Handles without a name are ignored and SQLITE_OK is returned.
+*/
+static int asyncUnlock(sqlite3_file *pFile, int eLock){
+  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
+
+  if( p->zName==0 ){
+    return SQLITE_OK;
+  }
+
+  pthread_mutex_lock(&async.lockMutex);
+  if( eLock<p->lock.eLock ){
+    p->lock.eLock = eLock;
+  }
+  pthread_mutex_unlock(&async.lockMutex);
+
+  return addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0);
+}
+
+/*
+** This function is called when the pager layer first opens a database file
+** and is checking for a hot-journal.
+**
+** *pResOut is set to 1 if any handle sharing this file's AsyncLock entry
+** has recorded a lock of SQLITE_LOCK_RESERVED or stronger, and to 0
+** otherwise. The list is examined while holding async.lockMutex. The
+** return value is always SQLITE_OK.
+**
+** A handle opened without a name has no AsyncLock entry (asyncOpen()
+** leaves pLock set to 0 for it); 0 is reported for such a handle.
+*/
+static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){
+  int ret = 0;
+  AsyncFileLock *pIter;
+  AsyncFileData *p = ((AsyncFile *)pFile)->pData;
+
+  pthread_mutex_lock(&async.lockMutex);
+  if( p->pLock ){
+    for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){
+      if( pIter->eLock>=SQLITE_LOCK_RESERVED ){
+        ret = 1;
+        break;
+      }
+    }
+  }
+  pthread_mutex_unlock(&async.lockMutex);
+
+  ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName));
+  *pResOut = ret;
+  return SQLITE_OK;
+}
+
+/* 
+** sqlite3_file_control() implementation.
+**
+** Only SQLITE_FCNTL_LOCKSTATE is handled: the lock level recorded for
+** this handle is read under async.lockMutex and stored in the int that
+** pArg points to, and SQLITE_OK is returned. Every other opcode yields
+** SQLITE_ERROR.
+*/
+static int asyncFileControl(sqlite3_file *id, int op, void *pArg){
+  if( op==SQLITE_FCNTL_LOCKSTATE ){
+    int *pState = (int*)pArg;
+    pthread_mutex_lock(&async.lockMutex);
+    *pState = ((AsyncFile*)id)->pData->lock.eLock;
+    pthread_mutex_unlock(&async.lockMutex);
+    return SQLITE_OK;
+  }
+  return SQLITE_ERROR;
+}
+
+/* 
+** Return the device characteristics and sector-size of the device. It
+** is tricky to implement these correctly, as this backend might not
+** have an open file handle at this point. For that reason fixed values
+** are reported instead of consulting a real file.
+**
+** asyncSectorSize() always reports a sector size of 512 bytes.
+*/
+static int asyncSectorSize(sqlite3_file *pFile){
+  (void)pFile;          /* The handle is not consulted */
+  return 512;
+}
+/*
+** The xDeviceCharacteristics method. The handle is not consulted; 0 (no
+** characteristic flags) is always returned.
+*/
+static int asyncDeviceCharacteristics(sqlite3_file *pFile){
+  (void)pFile;
+  return 0;
+}
+
+/*
+** Remove pData->lock from the list of AsyncFileLock structures attached
+** to pData->pLock.
+**
+** If that leaves the list empty, the real file attached to the AsyncLock
+** entry (if any) is closed, the entry is unlinked from the global
+** async.pLock list and freed, and SQLITE_OK is returned. Otherwise
+** getFileLock() is called to re-evaluate the lock held on the real file
+** and its result is returned.
+**
+** Nothing is done for a handle without a name. The callers in this file
+** hold async.lockMutex while calling this function.
+*/
+static int unlinkAsyncFile(AsyncFileData *pData){
+  AsyncLock *pLock;
+  AsyncFileLock **ppLink;
+  AsyncLock **ppEntry;
+
+  if( !pData->zName ){
+    return SQLITE_OK;
+  }
+  pLock = pData->pLock;
+
+  /* Detach this handle's lock record, if it is on the list. */
+  ppLink = &pLock->pList;
+  while( *ppLink && *ppLink!=&pData->lock ){
+    ppLink = &(*ppLink)->pNext;
+  }
+  if( *ppLink ){
+    *ppLink = pData->lock.pNext;
+  }
+
+  /* Other handles remain: just adjust the lock on the real file. */
+  if( pLock->pList ){
+    return getFileLock(pLock);
+  }
+
+  /* That was the last handle. Dispose of the AsyncLock entry. */
+  if( pLock->pFile ){
+    pLock->pFile->pMethods->xClose(pLock->pFile);
+  }
+  ppEntry = &async.pLock;
+  while( *ppEntry!=pLock ){
+    ppEntry = &(*ppEntry)->pNext;
+  }
+  *ppEntry = pLock->pNext;
+  sqlite3_free(pLock);
+  return SQLITE_OK;
+}
+
+/*
+** Open a file. This is the xOpen method of the "async" VFS.
+**
+** pAsyncVfs->pAppData is the real VFS used to perform the I/O. zName is
+** the file to open, or NULL for an anonymous file. pFile is the handle
+** supplied by the caller, used here as an AsyncFile. flags holds the
+** SQLITE_OPEN_* flags. pOutFlags may be NULL; otherwise it receives the
+** flags reported for the opened file.
+**
+** A single allocation holds the AsyncFileData structure, room for two
+** real file handles (pBaseRead and pBaseWrite) and a copy of zName.
+**
+** Unless SQLITE_OPEN_EXCLUSIVE is set, the real file is opened before
+** this function returns, and a second real handle (pBaseWrite) is opened
+** as well if the first was opened read-write. With SQLITE_OPEN_EXCLUSIVE
+** the open is queued as an ASYNC_OPENEXCLUSIVE operation instead.
+**
+** Named files share one AsyncLock entry per name on the global
+** async.pLock list. The entry is created here if it does not exist yet,
+** and the lock record of the new handle is linked onto it.
+**
+** Returns SQLITE_OK on success, SQLITE_NOMEM if an allocation fails, or
+** the error returned by the real VFS or by addNewAsyncWrite(). Whenever
+** an error is returned the method pointer of pFile is set to 0, so that
+** the caller does not invoke xClose() on a handle whose AsyncFileData
+** has already been freed.
+*/
+static int asyncOpen(
+  sqlite3_vfs *pAsyncVfs,
+  const char *zName,
+  sqlite3_file *pFile,
+  int flags,
+  int *pOutFlags
+){
+  static sqlite3_io_methods async_methods = {
+    1,                               /* iVersion */
+    asyncClose,                      /* xClose */
+    asyncRead,                       /* xRead */
+    asyncWrite,                      /* xWrite */
+    asyncTruncate,                   /* xTruncate */
+    asyncSync,                       /* xSync */
+    asyncFileSize,                   /* xFileSize */
+    asyncLock,                       /* xLock */
+    asyncUnlock,                     /* xUnlock */
+    asyncCheckReservedLock,          /* xCheckReservedLock */
+    asyncFileControl,                /* xFileControl */
+    asyncSectorSize,                 /* xSectorSize */
+    asyncDeviceCharacteristics       /* xDeviceCharacteristics */
+  };
+
+  sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  AsyncFile *p = (AsyncFile *)pFile;
+  int nName = 0;
+  int rc = SQLITE_OK;
+  int nByte;
+  int flagsOut = 0;                /* Flags reported by the real xOpen() */
+  AsyncFileData *pData;
+  AsyncLock *pLock = 0;
+  char *z;
+  int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE);
+
+  /* If zName is NULL, then the upper layer is requesting an anonymous file */
+  if( zName ){
+    nName = strlen(zName)+1;
+  }
+
+  nByte = (
+    sizeof(AsyncFileData) +        /* AsyncFileData structure */
+    2 * pVfs->szOsFile +           /* AsyncFileData.pBaseRead and pBaseWrite */
+    nName                          /* AsyncFileData.zName */
+  ); 
+  z = sqlite3_malloc(nByte);
+  if( !z ){
+    p->pMethod = 0;
+    return SQLITE_NOMEM;
+  }
+  memset(z, 0, nByte);
+  pData = (AsyncFileData*)z;
+  z += sizeof(pData[0]);
+  pData->pBaseRead = (sqlite3_file*)z;
+  z += pVfs->szOsFile;
+  pData->pBaseWrite = (sqlite3_file*)z;
+  pData->close.pFileData = pData;
+  pData->close.op = ASYNC_CLOSE;
+
+  if( zName ){
+    z += pVfs->szOsFile;
+    pData->zName = z;
+    pData->nName = nName;
+    memcpy(pData->zName, zName, nName);
+  }
+
+  if( !isExclusive ){
+    /* The caller is allowed to pass pOutFlags==NULL, so collect the flags
+    ** in a local variable and copy them out only if they were asked for.
+    */
+    rc = pVfs->xOpen(pVfs, zName, pData->pBaseRead, flags, &flagsOut);
+    if( rc==SQLITE_OK ){
+      if( pOutFlags ) *pOutFlags = flagsOut;
+      if( flagsOut&SQLITE_OPEN_READWRITE ){
+        rc = pVfs->xOpen(pVfs, zName, pData->pBaseWrite, flags, 0);
+      }
+    }
+  }
+
+  pthread_mutex_lock(&async.lockMutex);
+
+  if( zName && rc==SQLITE_OK ){
+    pLock = findLock(pData->zName, pData->nName);
+    if( !pLock ){
+      /* AsyncLock structure, one real file handle, and the file name */
+      int nLockByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; 
+      pLock = (AsyncLock *)sqlite3_malloc(nLockByte);
+      if( pLock ){
+        memset(pLock, 0, nLockByte);
+#ifdef ENABLE_FILE_LOCKING
+        if( flags&SQLITE_OPEN_MAIN_DB ){
+          pLock->pFile = (sqlite3_file *)&pLock[1];
+          rc = pVfs->xOpen(pVfs, zName, pLock->pFile, flags, 0);
+          if( rc!=SQLITE_OK ){
+            sqlite3_free(pLock);
+            pLock = 0;
+          }
+        }
+#endif
+        if( pLock ){
+          pLock->nFile = pData->nName;
+          pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile];
+          memcpy(pLock->zFile, pData->zName, pLock->nFile);
+          pLock->pNext = async.pLock;
+          async.pLock = pLock;
+        }
+      }else{
+        rc = SQLITE_NOMEM;
+      }
+    }
+  }
+
+  if( rc==SQLITE_OK ){
+    p->pMethod = &async_methods;
+    p->pData = pData;
+
+    /* Link AsyncFileData.lock into the linked list of 
+    ** AsyncFileLock structures for this file.
+    */
+    if( zName ){
+      pData->lock.pNext = pLock->pList;
+      pLock->pList = &pData->lock;
+      pData->zName = pLock->zFile;
+    }
+  }else{
+    if( pData->pBaseRead->pMethods ){
+      pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
+    }
+    if( pData->pBaseWrite->pMethods ){
+      pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
+    }
+    sqlite3_free(pData);
+  }
+
+  pthread_mutex_unlock(&async.lockMutex);
+
+  if( rc==SQLITE_OK ){
+    incrOpenFileCount();
+    pData->pLock = pLock;
+  }
+
+  if( rc==SQLITE_OK && isExclusive ){
+    rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0);
+    if( rc==SQLITE_OK ){
+      if( pOutFlags ) *pOutFlags = flags;
+    }else{
+      /* NOTE(review): incrOpenFileCount() has already run on this path;
+      ** confirm whether the count needs to be adjusted again here.
+      */
+      pthread_mutex_lock(&async.lockMutex);
+      unlinkAsyncFile(pData);
+      pthread_mutex_unlock(&async.lockMutex);
+      sqlite3_free(pData);
+    }
+  }
+
+  /* pData has been freed on every failure path. Clear the handle so that
+  ** no method, xClose() in particular, can be invoked on it afterwards.
+  */
+  if( rc!=SQLITE_OK ){
+    p->pMethod = 0;
+    p->pData = 0;
+  }
+  return rc;
+}
+
+/*
+** Implementation of sqlite3OsDelete. Nothing is deleted here: an
+** ASYNC_DELETE entry carrying the syncDir flag and the path z (including
+** its nul-terminator) is appended to the write-op queue. The return
+** value is that of addNewAsyncWrite().
+*/
+static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){
+  int rc;
+  (void)pAsyncVfs;      /* The delete is performed later by the writer */
+  rc = addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z);
+  return rc;
+}
+
+/*
+** Implementation of sqlite3OsAccess. This method holds the queue mutex
+** from start to finish.
+**
+** The real VFS (pAsyncVfs->pAppData) is asked first. For an
+** SQLITE_ACCESS_EXISTS query that succeeds, the answer is then adjusted
+** by walking the write-op queue from first to last entry: a queued
+** ASYNC_DELETE of zName changes the answer to 0 and a queued
+** ASYNC_OPENEXCLUSIVE of zName changes it to 1, so the last matching
+** entry decides.
+**
+** The answer is stored in *pResOut and the return code of the real
+** xAccess() is returned. If that call fails without storing a result,
+** *pResOut is set to 0.
+*/
+static int asyncAccess(
+  sqlite3_vfs *pAsyncVfs, 
+  const char *zName, 
+  int flags,
+  int *pResOut
+){
+  int rc;
+  int ret = 0;          /* Well-defined even if the real xAccess() fails */
+  AsyncWrite *p;
+  sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
+
+  assert(flags==SQLITE_ACCESS_READWRITE 
+      || flags==SQLITE_ACCESS_READ 
+      || flags==SQLITE_ACCESS_EXISTS 
+  );
+
+  pthread_mutex_lock(&async.queueMutex);
+  rc = pVfs->xAccess(pVfs, zName, flags, &ret);
+  if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){
+    for(p=async.pQueueFirst; p; p = p->pNext){
+      if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){
+        ret = 0;
+      }else if( p->op==ASYNC_OPENEXCLUSIVE 
+             && p->pFileData->zName
+             && 0==strcmp(p->pFileData->zName, zName) 
+      ){
+        ret = 1;
+      }
+    }
+  }
+  ASYNC_TRACE(("ACCESS(%s): %s = %d\n", 
+    flags==SQLITE_ACCESS_READWRITE?"read-write":
+    flags==SQLITE_ACCESS_READ?"read":"exists"
+    , zName, ret)
+  );
+  pthread_mutex_unlock(&async.queueMutex);
+  *pResOut = ret;
+  return rc;
+}
+
+/*
+** Fill in zPathOut with the full path to the file identified by zPath.
+**
+** The real VFS (pAsyncVfs->pAppData) produces the path; its return code
+** is returned. When it succeeds the result is then rewritten in place
+** into a canonical form, because intra-process file locking identifies
+** files by name. The rewrite assumes unix style paths:
+**
+**     "//"                      becomes  "/"
+**     "/./"                     becomes  "/"
+**     "<path-component>/../"    is removed
+*/
+static int asyncFullPathname(
+  sqlite3_vfs *pAsyncVfs, 
+  const char *zPath, 
+  int nPathOut,
+  char *zPathOut
+){
+  sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  int rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut);
+
+  if( rc==SQLITE_OK ){
+    int nPath = strlen(zPathOut);    /* Length of the path to clean up */
+    int iIn = 0;                     /* Next byte to read */
+    int iOut = 0;                    /* Next byte to write (iOut<=iIn) */
+
+    while( iIn<nPath ){
+      char *zIn = &zPathOut[iIn];
+      int nLeft = nPath - iIn;       /* Bytes left, counting zIn[0] */
+
+      if( zIn[0]=='/' ){
+        /* "//": drop this slash, the following one is examined next */
+        if( nLeft>=2 && zIn[1]=='/' ){
+          iIn += 1;
+          continue;
+        }
+
+        /* "/./": drop "/." and go on from the second slash */
+        if( nLeft>=3 && zIn[1]=='.' && zIn[2]=='/' ){
+          iIn += 2;
+          continue;
+        }
+
+        /* "/../": drop it along with the path component already 
+        ** written in front of it */
+        if( iOut>0 && nLeft>=4 
+         && zIn[1]=='.' && zIn[2]=='.' && zIn[3]=='/' 
+        ){
+          iIn += 4;
+          iOut--;
+          while( iOut>0 && zPathOut[iOut-1]!='/' ){
+            iOut--;
+          }
+          continue;
+        }
+      }
+
+      zPathOut[iOut++] = zPathOut[iIn++];
+    }
+    zPathOut[iOut] = '\0';
+  }
+
+  return rc;
+}
+/*
+** The xDlOpen method: passed straight through to the xDlOpen method of
+** the VFS stored in pAsyncVfs->pAppData.
+*/
+static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  return pParent->xDlOpen(pParent, zPath);
+}
+/*
+** The xDlError method: passed straight through to the xDlError method of
+** the VFS stored in pAsyncVfs->pAppData.
+*/
+static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  pParent->xDlError(pParent, nByte, zErrMsg);
+}
+/*
+** The xDlSym method: passed straight through to the xDlSym method of
+** the VFS stored in pAsyncVfs->pAppData.
+*/
+static void *asyncDlSym(
+  sqlite3_vfs *pAsyncVfs, 
+  void *pHandle, 
+  const char *zSymbol
+){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  return pParent->xDlSym(pParent, pHandle, zSymbol);
+}
+/*
+** The xDlClose method: passed straight through to the xDlClose method of
+** the VFS stored in pAsyncVfs->pAppData.
+*/
+static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  pParent->xDlClose(pParent, pHandle);
+}
+/*
+** The xRandomness method: passed straight through to the xRandomness
+** method of the VFS stored in pAsyncVfs->pAppData.
+*/
+static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  return pParent->xRandomness(pParent, nByte, zBufOut);
+}
+/*
+** The xSleep method: passed straight through to the xSleep method of the
+** VFS stored in pAsyncVfs->pAppData.
+*/
+static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  return pParent->xSleep(pParent, nMicro);
+}
+/*
+** The xCurrentTime method: passed straight through to the xCurrentTime
+** method of the VFS stored in pAsyncVfs->pAppData.
+*/
+static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){
+  sqlite3_vfs *pParent = (sqlite3_vfs *)pAsyncVfs->pAppData;
+  return pParent->xCurrentTime(pParent, pTimeOut);
+}
+
+static sqlite3_vfs async_vfs = {
+  1,                    /* iVersion */
+  sizeof(AsyncFile),    /* szOsFile */
+  0,                    /* mxPathname (copied from the parent VFS by asyncEnable()) */
+  0,                    /* pNext */
+  "async",              /* zName */
+  0,                    /* pAppData (the parent VFS; set by asyncEnable()) */
+  asyncOpen,            /* xOpen */
+  asyncDelete,          /* xDelete */
+  asyncAccess,          /* xAccess */
+  asyncFullPathname,    /* xFullPathname */
+  asyncDlOpen,          /* xDlOpen */
+  asyncDlError,         /* xDlError */
+  asyncDlSym,           /* xDlSym */
+  asyncDlClose,         /* xDlClose */
+  asyncRandomness,      /* xRandomness */
+  asyncSleep,           /* xSleep */
+  asyncCurrentTime      /* xCurrentTime */
+};
+
+/*
+** Call this routine to enable or disable the
+** asynchronous IO features implemented in this file. 
+**
+** Enabling records the current default VFS in async_vfs.pAppData, copies
+** its mxPathname, and registers async_vfs as the new default. Disabling
+** unregisters async_vfs and clears pAppData again. A request that
+** matches the current state does nothing.
+**
+** This routine is not even remotely threadsafe.  Do not call
+** this routine while any SQLite database connections are open.
+*/
+static void asyncEnable(int enable){
+  int isEnabled = (async_vfs.pAppData!=0);
+
+  if( enable && !isEnabled ){
+    sqlite3_vfs *pParent = sqlite3_vfs_find(0);
+    async_vfs.pAppData = (void *)pParent;
+    async_vfs.mxPathname = pParent->mxPathname;
+    sqlite3_vfs_register(&async_vfs, 1);
+  }else if( !enable && isEnabled ){
+    sqlite3_vfs_unregister(&async_vfs);
+    async_vfs.pAppData = 0;
+  }
+}
+
+/* 
+** This procedure runs in a separate thread, reading messages off of the
+** write queue and processing them one by one.  
+**
+** pIsStarted points to an int that is set to 1 as soon as this thread
+** has obtained async.writerMutex. If the mutex cannot be obtained the
+** procedure returns at once and *pIsStarted is left untouched. The
+** return value is always 0.
+**
+** If async.writerHaltNow is true when the main loop next tests it, this
+** procedure exits without starting another message.
+**
+** If async.writerHaltWhenIdle is true, then this procedure exits when
+** the write queue is empty.
+**
+** If both of the above variables are false, this procedure runs
+** indefinitely, waiting for operations to be added to the write queue
+** and processing them in the order in which they arrive.
+**
+** An artificial delay of async.ioDelay milliseconds is inserted after
+** each operation in order to simulate the effect of a slow disk.
+**
+** Only one instance of this procedure may be running at a time.
+*/
+static void *asyncWriterThread(void *pIsStarted){
+  sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData);
+  AsyncWrite *p = 0;
+  int rc = SQLITE_OK;
+  int holdingMutex = 0;
+
+  if( pthread_mutex_trylock(&async.writerMutex) ){
+    return 0;
+  }
+  (*(int *)pIsStarted) = 1;
+  while( async.writerHaltNow==0 ){
+    int doNotFree = 0;
+    sqlite3_file *pBase = 0;
+
+    if( !holdingMutex ){
+      pthread_mutex_lock(&async.queueMutex);
+    }
+    while( (p = async.pQueueFirst)==0 ){
+      pthread_cond_broadcast(&async.emptySignal);
+      if( async.writerHaltWhenIdle ){
+        pthread_mutex_unlock(&async.queueMutex);
+        break;
+      }else{
+        ASYNC_TRACE(("IDLE\n"));
+        pthread_cond_wait(&async.queueSignal, &async.queueMutex);
+        ASYNC_TRACE(("WAKEUP\n"));
+      }
+    }
+    if( p==0 ) break;
+    holdingMutex = 1;
+
+    /* Right now this thread is holding the mutex on the write-op queue.
+    ** Variable 'p' points to the first entry in the write-op queue. In
+    ** the general case, we hold on to the mutex for the entire body of
+    ** the loop. 
+    **
+    ** However in the cases enumerated below, we relinquish the mutex,
+    ** perform the IO, and then re-request the mutex before removing 'p' from
+    ** the head of the write-op queue. The idea is to increase concurrency with
+    ** sqlite threads.
+    **
+    **     * An ASYNC_CLOSE operation.
+    **     * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish 
+    **       the mutex, call the underlying xOpenExclusive() function, then
+    **       re-acquire the mutex before setting the AsyncFile.pBaseRead 
+    **       variable.
+    **     * ASYNC_SYNC and ASYNC_WRITE operations, if 
+    **       SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two
+    **       file-handles are open for the particular file being "synced".
+    */
+    if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){
+      p->op = ASYNC_NOOP;
+    }
+    if( p->pFileData ){
+      pBase = p->pFileData->pBaseWrite;
+      if( 
+        p->op==ASYNC_CLOSE || 
+        p->op==ASYNC_OPENEXCLUSIVE ||
+        (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) 
+      ){
+        pthread_mutex_unlock(&async.queueMutex);
+        holdingMutex = 0;
+      }
+      if( !pBase->pMethods ){
+        pBase = p->pFileData->pBaseRead;
+      }
+    }
+
+    switch( p->op ){
+      case ASYNC_NOOP:
+        break;
+
+      case ASYNC_WRITE:
+        assert( pBase );
+        /* iOffset is a 64-bit value; cast it to match the %d conversion */
+        ASYNC_TRACE(("WRITE %s %d bytes at %d\n",
+                p->pFileData->zName, p->nByte, (int)p->iOffset));
+        rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset);
+        break;
+
+      case ASYNC_SYNC:
+        assert( pBase );
+        ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName));
+        rc = pBase->pMethods->xSync(pBase, p->nByte);
+        break;
+
+      case ASYNC_TRUNCATE:
+        assert( pBase );
+        ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", 
+                p->pFileData->zName, (int)p->iOffset));
+        rc = pBase->pMethods->xTruncate(pBase, p->iOffset);
+        break;
+
+      case ASYNC_CLOSE: {
+        AsyncFileData *pData = p->pFileData;
+        ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName));
+        if( pData->pBaseWrite->pMethods ){
+          pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite);
+        }
+        if( pData->pBaseRead->pMethods ){
+          pData->pBaseRead->pMethods->xClose(pData->pBaseRead);
+        }
+
+        /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock 
+        ** structures for this file. Obtain the async.lockMutex mutex 
+        ** before doing so.
+        */
+        pthread_mutex_lock(&async.lockMutex);
+        rc = unlinkAsyncFile(pData);
+        pthread_mutex_unlock(&async.lockMutex);
+
+        if( !holdingMutex ){
+          pthread_mutex_lock(&async.queueMutex);
+          holdingMutex = 1;
+        }
+        assert_mutex_is_held(&async.queueMutex);
+        async.pQueueFirst = p->pNext;
+        sqlite3_free(pData);
+        doNotFree = 1;
+        break;
+      }
+
+      case ASYNC_UNLOCK: {
+        AsyncFileData *pData = p->pFileData;
+        int eLock = p->nByte;
+        pthread_mutex_lock(&async.lockMutex);
+        pData->lock.eAsyncLock = MIN(
+            pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock)
+        );
+        assert(pData->lock.eAsyncLock>=pData->lock.eLock);
+        rc = getFileLock(pData->pLock);
+        pthread_mutex_unlock(&async.lockMutex);
+        break;
+      }
+
+      case ASYNC_DELETE:
+        ASYNC_TRACE(("DELETE %s\n", p->zBuf));
+        rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset);
+        break;
+
+      case ASYNC_OPENEXCLUSIVE: {
+        int flags = (int)p->iOffset;
+        AsyncFileData *pData = p->pFileData;
+        ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset));
+        assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0);
+        rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0);
+        assert( holdingMutex==0 );
+        pthread_mutex_lock(&async.queueMutex);
+        holdingMutex = 1;
+        break;
+      }
+
+      default: assert(!"Illegal value for AsyncWrite.op");
+    }
+
+    /* If we didn't hang on to the mutex during the IO op, obtain it now
+    ** so that the AsyncWrite structure can be safely removed from the 
+    ** global write-op queue.
+    */
+    if( !holdingMutex ){
+      pthread_mutex_lock(&async.queueMutex);
+      holdingMutex = 1;
+    }
+    /* ASYNC_TRACE(("UNLINK %p\n", p)); */
+    if( p==async.pQueueLast ){
+      async.pQueueLast = 0;
+    }
+    if( !doNotFree ){
+      assert_mutex_is_held(&async.queueMutex);
+      async.pQueueFirst = p->pNext;
+      sqlite3_free(p);
+    }
+    assert( holdingMutex );
+
+    /* An IO error has occurred. We cannot report the error back to the
+    ** connection that requested the I/O since the error happened 
+    ** asynchronously.  The connection has already moved on.  There 
+    ** really is nobody to report the error to.
+    **
+    ** The file for which the error occurred may have been a database or
+    ** journal file. Regardless, none of the currently queued operations
+    ** associated with the same database should now be performed. Nor should
+    ** any subsequently requested IO on either a database or journal file 
+    ** handle for the same database be accepted until the main database
+    ** file handle has been closed and reopened.
+    **
+    ** Furthermore, no further IO should be queued or performed on any file
+    ** handle associated with a database that may have been part of a 
+    ** multi-file transaction that included the database associated with 
+    ** the IO error (i.e. a database ATTACHed to the same handle at some 
+    ** point in time).
+    */
+    if( rc!=SQLITE_OK ){
+      async.ioError = rc;
+    }
+
+    if( async.ioError && !async.pQueueFirst ){
+      pthread_mutex_lock(&async.lockMutex);
+      if( 0==async.pLock ){
+        async.ioError = SQLITE_OK;
+      }
+      pthread_mutex_unlock(&async.lockMutex);
+    }
+
+    /* Drop the queue mutex before continuing to the next write operation
+    ** in order to give other threads a chance to work with the write queue.
+    */
+    if( !async.pQueueFirst || !async.ioError ){
+      pthread_mutex_unlock(&async.queueMutex);
+      holdingMutex = 0;
+      if( async.ioDelay>0 ){
+        /* ioDelay is in milliseconds, xSleep() expects microseconds */
+        pVfs->xSleep(pVfs, async.ioDelay*1000);
+      }else{
+        sched_yield();
+      }
+    }
+  }
+
+  /* The loop can be left through its condition while the queue mutex is
+  ** still held (an IO error is pending and the queue is not empty).
+  ** Release it so that the other threads are not blocked forever.
+  */
+  if( holdingMutex ){
+    pthread_mutex_unlock(&async.queueMutex);
+  }
+  
+  pthread_mutex_unlock(&async.writerMutex);
+  return 0;
+}
+
+/**************************************************************************
+** The remaining code defines a Tcl interface for testing the asynchronous
+** IO implementation in this file.
+**
+** To adapt the code to a non-TCL environment, delete or comment out
+** the code that follows.
+*/
+
+/*
+** sqlite3async_enable ?YES/NO?
+**
+** Enable or disable the asynchronous I/O backend.  Without an argument
+** the command reports whether the backend is currently enabled. This
+** command is not thread-safe.  Do not call it while any database
+** connections are open.
+*/
+static int testAsyncEnable(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  int isOn;
+
+  if( objc<1 || objc>2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?");
+    return TCL_ERROR;
+  }
+
+  /* Query form: report the current state */
+  if( objc==1 ){
+    Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0));
+    return TCL_OK;
+  }
+
+  /* Set form: objv[1] must parse as a boolean */
+  if( Tcl_GetBooleanFromObj(interp, objv[1], &isOn) ){
+    return TCL_ERROR;
+  }
+  asyncEnable(isOn);
+  return TCL_OK;
+}
+
+/*
+** sqlite3async_halt  "now"|"idle"|"never"
+**
+** Set the conditions at which the writer thread will halt:
+**
+**     "now"     sets async.writerHaltNow
+**     "idle"    sets async.writerHaltWhenIdle and clears writerHaltNow
+**     "never"   clears both flags
+**
+** For "now" and "idle" the queue condition variable is broadcast as
+** well. Any other argument is an error.
+*/
+static int testAsyncHalt(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  const char *zCond;
+  int isNow;
+  int isIdle;
+  int isNever;
+
+  if( objc!=2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\"");
+    return TCL_ERROR;
+  }
+
+  zCond = Tcl_GetString(objv[1]);
+  isNow = (0==strcmp(zCond, "now"));
+  isIdle = (0==strcmp(zCond, "idle"));
+  isNever = (0==strcmp(zCond, "never"));
+
+  if( !isNow && !isIdle && !isNever ){
+    Tcl_AppendResult(interp, 
+      "should be one of: \"now\", \"idle\", or \"never\"", (char*)0);
+    return TCL_ERROR;
+  }
+
+  if( isNow ){
+    async.writerHaltNow = 1;
+  }else{
+    async.writerHaltWhenIdle = isIdle;
+    async.writerHaltNow = 0;
+  }
+  if( !isNever ){
+    pthread_cond_broadcast(&async.queueSignal);
+  }
+  return TCL_OK;
+}
+
+/*
+** sqlite3async_delay ?MS?
+**
+** Query or set the number of milliseconds of delay in the writer
+** thread after each write operation.  The default is 0.  By increasing
+** the delay we can simulate the effect of slow disk I/O.
+**
+** The value is kept in async.ioDelay.
+*/
+static int testAsyncDelay(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  int nDelay;
+
+  if( objc<1 || objc>2 ){
+    Tcl_WrongNumArgs(interp, 1, objv, "?MS?");
+    return TCL_ERROR;
+  }
+
+  if( objc==2 ){
+    if( Tcl_GetIntFromObj(interp, objv[1], &nDelay) ){
+      return TCL_ERROR;
+    }
+    async.ioDelay = nDelay;
+  }else{
+    Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay));
+  }
+  return TCL_OK;
+}
+
+/*
+** sqlite3async_start
+**
+** Start a new writer thread.
+**
+** A detached thread running asyncWriterThread() is created. The command
+** then yields the processor repeatedly until that thread has set the
+** isStarted flag handed to it. TCL_ERROR is returned if the thread
+** cannot be created.
+*/
+static int testAsyncStart(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  pthread_t x;
+  int rc;
+  volatile int isStarted = 0;
+  rc = pthread_create(&x, 0, asyncWriterThread, (void *)&isStarted);
+  if( rc ){
+    /* The variadic argument list must end with a null pointer; a bare
+    ** int 0 is not guaranteed to be passed as one.
+    */
+    Tcl_AppendResult(interp, "failed to create the thread", (char*)0);
+    return TCL_ERROR;
+  }
+  pthread_detach(x);
+  while( isStarted==0 ){
+    sched_yield();
+  }
+  return TCL_OK;
+}
+
+/*
+** sqlite3async_wait
+**
+** Wait for the current writer thread to terminate.
+**
+** If the current writer thread is set to run forever then this
+** command would block forever.  To prevent that, an error is returned. 
+**
+** Up to ten attempts are made to find async.writerMutex held by a
+** writer. If it is never found held the command returns without
+** waiting. Otherwise the queue condition variable is broadcast and the
+** command blocks until it can obtain the writer mutex itself.
+*/
+static int testAsyncWait(
+  void * clientData,
+  Tcl_Interp *interp,
+  int objc,
+  Tcl_Obj *CONST objv[]
+){
+  int nTry;
+  int writerRunning = 0;
+
+  if( !async.writerHaltNow && !async.writerHaltWhenIdle ){
+    Tcl_AppendResult(interp, "would block forever", (char*)0);
+    return TCL_ERROR;
+  }
+
+  /* A failed trylock means that a writer thread holds the mutex. */
+  for(nTry=0; nTry<10 && !writerRunning; nTry++){
+    if( pthread_mutex_trylock(&async.writerMutex)==0 ){
+      pthread_mutex_unlock(&async.writerMutex);
+      sched_yield();
+    }else{
+      writerRunning = 1;
+    }
+  }
+
+  if( !writerRunning ){
+    ASYNC_TRACE(("NO-WAIT\n"));
+    return TCL_OK;
+  }
+
+  ASYNC_TRACE(("WAIT\n"));
+  pthread_mutex_lock(&async.queueMutex);
+  pthread_cond_broadcast(&async.queueSignal);
+  pthread_mutex_unlock(&async.queueMutex);
+  pthread_mutex_lock(&async.writerMutex);
+  pthread_mutex_unlock(&async.writerMutex);
+  return TCL_OK;
+}
+
+
+#endif  /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */
+
+/*
+** This routine registers the custom TCL commands defined in this
+** module.  This should be the only procedure visible from outside
+** of this module.
+**
+** The commands, and the sqlite3async_trace variable link, are created
+** only when both SQLITE_OS_UNIX and SQLITE_THREADSAFE are true. TCL_OK
+** is returned in every case.
+*/
+int Sqlitetestasync_Init(Tcl_Interp *interp){
+#if SQLITE_OS_UNIX && SQLITE_THREADSAFE
+  static struct {
+    char *zName;                   /* Name of the TCL command */
+    Tcl_ObjCmdProc *xProc;         /* Function implementing it */
+  } aCmd[] = {
+    { "sqlite3async_enable", testAsyncEnable },
+    { "sqlite3async_halt",   testAsyncHalt   },
+    { "sqlite3async_delay",  testAsyncDelay  },
+    { "sqlite3async_start",  testAsyncStart  },
+    { "sqlite3async_wait",   testAsyncWait   },
+  };
+  int i;
+
+  for(i=0; i<(int)(sizeof(aCmd)/sizeof(aCmd[0])); i++){
+    Tcl_CreateObjCommand(interp, aCmd[i].zName, aCmd[i].xProc, 0, 0);
+  }
+  Tcl_LinkVar(interp, "sqlite3async_trace",
+      (char*)&sqlite3async_trace, TCL_LINK_INT);
+#endif  /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */
+  return TCL_OK;
+}