|
1 /* |
|
2 ** 2005 December 14 |
|
3 ** |
|
4 ** The author disclaims copyright to this source code. In place of |
|
5 ** a legal notice, here is a blessing: |
|
6 ** |
|
7 ** May you do good and not evil. |
|
8 ** May you find forgiveness for yourself and forgive others. |
|
9 ** May you share freely, never taking more than you give. |
|
10 ** |
|
11 ************************************************************************* |
|
12 ** |
|
13 ** $Id: test_async.c,v 1.48 2008/09/26 20:02:50 drh Exp $ |
|
14 ** |
|
15 ** This file contains an example implementation of an asynchronous IO |
|
16 ** backend for SQLite. |
|
17 ** |
|
18 ** WHAT IS ASYNCHRONOUS I/O? |
|
19 ** |
|
20 ** With asynchronous I/O, write requests are handled by a separate thread |
|
21 ** running in the background. This means that the thread that initiates |
|
22 ** a database write does not have to wait for (sometimes slow) disk I/O |
|
23 ** to occur. The write seems to happen very quickly, though in reality |
|
24 ** it is happening at its usual slow pace in the background. |
|
25 ** |
|
26 ** Asynchronous I/O appears to give better responsiveness, but at a price. |
|
27 ** You lose the Durable property. With the default I/O backend of SQLite, |
|
28 ** once a write completes, you know that the information you wrote is |
|
29 ** safely on disk. With the asynchronous I/O, this is not the case. If |
|
30 ** your program crashes or if a power loss occurs after the database |
|
31 ** write but before the asynchronous write thread has completed, then the |
|
32 ** database change might never make it to disk and the next user of the |
|
33 ** database might not see your change. |
|
34 ** |
|
35 ** You lose Durability with asynchronous I/O, but you still retain the |
|
36 ** other parts of ACID: Atomic, Consistent, and Isolated. Many |
|
37 ** applications get along fine without the Durability. |
|
38 ** |
|
39 ** HOW IT WORKS |
|
40 ** |
|
41 ** Asynchronous I/O works by creating a special SQLite "vfs" structure |
|
42 ** and registering it with sqlite3_vfs_register(). When files opened via |
|
43 ** this vfs are written to (using sqlite3OsWrite()), the data is not |
|
44 ** written directly to disk, but is placed in the "write-queue" to be |
|
45 ** handled by the background thread. |
|
46 ** |
|
47 ** When files opened with the asynchronous vfs are read from |
|
48 ** (using sqlite3OsRead()), the data is read from the file on |
|
49 ** disk and the write-queue, so that from the point of view of |
|
50 ** the vfs reader the OsWrite() appears to have already completed. |
|
51 ** |
|
52 ** The special vfs is registered (and unregistered) by calls to |
|
53 ** function asyncEnable() (see below). |
|
54 ** |
|
55 ** LIMITATIONS |
|
56 ** |
|
57 ** This demonstration code is deliberately kept simple in order to keep |
|
58 ** the main ideas clear and easy to understand. Real applications that |
|
59 ** want to do asynchronous I/O might want to add additional capabilities. |
|
60 ** For example, in this demonstration if writes are happening at a steady |
|
61 ** stream that exceeds the I/O capability of the background writer thread, |
|
62 ** the queue of pending write operations will grow without bound until we |
|
63 ** run out of memory. Users of this technique may want to keep track of |
|
64 ** the quantity of pending writes and stop accepting new write requests |
|
65 ** when the buffer gets to be too big. |
|
66 ** |
|
67 ** LOCKING + CONCURRENCY |
|
68 ** |
|
69 ** Multiple connections from within a single process that use this |
|
70 ** implementation of asynchronous IO may access a single database |
|
71 ** file concurrently. From the point of view of the user, if all |
|
72 ** connections are from within a single process, there is no difference |
|
73 ** between the concurrency offered by "normal" SQLite and SQLite |
|
74 ** using the asynchronous backend. |
|
75 ** |
|
76 ** If connections from within multiple processes may access the |
|
77 ** database file, the ENABLE_FILE_LOCKING symbol (see below) must be |
|
78 ** defined. If it is not defined, then no locks are established on |
|
79 ** the database file. In this case, if multiple processes access |
|
80 ** the database file, corruption will quickly result. |
|
81 ** |
|
82 ** If ENABLE_FILE_LOCKING is defined (the default), then connections |
|
83 ** from within multiple processes may access a single database file |
|
84 ** without risking corruption. However concurrency is reduced as |
|
85 ** follows: |
|
86 ** |
|
87 ** * When a connection using asynchronous IO begins a database |
|
88 ** transaction, the database is locked immediately. However the |
|
89 ** lock is not released until after all relevant operations |
|
90 ** in the write-queue have been flushed to disk. This means |
|
91 ** (for example) that the database may remain locked for some |
|
92 ** time after a "COMMIT" or "ROLLBACK" is issued. |
|
93 ** |
|
94 ** * If an application using asynchronous IO executes transactions |
|
95 ** in quick succession, other database users may be effectively |
|
96 ** locked out of the database. This is because when a BEGIN |
|
97 ** is executed, a database lock is established immediately. But |
|
98 ** when the corresponding COMMIT or ROLLBACK occurs, the lock |
|
99 ** is not released until the relevant part of the write-queue |
|
100 ** has been flushed through. As a result, if a COMMIT is followed |
|
101 ** by a BEGIN before the write-queue is flushed through, the database |
|
102 ** is never unlocked, preventing other processes from accessing |
|
103 ** the database. |
|
104 ** |
|
105 ** Defining ENABLE_FILE_LOCKING when using an NFS or other remote |
|
106 ** file-system may slow things down, as synchronous round-trips to the |
|
107 ** server may be required to establish database file locks. |
|
108 */ |
|
109 #define ENABLE_FILE_LOCKING |
|
110 |
|
111 #ifndef SQLITE_AMALGAMATION |
|
112 # include "sqlite3.h" |
|
113 # include <assert.h> |
|
114 # include <string.h> |
|
115 #endif |
|
116 #include "tcl.h" |
|
117 |
|
118 /* |
|
119 ** This test uses pthreads and hence only works on unix and with |
|
120 ** a threadsafe build of SQLite. |
|
121 */ |
|
122 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE |
|
123 |
|
124 /* |
|
125 ** This demo uses pthreads. If you do not have a pthreads implementation |
|
126 ** for your operating system, you will need to recode the threading |
|
127 ** logic. |
|
128 */ |
|
129 #include <pthread.h> |
|
130 #include <sched.h> |
|
131 |
|
/*
** Two-argument minimum and maximum.  Both macros may evaluate an argument
** more than once, so do not pass expressions that have side effects.
*/
#define MIN(x,y) (((x)<(y)) ? (x) : (y))
#define MAX(x,y) (((x)>(y)) ? (x) : (y))

/*
** Forward declarations of the structure types defined further down in
** this file.
*/
typedef struct AsyncWrite    AsyncWrite;
typedef struct AsyncFile     AsyncFile;
typedef struct AsyncFileData AsyncFileData;
typedef struct AsyncFileLock AsyncFileLock;
typedef struct AsyncLock     AsyncLock;
|
142 |
|
/*
** Debug tracing.  Set sqlite3async_trace to a non-zero value to have
** ASYNC_TRACE() write messages to stderr.
**
** ASYNC_TRACE(X) takes a parenthesized printf-style argument list, for
** example ASYNC_TRACE(("n=%d\n", n)).  The macro expands to a complete
** if/else statement so that it cannot capture an "else" that follows
** the invocation.
*/
static int sqlite3async_trace = 0;
# define ASYNC_TRACE(X) if( !sqlite3async_trace ){}else asyncTrace X

/*
** Format a message using sqlite3_vmprintf() and write it to stderr,
** prefixed by the id of the calling thread.  The message is silently
** dropped if the formatted string cannot be allocated.
*/
static void asyncTrace(const char *zFormat, ...){
  char *z;
  va_list ap;
  va_start(ap, zFormat);
  z = sqlite3_vmprintf(zFormat, ap);
  va_end(ap);
  if( z==0 ){
    /* sqlite3_vmprintf() returns NULL when memory allocation fails.
    ** Passing NULL to a "%s" conversion is undefined, so skip the output. */
    return;
  }
  /* NOTE(review): casting pthread_t to int is not portable; kept so that
  ** the trace output format is unchanged. */
  fprintf(stderr, "[%d] %s", (int)pthread_self(), z);
  sqlite3_free(z);
}
|
155 |
|
156 /* |
|
157 ** THREAD SAFETY NOTES |
|
158 ** |
|
159 ** Basic rules: |
|
160 ** |
|
161 ** * Both read and write access to the global write-op queue must be |
|
162 ** protected by the async.queueMutex. As are the async.ioError and |
|
163 ** async.nFile variables. |
|
164 ** |
|
165 ** * The async.pLock list and all AsyncLock and AsyncFileLock |
|
166 ** structures must be protected by the async.lockMutex mutex. |
|
167 ** |
|
168 ** * The file handles from the underlying system are not assumed to |
|
169 ** be thread safe. |
|
170 ** |
|
171 ** * See the last two paragraphs under "The Writer Thread" for |
|
172 ** an assumption to do with file-handle synchronization by the Os. |
|
173 ** |
|
174 ** Deadlock prevention: |
|
175 ** |
|
176 ** There are three mutexes used by the system: the "writer" mutex, |
|
177 ** the "queue" mutex and the "lock" mutex. Rules are: |
|
178 ** |
|
179 ** * It is illegal to block on the writer mutex when any other mutexes |
|
180 ** are held, and |
|
181 ** |
|
182 ** * It is illegal to block on the queue mutex when the lock mutex |
|
183 ** is held. |
|
184 ** |
|
185 ** i.e. mutexes must be acquired in the order "writer", "queue", "lock". |
|
186 ** |
|
187 ** File system operations (invoked by SQLite thread): |
|
188 ** |
|
189 ** xOpen |
|
190 ** xDelete |
|
191 ** xFileExists |
|
192 ** |
|
193 ** File handle operations (invoked by SQLite thread): |
|
194 ** |
|
195 ** asyncWrite, asyncClose, asyncTruncate, asyncSync |
|
196 ** |
|
197 ** The operations above add an entry to the global write-op list. They |
|
198 ** prepare the entry, acquire the async.queueMutex momentarily while |
|
199 ** list pointers are manipulated to insert the new entry, then release |
|
200 ** the mutex and signal the writer thread to wake up in case it happens |
|
201 ** to be asleep. |
|
202 ** |
|
203 ** |
|
204 ** asyncRead, asyncFileSize. |
|
205 ** |
|
206 ** Read operations. Both of these read from the underlying file |
|
207 ** first, then adjust their result based on pending writes in the |
|
208 ** write-op queue. So async.queueMutex is held for the duration |
|
209 ** of these operations to prevent other threads from changing the |
|
210 ** queue in mid operation. |
|
211 ** |
|
212 ** |
|
213 ** asyncLock, asyncUnlock, asyncCheckReservedLock |
|
214 ** |
|
215 ** These primitives implement in-process locking using a hash table |
|
216 ** on the file name. Files are locked correctly for connections coming |
|
217 ** from the same process. But other processes cannot see these locks |
|
218 ** and will therefore not honor them. |
|
219 ** |
|
220 ** |
|
221 ** The writer thread: |
|
222 ** |
|
223 ** The async.writerMutex is used to make sure there is only |
|
224 ** a single writer thread running at a time. |
|
225 ** |
|
226 ** Inside the writer thread is a loop that works like this: |
|
227 ** |
|
228 ** WHILE (write-op list is not empty) |
|
229 ** Do IO operation at head of write-op list |
|
230 ** Remove entry from head of write-op list |
|
231 ** END WHILE |
|
232 ** |
|
233 ** The async.queueMutex is always held during the <write-op list is |
|
234 ** not empty> test, and when the entry is removed from the head |
|
235 ** of the write-op list. Sometimes it is held for the interim |
|
236 ** period (while the IO is performed), and sometimes it is |
|
237 ** relinquished. It is relinquished if (a) the IO op is an |
|
238 ** ASYNC_CLOSE or (b) when the file handle was opened, two of |
|
239 ** the underlying systems handles were opened on the same |
|
240 ** file-system entry. |
|
241 ** |
|
242 ** If condition (b) above is true, then one file-handle |
|
243 ** (AsyncFile.pBaseRead) is used exclusively by sqlite threads to read the |
|
244 ** file, the other (AsyncFile.pBaseWrite) by sqlite3_async_flush() |
|
245 ** threads to perform write() operations. This means that read |
|
246 ** operations are not blocked by asynchronous writes (although |
|
247 ** asynchronous writes may still be blocked by reads). |
|
248 ** |
|
249 ** This assumes that the OS keeps two handles open on the same file |
|
250 ** properly in sync. That is, any read operation that starts after a |
|
251 ** write operation on the same file system entry has completed returns |
|
252 ** data consistent with the write. We also assume that if one thread |
|
253 ** reads a file while another is writing it all bytes other than the |
|
254 ** ones actually being written contain valid data. |
|
255 ** |
|
256 ** If the above assumptions are not true, set the preprocessor symbol |
|
257 ** SQLITE_ASYNC_TWO_FILEHANDLES to 0. |
|
258 */ |
|
259 |
|
/*
** SQLITE_ASYNC_TWO_FILEHANDLES defaults to 1 unless the build has already
** defined it.  Define it to 0 at compile time to override the default.
*/
#if !defined(SQLITE_ASYNC_TWO_FILEHANDLES)
# define SQLITE_ASYNC_TWO_FILEHANDLES 1
#endif
|
264 |
|
265 /* |
|
266 ** State information is held in the static variable "async" defined |
|
267 ** as the following structure. |
|
268 ** |
|
269 ** Both async.ioError and async.nFile are protected by async.queueMutex. |
|
270 */ |
|
271 static struct TestAsyncStaticData { |
|
272 pthread_mutex_t lockMutex; /* For access to aLock hash table */ |
|
273 pthread_mutex_t queueMutex; /* Mutex for access to write operation queue */ |
|
274 pthread_mutex_t writerMutex; /* Prevents multiple writer threads */ |
|
275 pthread_cond_t queueSignal; /* For waking up sleeping writer thread */ |
|
276 pthread_cond_t emptySignal; /* Notify when the write queue is empty */ |
|
277 AsyncWrite *pQueueFirst; /* Next write operation to be processed */ |
|
278 AsyncWrite *pQueueLast; /* Last write operation on the list */ |
|
279 AsyncLock *pLock; /* Linked list of all AsyncLock structures */ |
|
280 volatile int ioDelay; /* Extra delay between write operations */ |
|
281 volatile int writerHaltWhenIdle; /* Writer thread halts when queue empty */ |
|
282 volatile int writerHaltNow; /* Writer thread halts after next op */ |
|
283 int ioError; /* True if an IO error has occured */ |
|
284 int nFile; /* Number of open files (from sqlite pov) */ |
|
285 } async = { |
|
286 PTHREAD_MUTEX_INITIALIZER, |
|
287 PTHREAD_MUTEX_INITIALIZER, |
|
288 PTHREAD_MUTEX_INITIALIZER, |
|
289 PTHREAD_COND_INITIALIZER, |
|
290 PTHREAD_COND_INITIALIZER, |
|
291 }; |
|
292 |
|
/*
** Operation codes that may be stored in AsyncWrite.op.
*/
#define ASYNC_NOOP          0
#define ASYNC_WRITE         1
#define ASYNC_SYNC          2
#define ASYNC_TRUNCATE      3
#define ASYNC_CLOSE         4
#define ASYNC_DELETE        5
#define ASYNC_OPENEXCLUSIVE 6
#define ASYNC_UNLOCK        7

/*
** Printable name of each opcode, indexed by the ASYNC_xxx value.  Used
** for debugging only.  This table must be kept in step with the macros
** above.
*/
static const char *azOpcodeName[] = {
  "NOOP",       /* ASYNC_NOOP */
  "WRITE",      /* ASYNC_WRITE */
  "SYNC",       /* ASYNC_SYNC */
  "TRUNCATE",   /* ASYNC_TRUNCATE */
  "CLOSE",      /* ASYNC_CLOSE */
  "DELETE",     /* ASYNC_DELETE */
  "OPENEX",     /* ASYNC_OPENEXCLUSIVE */
  "UNLOCK"      /* ASYNC_UNLOCK */
};
|
309 |
|
310 /* |
|
311 ** Entries on the write-op queue are instances of the AsyncWrite |
|
312 ** structure, defined here. |
|
313 ** |
|
314 ** The interpretation of the iOffset and nByte variables varies depending |
|
315 ** on the value of AsyncWrite.op: |
|
316 ** |
|
317 ** ASYNC_NOOP: |
|
318 ** No values used. |
|
319 ** |
|
320 ** ASYNC_WRITE: |
|
321 ** iOffset -> Offset in file to write to. |
|
322 ** nByte -> Number of bytes of data to write (pointed to by zBuf). |
|
323 ** |
|
324 ** ASYNC_SYNC: |
|
325 ** nByte -> flags to pass to sqlite3OsSync(). |
|
326 ** |
|
327 ** ASYNC_TRUNCATE: |
|
328 ** iOffset -> Size to truncate file to. |
|
329 ** nByte -> Unused. |
|
330 ** |
|
331 ** ASYNC_CLOSE: |
|
332 ** iOffset -> Unused. |
|
333 ** nByte -> Unused. |
|
334 ** |
|
335 ** ASYNC_DELETE: |
|
336 ** iOffset -> Contains the "syncDir" flag. |
|
337 ** nByte -> Number of bytes of zBuf points to (file name). |
|
338 ** |
|
339 ** ASYNC_OPENEXCLUSIVE: |
|
340 ** iOffset -> Value of "delflag". |
|
341 ** nByte -> Number of bytes of zBuf points to (file name). |
|
342 ** |
|
343 ** ASYNC_UNLOCK: |
|
344 ** nByte -> Argument to sqlite3OsUnlock(). |
|
345 ** |
|
346 ** |
|
347 ** For an ASYNC_WRITE operation, zBuf points to the data to write to the file. |
|
348 ** This space is sqlite3_malloc()d along with the AsyncWrite structure in a |
|
349 ** single blob, so is deleted when sqlite3_free() is called on the parent |
|
350 ** structure. |
|
351 */ |
|
352 struct AsyncWrite { |
|
353 AsyncFileData *pFileData; /* File to write data to or sync */ |
|
354 int op; /* One of ASYNC_xxx etc. */ |
|
355 sqlite_int64 iOffset; /* See above */ |
|
356 int nByte; /* See above */ |
|
357 char *zBuf; /* Data to write to file (or NULL if op!=ASYNC_WRITE) */ |
|
358 AsyncWrite *pNext; /* Next write operation (to any file) */ |
|
359 }; |
|
360 |
|
361 /* |
|
362 ** An instance of this structure is created for each distinct open file |
|
363 ** (i.e. if two handles are opened on the one file, only one of these |
|
364 ** structures is allocated) and stored in the linked list headed by |
|
365 ** async.pLock. Each entry is identified by the full pathname of the opened file. |
|
366 ** |
|
367 ** AsyncLock.pList points to the head of a linked list of AsyncFileLock |
|
368 ** structures, one for each handle currently open on the file. |
|
369 ** |
|
370 ** If the opened file is not a main-database (the SQLITE_OPEN_MAIN_DB is |
|
371 ** not passed to the sqlite3OsOpen() call), or if ENABLE_FILE_LOCKING is |
|
372 ** not defined at compile time, variables AsyncLock.pFile and |
|
373 ** AsyncLock.eLock are never used. Otherwise, pFile is a file handle |
|
374 ** opened on the file in question and used to obtain the file-system |
|
375 ** locks required by database connections within this process. |
|
376 ** |
|
377 ** See comments above the asyncLock() function for more details on |
|
378 ** the implementation of database locking used by this backend. |
|
379 */ |
|
380 struct AsyncLock { |
|
381 char *zFile; |
|
382 int nFile; |
|
383 sqlite3_file *pFile; |
|
384 int eLock; |
|
385 AsyncFileLock *pList; |
|
386 AsyncLock *pNext; /* Next in linked list headed by async.pLock */ |
|
387 }; |
|
388 |
|
389 /* |
|
390 ** An instance of the following structure is allocated along with each |
|
391 ** AsyncFileData structure (see AsyncFileData.lock), but is only used if the |
|
392 ** file was opened with the SQLITE_OPEN_MAIN_DB. |
|
393 */ |
|
394 struct AsyncFileLock { |
|
395 int eLock; /* Internally visible lock state (sqlite pov) */ |
|
396 int eAsyncLock; /* Lock-state with write-queue unlock */ |
|
397 AsyncFileLock *pNext; |
|
398 }; |
|
399 |
|
400 /* |
|
401 ** The AsyncFile structure is a subclass of sqlite3_file used for |
|
402 ** asynchronous IO. |
|
403 ** |
|
404 ** All of the actual data for the structure is stored in the structure |
|
405 ** pointed to by AsyncFile.pData, which is allocated as part of the |
|
406 ** sqlite3OsOpen() using sqlite3_malloc(). The reason for this is that the |
|
407 ** lifetime of the AsyncFile structure is ended by the caller after OsClose() |
|
408 ** is called, but the data in AsyncFileData may be required by the |
|
409 ** writer thread after that point. |
|
410 */ |
|
411 struct AsyncFile { |
|
412 sqlite3_io_methods *pMethod; |
|
413 AsyncFileData *pData; |
|
414 }; |
|
415 struct AsyncFileData { |
|
416 char *zName; /* Underlying OS filename - used for debugging */ |
|
417 int nName; /* Number of characters in zName */ |
|
418 sqlite3_file *pBaseRead; /* Read handle to the underlying Os file */ |
|
419 sqlite3_file *pBaseWrite; /* Write handle to the underlying Os file */ |
|
420 AsyncFileLock lock; /* Lock state for this handle */ |
|
421 AsyncLock *pLock; /* AsyncLock object for this file system entry */ |
|
422 AsyncWrite close; |
|
423 }; |
|
424 |
|
425 /* |
|
426 ** The following async_XXX functions are debugging wrappers around the |
|
427 ** corresponding pthread_XXX functions: |
|
428 ** |
|
429 ** pthread_mutex_lock(); |
|
430 ** pthread_mutex_unlock(); |
|
431 ** pthread_mutex_trylock(); |
|
432 ** pthread_cond_wait(); |
|
433 ** |
|
434 ** It is illegal to pass any mutex other than those stored in the |
|
435 ** following global variables to these functions. |
|
436 ** |
|
437 ** async.queueMutex |
|
438 ** async.writerMutex |
|
439 ** async.lockMutex |
|
440 ** |
|
441 ** If NDEBUG is defined, these wrappers do nothing except call the |
|
442 ** corresponding pthreads function. If NDEBUG is not defined, then the |
|
443 ** following variables are used to store the thread-id (as returned |
|
444 ** by pthread_self()) currently holding the mutex, or 0 otherwise: |
|
445 ** |
|
446 ** asyncdebug.queueMutexHolder |
|
447 ** asyncdebug.writerMutexHolder |
|
448 ** asyncdebug.lockMutexHolder |
|
449 ** |
|
450 ** These variables are used by some assert() statements that verify |
|
451 ** the statements made in the "Deadlock Prevention" notes earlier |
|
452 ** in this file. |
|
453 */ |
|
454 #ifndef NDEBUG |
|
455 |
|
/*
** Records which thread currently holds each of the three async mutexes;
** a value of 0 means the mutex is not held.  The members are listed in
** the same order as the mutexes at the start of "async" because the
** debugging wrappers index both structures as parallel arrays.
*/
static struct TestAsyncDebugData {
  pthread_t lockMutexHolder;    /* Holder of async.lockMutex */
  pthread_t queueMutexHolder;   /* Holder of async.queueMutex */
  pthread_t writerMutexHolder;  /* Holder of async.writerMutex */
} asyncdebug = {0, 0, 0};
|
461 |
|
462 /* |
|
463 ** Wrapper around pthread_mutex_lock(). Checks that we have not violated |
|
464 ** the anti-deadlock rules (see "Deadlock prevention" above). |
|
465 */ |
|
466 static int async_mutex_lock(pthread_mutex_t *pMutex){ |
|
467 int iIdx; |
|
468 int rc; |
|
469 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); |
|
470 pthread_t *aHolder = (pthread_t *)(&asyncdebug); |
|
471 |
|
472 /* The code in this 'ifndef NDEBUG' block depends on a certain alignment |
|
473 * of the variables in TestAsyncStaticData and TestAsyncDebugData. The |
|
474 * following assert() statements check that this has not been changed. |
|
475 * |
|
476 * Really, these only need to be run once at startup time. |
|
477 */ |
|
478 assert(&(aMutex[0])==&async.lockMutex); |
|
479 assert(&(aMutex[1])==&async.queueMutex); |
|
480 assert(&(aMutex[2])==&async.writerMutex); |
|
481 assert(&(aHolder[0])==&asyncdebug.lockMutexHolder); |
|
482 assert(&(aHolder[1])==&asyncdebug.queueMutexHolder); |
|
483 assert(&(aHolder[2])==&asyncdebug.writerMutexHolder); |
|
484 |
|
485 assert( pthread_self()!=0 ); |
|
486 |
|
487 for(iIdx=0; iIdx<3; iIdx++){ |
|
488 if( pMutex==&aMutex[iIdx] ) break; |
|
489 |
|
490 /* This is the key assert(). Here we are checking that if the caller |
|
491 * is trying to block on async.writerMutex, neither of the other two |
|
492 * mutex are held. If the caller is trying to block on async.queueMutex, |
|
493 * lockMutex is not held. |
|
494 */ |
|
495 assert(!pthread_equal(aHolder[iIdx], pthread_self())); |
|
496 } |
|
497 assert(iIdx<3); |
|
498 |
|
499 rc = pthread_mutex_lock(pMutex); |
|
500 if( rc==0 ){ |
|
501 assert(aHolder[iIdx]==0); |
|
502 aHolder[iIdx] = pthread_self(); |
|
503 } |
|
504 return rc; |
|
505 } |
|
506 |
|
507 /* |
|
508 ** Wrapper around pthread_mutex_unlock(). |
|
509 */ |
|
510 static int async_mutex_unlock(pthread_mutex_t *pMutex){ |
|
511 int iIdx; |
|
512 int rc; |
|
513 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); |
|
514 pthread_t *aHolder = (pthread_t *)(&asyncdebug); |
|
515 |
|
516 for(iIdx=0; iIdx<3; iIdx++){ |
|
517 if( pMutex==&aMutex[iIdx] ) break; |
|
518 } |
|
519 assert(iIdx<3); |
|
520 |
|
521 assert(pthread_equal(aHolder[iIdx], pthread_self())); |
|
522 aHolder[iIdx] = 0; |
|
523 rc = pthread_mutex_unlock(pMutex); |
|
524 assert(rc==0); |
|
525 |
|
526 return 0; |
|
527 } |
|
528 |
|
529 /* |
|
530 ** Wrapper around pthread_mutex_trylock(). |
|
531 */ |
|
532 static int async_mutex_trylock(pthread_mutex_t *pMutex){ |
|
533 int iIdx; |
|
534 int rc; |
|
535 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); |
|
536 pthread_t *aHolder = (pthread_t *)(&asyncdebug); |
|
537 |
|
538 for(iIdx=0; iIdx<3; iIdx++){ |
|
539 if( pMutex==&aMutex[iIdx] ) break; |
|
540 } |
|
541 assert(iIdx<3); |
|
542 |
|
543 rc = pthread_mutex_trylock(pMutex); |
|
544 if( rc==0 ){ |
|
545 assert(aHolder[iIdx]==0); |
|
546 aHolder[iIdx] = pthread_self(); |
|
547 } |
|
548 return rc; |
|
549 } |
|
550 |
|
551 /* |
|
552 ** Wrapper around pthread_cond_wait(). |
|
553 */ |
|
554 static int async_cond_wait(pthread_cond_t *pCond, pthread_mutex_t *pMutex){ |
|
555 int iIdx; |
|
556 int rc; |
|
557 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); |
|
558 pthread_t *aHolder = (pthread_t *)(&asyncdebug); |
|
559 |
|
560 for(iIdx=0; iIdx<3; iIdx++){ |
|
561 if( pMutex==&aMutex[iIdx] ) break; |
|
562 } |
|
563 assert(iIdx<3); |
|
564 |
|
565 assert(pthread_equal(aHolder[iIdx],pthread_self())); |
|
566 aHolder[iIdx] = 0; |
|
567 rc = pthread_cond_wait(pCond, pMutex); |
|
568 if( rc==0 ){ |
|
569 aHolder[iIdx] = pthread_self(); |
|
570 } |
|
571 return rc; |
|
572 } |
|
573 |
|
574 /* |
|
575 ** Assert that the mutex is held by the current thread. |
|
576 */ |
|
577 static void assert_mutex_is_held(pthread_mutex_t *pMutex){ |
|
578 int iIdx; |
|
579 pthread_mutex_t *aMutex = (pthread_mutex_t *)(&async); |
|
580 pthread_t *aHolder = (pthread_t *)(&asyncdebug); |
|
581 |
|
582 for(iIdx=0; iIdx<3; iIdx++){ |
|
583 if( pMutex==&aMutex[iIdx] ) break; |
|
584 } |
|
585 assert(iIdx<3); |
|
586 assert( aHolder[iIdx]==pthread_self() ); |
|
587 } |
|
588 |
|
/*
** From here on, the selected pthread_XXX calls below are redirected to
** the async_XXX debugging wrappers.
*/
#define pthread_mutex_lock     async_mutex_lock
#define pthread_mutex_unlock   async_mutex_unlock
#define pthread_mutex_trylock  async_mutex_trylock
#define pthread_cond_wait      async_cond_wait
|
594 |
|
595 #else /* if defined(NDEBUG) */ |
|
596 |
|
/* With NDEBUG defined no holder information is kept, so this check
** expands to nothing. */
#define assert_mutex_is_held(X)
|
598 |
|
599 #endif /* !defined(NDEBUG) */ |
|
600 |
|
601 /* |
|
602 ** Add an entry to the end of the global write-op list. pWrite should point |
|
603 ** to an AsyncWrite structure allocated using sqlite3_malloc(). The writer |
|
604 ** thread will call sqlite3_free() to free the structure after the specified |
|
605 ** operation has been completed. |
|
606 ** |
|
607 ** Once an AsyncWrite structure has been added to the list, it becomes the |
|
608 ** property of the writer thread and must not be read or modified by the |
|
609 ** caller. |
|
610 */ |
|
611 static void addAsyncWrite(AsyncWrite *pWrite){ |
|
612 /* We must hold the queue mutex in order to modify the queue pointers */ |
|
613 pthread_mutex_lock(&async.queueMutex); |
|
614 |
|
615 /* Add the record to the end of the write-op queue */ |
|
616 assert( !pWrite->pNext ); |
|
617 if( async.pQueueLast ){ |
|
618 assert( async.pQueueFirst ); |
|
619 async.pQueueLast->pNext = pWrite; |
|
620 }else{ |
|
621 async.pQueueFirst = pWrite; |
|
622 } |
|
623 async.pQueueLast = pWrite; |
|
624 ASYNC_TRACE(("PUSH %p (%s %s %d)\n", pWrite, azOpcodeName[pWrite->op], |
|
625 pWrite->pFileData ? pWrite->pFileData->zName : "-", pWrite->iOffset)); |
|
626 |
|
627 if( pWrite->op==ASYNC_CLOSE ){ |
|
628 async.nFile--; |
|
629 } |
|
630 |
|
631 /* Drop the queue mutex */ |
|
632 pthread_mutex_unlock(&async.queueMutex); |
|
633 |
|
634 /* The writer thread might have been idle because there was nothing |
|
635 ** on the write-op queue for it to do. So wake it up. */ |
|
636 pthread_cond_signal(&async.queueSignal); |
|
637 } |
|
638 |
|
639 /* |
|
640 ** Increment async.nFile in a thread-safe manner. |
|
641 */ |
|
642 static void incrOpenFileCount(){ |
|
643 /* We must hold the queue mutex in order to modify async.nFile */ |
|
644 pthread_mutex_lock(&async.queueMutex); |
|
645 if( async.nFile==0 ){ |
|
646 async.ioError = SQLITE_OK; |
|
647 } |
|
648 async.nFile++; |
|
649 pthread_mutex_unlock(&async.queueMutex); |
|
650 } |
|
651 |
|
652 /* |
|
653 ** This is a utility function to allocate and populate a new AsyncWrite |
|
654 ** structure and insert it (via addAsyncWrite() ) into the global list. |
|
655 */ |
|
656 static int addNewAsyncWrite( |
|
657 AsyncFileData *pFileData, |
|
658 int op, |
|
659 sqlite3_int64 iOffset, |
|
660 int nByte, |
|
661 const char *zByte |
|
662 ){ |
|
663 AsyncWrite *p; |
|
664 if( op!=ASYNC_CLOSE && async.ioError ){ |
|
665 return async.ioError; |
|
666 } |
|
667 p = sqlite3_malloc(sizeof(AsyncWrite) + (zByte?nByte:0)); |
|
668 if( !p ){ |
|
669 /* The upper layer does not expect operations like OsWrite() to |
|
670 ** return SQLITE_NOMEM. This is partly because under normal conditions |
|
671 ** SQLite is required to do rollback without calling malloc(). So |
|
672 ** if malloc() fails here, treat it as an I/O error. The above |
|
673 ** layer knows how to handle that. |
|
674 */ |
|
675 return SQLITE_IOERR; |
|
676 } |
|
677 p->op = op; |
|
678 p->iOffset = iOffset; |
|
679 p->nByte = nByte; |
|
680 p->pFileData = pFileData; |
|
681 p->pNext = 0; |
|
682 if( zByte ){ |
|
683 p->zBuf = (char *)&p[1]; |
|
684 memcpy(p->zBuf, zByte, nByte); |
|
685 }else{ |
|
686 p->zBuf = 0; |
|
687 } |
|
688 addAsyncWrite(p); |
|
689 return SQLITE_OK; |
|
690 } |
|
691 |
|
692 /* |
|
693 ** Close the file. This just adds an entry to the write-op list, the file is |
|
694 ** not actually closed. |
|
695 */ |
|
696 static int asyncClose(sqlite3_file *pFile){ |
|
697 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
698 |
|
699 /* Unlock the file, if it is locked */ |
|
700 pthread_mutex_lock(&async.lockMutex); |
|
701 p->lock.eLock = 0; |
|
702 pthread_mutex_unlock(&async.lockMutex); |
|
703 |
|
704 addAsyncWrite(&p->close); |
|
705 return SQLITE_OK; |
|
706 } |
|
707 |
|
708 /* |
|
709 ** Implementation of sqlite3OsWrite() for asynchronous files. Instead of |
|
710 ** writing to the underlying file, this function adds an entry to the end of |
|
711 ** the global AsyncWrite list. Either SQLITE_OK or SQLITE_NOMEM may be |
|
712 ** returned. |
|
713 */ |
|
714 static int asyncWrite( |
|
715 sqlite3_file *pFile, |
|
716 const void *pBuf, |
|
717 int amt, |
|
718 sqlite3_int64 iOff |
|
719 ){ |
|
720 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
721 return addNewAsyncWrite(p, ASYNC_WRITE, iOff, amt, pBuf); |
|
722 } |
|
723 |
|
724 /* |
|
725 ** Read data from the file. First we read from the filesystem, then adjust |
|
726 ** the contents of the buffer based on ASYNC_WRITE operations in the |
|
727 ** write-op queue. |
|
728 ** |
|
729 ** This method holds the mutex from start to finish. |
|
730 */ |
|
731 static int asyncRead( |
|
732 sqlite3_file *pFile, |
|
733 void *zOut, |
|
734 int iAmt, |
|
735 sqlite3_int64 iOffset |
|
736 ){ |
|
737 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
738 int rc = SQLITE_OK; |
|
739 sqlite3_int64 filesize; |
|
740 int nRead; |
|
741 sqlite3_file *pBase = p->pBaseRead; |
|
742 |
|
743 /* Grab the write queue mutex for the duration of the call */ |
|
744 pthread_mutex_lock(&async.queueMutex); |
|
745 |
|
746 /* If an I/O error has previously occurred in this virtual file |
|
747 ** system, then all subsequent operations fail. |
|
748 */ |
|
749 if( async.ioError!=SQLITE_OK ){ |
|
750 rc = async.ioError; |
|
751 goto asyncread_out; |
|
752 } |
|
753 |
|
754 if( pBase->pMethods ){ |
|
755 rc = pBase->pMethods->xFileSize(pBase, &filesize); |
|
756 if( rc!=SQLITE_OK ){ |
|
757 goto asyncread_out; |
|
758 } |
|
759 nRead = MIN(filesize - iOffset, iAmt); |
|
760 if( nRead>0 ){ |
|
761 rc = pBase->pMethods->xRead(pBase, zOut, nRead, iOffset); |
|
762 ASYNC_TRACE(("READ %s %d bytes at %d\n", p->zName, nRead, iOffset)); |
|
763 } |
|
764 } |
|
765 |
|
766 if( rc==SQLITE_OK ){ |
|
767 AsyncWrite *pWrite; |
|
768 char *zName = p->zName; |
|
769 |
|
770 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ |
|
771 if( pWrite->op==ASYNC_WRITE && ( |
|
772 (pWrite->pFileData==p) || |
|
773 (zName && pWrite->pFileData->zName==zName) |
|
774 )){ |
|
775 int iBeginOut = (pWrite->iOffset-iOffset); |
|
776 int iBeginIn = -iBeginOut; |
|
777 int nCopy; |
|
778 |
|
779 if( iBeginIn<0 ) iBeginIn = 0; |
|
780 if( iBeginOut<0 ) iBeginOut = 0; |
|
781 nCopy = MIN(pWrite->nByte-iBeginIn, iAmt-iBeginOut); |
|
782 |
|
783 if( nCopy>0 ){ |
|
784 memcpy(&((char *)zOut)[iBeginOut], &pWrite->zBuf[iBeginIn], nCopy); |
|
785 ASYNC_TRACE(("OVERREAD %d bytes at %d\n", nCopy, iBeginOut+iOffset)); |
|
786 } |
|
787 } |
|
788 } |
|
789 } |
|
790 |
|
791 asyncread_out: |
|
792 pthread_mutex_unlock(&async.queueMutex); |
|
793 return rc; |
|
794 } |
|
795 |
|
796 /* |
|
797 ** Truncate the file to nByte bytes in length. This just adds an entry to |
|
798 ** the write-op list, no IO actually takes place. |
|
799 */ |
|
800 static int asyncTruncate(sqlite3_file *pFile, sqlite3_int64 nByte){ |
|
801 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
802 return addNewAsyncWrite(p, ASYNC_TRUNCATE, nByte, 0, 0); |
|
803 } |
|
804 |
|
805 /* |
|
806 ** Sync the file. This just adds an entry to the write-op list, the |
|
807 ** sync() is done later by sqlite3_async_flush(). |
|
808 */ |
|
809 static int asyncSync(sqlite3_file *pFile, int flags){ |
|
810 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
811 return addNewAsyncWrite(p, ASYNC_SYNC, 0, flags, 0); |
|
812 } |
|
813 |
|
814 /* |
|
815 ** Read the size of the file. First we read the size of the file system |
|
816 ** entry, then adjust for any ASYNC_WRITE or ASYNC_TRUNCATE operations |
|
817 ** currently in the write-op list. |
|
818 ** |
|
819 ** This method holds the mutex from start to finish. |
|
820 */ |
|
821 int asyncFileSize(sqlite3_file *pFile, sqlite3_int64 *piSize){ |
|
822 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
823 int rc = SQLITE_OK; |
|
824 sqlite3_int64 s = 0; |
|
825 sqlite3_file *pBase; |
|
826 |
|
827 pthread_mutex_lock(&async.queueMutex); |
|
828 |
|
829 /* Read the filesystem size from the base file. If pBaseRead is NULL, this |
|
830 ** means the file hasn't been opened yet. In this case all relevant data |
|
831 ** must be in the write-op queue anyway, so we can omit reading from the |
|
832 ** file-system. |
|
833 */ |
|
834 pBase = p->pBaseRead; |
|
835 if( pBase->pMethods ){ |
|
836 rc = pBase->pMethods->xFileSize(pBase, &s); |
|
837 } |
|
838 |
|
839 if( rc==SQLITE_OK ){ |
|
840 AsyncWrite *pWrite; |
|
841 for(pWrite=async.pQueueFirst; pWrite; pWrite = pWrite->pNext){ |
|
842 if( pWrite->op==ASYNC_DELETE |
|
843 && p->zName |
|
844 && strcmp(p->zName, pWrite->zBuf)==0 |
|
845 ){ |
|
846 s = 0; |
|
847 }else if( pWrite->pFileData && ( |
|
848 (pWrite->pFileData==p) |
|
849 || (p->zName && pWrite->pFileData->zName==p->zName) |
|
850 )){ |
|
851 switch( pWrite->op ){ |
|
852 case ASYNC_WRITE: |
|
853 s = MAX(pWrite->iOffset + (sqlite3_int64)(pWrite->nByte), s); |
|
854 break; |
|
855 case ASYNC_TRUNCATE: |
|
856 s = MIN(s, pWrite->iOffset); |
|
857 break; |
|
858 } |
|
859 } |
|
860 } |
|
861 *piSize = s; |
|
862 } |
|
863 pthread_mutex_unlock(&async.queueMutex); |
|
864 return rc; |
|
865 } |
|
866 |
|
867 /* |
|
868 ** Lock or unlock the actual file-system entry. |
|
869 */ |
|
870 static int getFileLock(AsyncLock *pLock){ |
|
871 int rc = SQLITE_OK; |
|
872 AsyncFileLock *pIter; |
|
873 int eRequired = 0; |
|
874 |
|
875 if( pLock->pFile ){ |
|
876 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ |
|
877 assert(pIter->eAsyncLock>=pIter->eLock); |
|
878 if( pIter->eAsyncLock>eRequired ){ |
|
879 eRequired = pIter->eAsyncLock; |
|
880 assert(eRequired>=0 && eRequired<=SQLITE_LOCK_EXCLUSIVE); |
|
881 } |
|
882 } |
|
883 |
|
884 if( eRequired>pLock->eLock ){ |
|
885 rc = pLock->pFile->pMethods->xLock(pLock->pFile, eRequired); |
|
886 if( rc==SQLITE_OK ){ |
|
887 pLock->eLock = eRequired; |
|
888 } |
|
889 } |
|
890 else if( eRequired<pLock->eLock && eRequired<=SQLITE_LOCK_SHARED ){ |
|
891 rc = pLock->pFile->pMethods->xUnlock(pLock->pFile, eRequired); |
|
892 if( rc==SQLITE_OK ){ |
|
893 pLock->eLock = eRequired; |
|
894 } |
|
895 } |
|
896 } |
|
897 |
|
898 return rc; |
|
899 } |
|
900 |
|
901 /* |
|
902 ** Return the AsyncLock structure from the global async.pLock list |
|
903 ** associated with the file-system entry identified by path zName |
|
904 ** (a string of nName bytes). If no such structure exists, return 0. |
|
905 */ |
|
906 static AsyncLock *findLock(const char *zName, int nName){ |
|
907 AsyncLock *p = async.pLock; |
|
908 while( p && (p->nFile!=nName || memcmp(p->zFile, zName, nName)) ){ |
|
909 p = p->pNext; |
|
910 } |
|
911 return p; |
|
912 } |
|
913 |
|
914 /* |
|
915 ** The following two methods - asyncLock() and asyncUnlock() - are used |
|
916 ** to obtain and release locks on database files opened with the |
|
917 ** asynchronous backend. |
|
918 */ |
|
919 static int asyncLock(sqlite3_file *pFile, int eLock){ |
|
920 int rc = SQLITE_OK; |
|
921 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
922 |
|
923 if( p->zName ){ |
|
924 pthread_mutex_lock(&async.lockMutex); |
|
925 if( p->lock.eLock<eLock ){ |
|
926 AsyncLock *pLock = p->pLock; |
|
927 AsyncFileLock *pIter; |
|
928 assert(pLock && pLock->pList); |
|
929 for(pIter=pLock->pList; pIter; pIter=pIter->pNext){ |
|
930 if( pIter!=&p->lock && ( |
|
931 (eLock==SQLITE_LOCK_EXCLUSIVE && pIter->eLock>=SQLITE_LOCK_SHARED) || |
|
932 (eLock==SQLITE_LOCK_PENDING && pIter->eLock>=SQLITE_LOCK_RESERVED) || |
|
933 (eLock==SQLITE_LOCK_RESERVED && pIter->eLock>=SQLITE_LOCK_RESERVED) || |
|
934 (eLock==SQLITE_LOCK_SHARED && pIter->eLock>=SQLITE_LOCK_PENDING) |
|
935 )){ |
|
936 rc = SQLITE_BUSY; |
|
937 } |
|
938 } |
|
939 if( rc==SQLITE_OK ){ |
|
940 p->lock.eLock = eLock; |
|
941 p->lock.eAsyncLock = MAX(p->lock.eAsyncLock, eLock); |
|
942 } |
|
943 assert(p->lock.eAsyncLock>=p->lock.eLock); |
|
944 if( rc==SQLITE_OK ){ |
|
945 rc = getFileLock(pLock); |
|
946 } |
|
947 } |
|
948 pthread_mutex_unlock(&async.lockMutex); |
|
949 } |
|
950 |
|
951 ASYNC_TRACE(("LOCK %d (%s) rc=%d\n", eLock, p->zName, rc)); |
|
952 return rc; |
|
953 } |
|
954 static int asyncUnlock(sqlite3_file *pFile, int eLock){ |
|
955 int rc = SQLITE_OK; |
|
956 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
957 if( p->zName ){ |
|
958 AsyncFileLock *pLock = &p->lock; |
|
959 pthread_mutex_lock(&async.lockMutex); |
|
960 pLock->eLock = MIN(pLock->eLock, eLock); |
|
961 pthread_mutex_unlock(&async.lockMutex); |
|
962 rc = addNewAsyncWrite(p, ASYNC_UNLOCK, 0, eLock, 0); |
|
963 } |
|
964 return rc; |
|
965 } |
|
966 |
|
967 /* |
|
968 ** This function is called when the pager layer first opens a database file |
|
969 ** and is checking for a hot-journal. |
|
970 */ |
|
971 static int asyncCheckReservedLock(sqlite3_file *pFile, int *pResOut){ |
|
972 int ret = 0; |
|
973 AsyncFileLock *pIter; |
|
974 AsyncFileData *p = ((AsyncFile *)pFile)->pData; |
|
975 |
|
976 pthread_mutex_lock(&async.lockMutex); |
|
977 for(pIter=p->pLock->pList; pIter; pIter=pIter->pNext){ |
|
978 if( pIter->eLock>=SQLITE_LOCK_RESERVED ){ |
|
979 ret = 1; |
|
980 } |
|
981 } |
|
982 pthread_mutex_unlock(&async.lockMutex); |
|
983 |
|
984 ASYNC_TRACE(("CHECK-LOCK %d (%s)\n", ret, p->zName)); |
|
985 *pResOut = ret; |
|
986 return SQLITE_OK; |
|
987 } |
|
988 |
|
989 /* |
|
990 ** sqlite3_file_control() implementation. |
|
991 */ |
|
992 static int asyncFileControl(sqlite3_file *id, int op, void *pArg){ |
|
993 switch( op ){ |
|
994 case SQLITE_FCNTL_LOCKSTATE: { |
|
995 pthread_mutex_lock(&async.lockMutex); |
|
996 *(int*)pArg = ((AsyncFile*)id)->pData->lock.eLock; |
|
997 pthread_mutex_unlock(&async.lockMutex); |
|
998 return SQLITE_OK; |
|
999 } |
|
1000 } |
|
1001 return SQLITE_ERROR; |
|
1002 } |
|
1003 |
|
1004 /* |
|
1005 ** Return the device characteristics and sector-size of the device. It |
|
1006 ** is not tricky to implement these correctly, as this backend might |
|
1007 ** not have an open file handle at this point. |
|
1008 */ |
|
1009 static int asyncSectorSize(sqlite3_file *pFile){ |
|
1010 return 512; |
|
1011 } |
|
1012 static int asyncDeviceCharacteristics(sqlite3_file *pFile){ |
|
1013 return 0; |
|
1014 } |
|
1015 |
|
1016 static int unlinkAsyncFile(AsyncFileData *pData){ |
|
1017 AsyncFileLock **ppIter; |
|
1018 int rc = SQLITE_OK; |
|
1019 |
|
1020 if( pData->zName ){ |
|
1021 AsyncLock *pLock = pData->pLock; |
|
1022 for(ppIter=&pLock->pList; *ppIter; ppIter=&((*ppIter)->pNext)){ |
|
1023 if( (*ppIter)==&pData->lock ){ |
|
1024 *ppIter = pData->lock.pNext; |
|
1025 break; |
|
1026 } |
|
1027 } |
|
1028 if( !pLock->pList ){ |
|
1029 AsyncLock **pp; |
|
1030 if( pLock->pFile ){ |
|
1031 pLock->pFile->pMethods->xClose(pLock->pFile); |
|
1032 } |
|
1033 for(pp=&async.pLock; *pp!=pLock; pp=&((*pp)->pNext)); |
|
1034 *pp = pLock->pNext; |
|
1035 sqlite3_free(pLock); |
|
1036 }else{ |
|
1037 rc = getFileLock(pLock); |
|
1038 } |
|
1039 } |
|
1040 |
|
1041 return rc; |
|
1042 } |
|
1043 |
|
1044 /* |
|
1045 ** Open a file. |
|
1046 */ |
|
1047 static int asyncOpen( |
|
1048 sqlite3_vfs *pAsyncVfs, |
|
1049 const char *zName, |
|
1050 sqlite3_file *pFile, |
|
1051 int flags, |
|
1052 int *pOutFlags |
|
1053 ){ |
|
1054 static sqlite3_io_methods async_methods = { |
|
1055 1, /* iVersion */ |
|
1056 asyncClose, /* xClose */ |
|
1057 asyncRead, /* xRead */ |
|
1058 asyncWrite, /* xWrite */ |
|
1059 asyncTruncate, /* xTruncate */ |
|
1060 asyncSync, /* xSync */ |
|
1061 asyncFileSize, /* xFileSize */ |
|
1062 asyncLock, /* xLock */ |
|
1063 asyncUnlock, /* xUnlock */ |
|
1064 asyncCheckReservedLock, /* xCheckReservedLock */ |
|
1065 asyncFileControl, /* xFileControl */ |
|
1066 asyncSectorSize, /* xSectorSize */ |
|
1067 asyncDeviceCharacteristics /* xDeviceCharacteristics */ |
|
1068 }; |
|
1069 |
|
1070 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1071 AsyncFile *p = (AsyncFile *)pFile; |
|
1072 int nName = 0; |
|
1073 int rc = SQLITE_OK; |
|
1074 int nByte; |
|
1075 AsyncFileData *pData; |
|
1076 AsyncLock *pLock = 0; |
|
1077 char *z; |
|
1078 int isExclusive = (flags&SQLITE_OPEN_EXCLUSIVE); |
|
1079 |
|
1080 /* If zName is NULL, then the upper layer is requesting an anonymous file */ |
|
1081 if( zName ){ |
|
1082 nName = strlen(zName)+1; |
|
1083 } |
|
1084 |
|
1085 nByte = ( |
|
1086 sizeof(AsyncFileData) + /* AsyncFileData structure */ |
|
1087 2 * pVfs->szOsFile + /* AsyncFileData.pBaseRead and pBaseWrite */ |
|
1088 nName /* AsyncFileData.zName */ |
|
1089 ); |
|
1090 z = sqlite3_malloc(nByte); |
|
1091 if( !z ){ |
|
1092 return SQLITE_NOMEM; |
|
1093 } |
|
1094 memset(z, 0, nByte); |
|
1095 pData = (AsyncFileData*)z; |
|
1096 z += sizeof(pData[0]); |
|
1097 pData->pBaseRead = (sqlite3_file*)z; |
|
1098 z += pVfs->szOsFile; |
|
1099 pData->pBaseWrite = (sqlite3_file*)z; |
|
1100 pData->close.pFileData = pData; |
|
1101 pData->close.op = ASYNC_CLOSE; |
|
1102 |
|
1103 if( zName ){ |
|
1104 z += pVfs->szOsFile; |
|
1105 pData->zName = z; |
|
1106 pData->nName = nName; |
|
1107 memcpy(pData->zName, zName, nName); |
|
1108 } |
|
1109 |
|
1110 if( !isExclusive ){ |
|
1111 rc = pVfs->xOpen(pVfs, zName, pData->pBaseRead, flags, pOutFlags); |
|
1112 if( rc==SQLITE_OK && ((*pOutFlags)&SQLITE_OPEN_READWRITE) ){ |
|
1113 rc = pVfs->xOpen(pVfs, zName, pData->pBaseWrite, flags, 0); |
|
1114 } |
|
1115 } |
|
1116 |
|
1117 pthread_mutex_lock(&async.lockMutex); |
|
1118 |
|
1119 if( zName && rc==SQLITE_OK ){ |
|
1120 pLock = findLock(pData->zName, pData->nName); |
|
1121 if( !pLock ){ |
|
1122 int nByte = pVfs->szOsFile + sizeof(AsyncLock) + pData->nName + 1; |
|
1123 pLock = (AsyncLock *)sqlite3_malloc(nByte); |
|
1124 if( pLock ){ |
|
1125 memset(pLock, 0, nByte); |
|
1126 #ifdef ENABLE_FILE_LOCKING |
|
1127 if( flags&SQLITE_OPEN_MAIN_DB ){ |
|
1128 pLock->pFile = (sqlite3_file *)&pLock[1]; |
|
1129 rc = pVfs->xOpen(pVfs, zName, pLock->pFile, flags, 0); |
|
1130 if( rc!=SQLITE_OK ){ |
|
1131 sqlite3_free(pLock); |
|
1132 pLock = 0; |
|
1133 } |
|
1134 } |
|
1135 #endif |
|
1136 if( pLock ){ |
|
1137 pLock->nFile = pData->nName; |
|
1138 pLock->zFile = &((char *)(&pLock[1]))[pVfs->szOsFile]; |
|
1139 memcpy(pLock->zFile, pData->zName, pLock->nFile); |
|
1140 pLock->pNext = async.pLock; |
|
1141 async.pLock = pLock; |
|
1142 } |
|
1143 }else{ |
|
1144 rc = SQLITE_NOMEM; |
|
1145 } |
|
1146 } |
|
1147 } |
|
1148 |
|
1149 if( rc==SQLITE_OK ){ |
|
1150 p->pMethod = &async_methods; |
|
1151 p->pData = pData; |
|
1152 |
|
1153 /* Link AsyncFileData.lock into the linked list of |
|
1154 ** AsyncFileLock structures for this file. |
|
1155 */ |
|
1156 if( zName ){ |
|
1157 pData->lock.pNext = pLock->pList; |
|
1158 pLock->pList = &pData->lock; |
|
1159 pData->zName = pLock->zFile; |
|
1160 } |
|
1161 }else{ |
|
1162 if( pData->pBaseRead->pMethods ){ |
|
1163 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); |
|
1164 } |
|
1165 if( pData->pBaseWrite->pMethods ){ |
|
1166 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); |
|
1167 } |
|
1168 sqlite3_free(pData); |
|
1169 } |
|
1170 |
|
1171 pthread_mutex_unlock(&async.lockMutex); |
|
1172 |
|
1173 if( rc==SQLITE_OK ){ |
|
1174 incrOpenFileCount(); |
|
1175 pData->pLock = pLock; |
|
1176 } |
|
1177 |
|
1178 if( rc==SQLITE_OK && isExclusive ){ |
|
1179 rc = addNewAsyncWrite(pData, ASYNC_OPENEXCLUSIVE, (sqlite3_int64)flags,0,0); |
|
1180 if( rc==SQLITE_OK ){ |
|
1181 if( pOutFlags ) *pOutFlags = flags; |
|
1182 }else{ |
|
1183 pthread_mutex_lock(&async.lockMutex); |
|
1184 unlinkAsyncFile(pData); |
|
1185 pthread_mutex_unlock(&async.lockMutex); |
|
1186 sqlite3_free(pData); |
|
1187 } |
|
1188 } |
|
1189 return rc; |
|
1190 } |
|
1191 |
|
1192 /* |
|
1193 ** Implementation of sqlite3OsDelete. Add an entry to the end of the |
|
1194 ** write-op queue to perform the delete. |
|
1195 */ |
|
1196 static int asyncDelete(sqlite3_vfs *pAsyncVfs, const char *z, int syncDir){ |
|
1197 return addNewAsyncWrite(0, ASYNC_DELETE, syncDir, strlen(z)+1, z); |
|
1198 } |
|
1199 |
|
1200 /* |
|
1201 ** Implementation of sqlite3OsAccess. This method holds the mutex from |
|
1202 ** start to finish. |
|
1203 */ |
|
1204 static int asyncAccess( |
|
1205 sqlite3_vfs *pAsyncVfs, |
|
1206 const char *zName, |
|
1207 int flags, |
|
1208 int *pResOut |
|
1209 ){ |
|
1210 int rc; |
|
1211 int ret; |
|
1212 AsyncWrite *p; |
|
1213 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1214 |
|
1215 assert(flags==SQLITE_ACCESS_READWRITE |
|
1216 || flags==SQLITE_ACCESS_READ |
|
1217 || flags==SQLITE_ACCESS_EXISTS |
|
1218 ); |
|
1219 |
|
1220 pthread_mutex_lock(&async.queueMutex); |
|
1221 rc = pVfs->xAccess(pVfs, zName, flags, &ret); |
|
1222 if( rc==SQLITE_OK && flags==SQLITE_ACCESS_EXISTS ){ |
|
1223 for(p=async.pQueueFirst; p; p = p->pNext){ |
|
1224 if( p->op==ASYNC_DELETE && 0==strcmp(p->zBuf, zName) ){ |
|
1225 ret = 0; |
|
1226 }else if( p->op==ASYNC_OPENEXCLUSIVE |
|
1227 && p->pFileData->zName |
|
1228 && 0==strcmp(p->pFileData->zName, zName) |
|
1229 ){ |
|
1230 ret = 1; |
|
1231 } |
|
1232 } |
|
1233 } |
|
1234 ASYNC_TRACE(("ACCESS(%s): %s = %d\n", |
|
1235 flags==SQLITE_ACCESS_READWRITE?"read-write": |
|
1236 flags==SQLITE_ACCESS_READ?"read":"exists" |
|
1237 , zName, ret) |
|
1238 ); |
|
1239 pthread_mutex_unlock(&async.queueMutex); |
|
1240 *pResOut = ret; |
|
1241 return rc; |
|
1242 } |
|
1243 |
|
1244 /* |
|
1245 ** Fill in zPathOut with the full path to the file identified by zPath. |
|
1246 */ |
|
1247 static int asyncFullPathname( |
|
1248 sqlite3_vfs *pAsyncVfs, |
|
1249 const char *zPath, |
|
1250 int nPathOut, |
|
1251 char *zPathOut |
|
1252 ){ |
|
1253 int rc; |
|
1254 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1255 rc = pVfs->xFullPathname(pVfs, zPath, nPathOut, zPathOut); |
|
1256 |
|
1257 /* Because of the way intra-process file locking works, this backend |
|
1258 ** needs to return a canonical path. The following block assumes the |
|
1259 ** file-system uses unix style paths. |
|
1260 */ |
|
1261 if( rc==SQLITE_OK ){ |
|
1262 int iIn; |
|
1263 int iOut = 0; |
|
1264 int nPathOut = strlen(zPathOut); |
|
1265 |
|
1266 for(iIn=0; iIn<nPathOut; iIn++){ |
|
1267 |
|
1268 /* Replace any occurences of "//" with "/" */ |
|
1269 if( iIn<=(nPathOut-2) && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='/' |
|
1270 ){ |
|
1271 continue; |
|
1272 } |
|
1273 |
|
1274 /* Replace any occurences of "/./" with "/" */ |
|
1275 if( iIn<=(nPathOut-3) |
|
1276 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' && zPathOut[iIn+2]=='/' |
|
1277 ){ |
|
1278 iIn++; |
|
1279 continue; |
|
1280 } |
|
1281 |
|
1282 /* Replace any occurences of "<path-component>/../" with "" */ |
|
1283 if( iOut>0 && iIn<=(nPathOut-4) |
|
1284 && zPathOut[iIn]=='/' && zPathOut[iIn+1]=='.' |
|
1285 && zPathOut[iIn+2]=='.' && zPathOut[iIn+3]=='/' |
|
1286 ){ |
|
1287 iIn += 3; |
|
1288 iOut--; |
|
1289 for( ; iOut>0 && zPathOut[iOut-1]!='/'; iOut--); |
|
1290 continue; |
|
1291 } |
|
1292 |
|
1293 zPathOut[iOut++] = zPathOut[iIn]; |
|
1294 } |
|
1295 zPathOut[iOut] = '\0'; |
|
1296 } |
|
1297 |
|
1298 return rc; |
|
1299 } |
|
1300 static void *asyncDlOpen(sqlite3_vfs *pAsyncVfs, const char *zPath){ |
|
1301 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1302 return pVfs->xDlOpen(pVfs, zPath); |
|
1303 } |
|
1304 static void asyncDlError(sqlite3_vfs *pAsyncVfs, int nByte, char *zErrMsg){ |
|
1305 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1306 pVfs->xDlError(pVfs, nByte, zErrMsg); |
|
1307 } |
|
1308 static void *asyncDlSym( |
|
1309 sqlite3_vfs *pAsyncVfs, |
|
1310 void *pHandle, |
|
1311 const char *zSymbol |
|
1312 ){ |
|
1313 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1314 return pVfs->xDlSym(pVfs, pHandle, zSymbol); |
|
1315 } |
|
1316 static void asyncDlClose(sqlite3_vfs *pAsyncVfs, void *pHandle){ |
|
1317 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1318 pVfs->xDlClose(pVfs, pHandle); |
|
1319 } |
|
1320 static int asyncRandomness(sqlite3_vfs *pAsyncVfs, int nByte, char *zBufOut){ |
|
1321 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1322 return pVfs->xRandomness(pVfs, nByte, zBufOut); |
|
1323 } |
|
1324 static int asyncSleep(sqlite3_vfs *pAsyncVfs, int nMicro){ |
|
1325 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1326 return pVfs->xSleep(pVfs, nMicro); |
|
1327 } |
|
1328 static int asyncCurrentTime(sqlite3_vfs *pAsyncVfs, double *pTimeOut){ |
|
1329 sqlite3_vfs *pVfs = (sqlite3_vfs *)pAsyncVfs->pAppData; |
|
1330 return pVfs->xCurrentTime(pVfs, pTimeOut); |
|
1331 } |
|
1332 |
|
/*
** The sqlite3_vfs object for the "async" VFS.  mxPathname and pAppData
** are 0 here; asyncEnable() fills them in from the VFS returned by
** sqlite3_vfs_find(0) before registering this object.  A non-zero
** pAppData therefore also serves as the "async I/O is enabled" flag.
*/
static sqlite3_vfs async_vfs = {
  1,                    /* iVersion */
  sizeof(AsyncFile),    /* szOsFile */
  0,                    /* mxPathname */
  0,                    /* pNext */
  "async",              /* zName */
  0,                    /* pAppData */
  asyncOpen,            /* xOpen */
  asyncDelete,          /* xDelete */
  asyncAccess,          /* xAccess */
  asyncFullPathname,    /* xFullPathname */
  asyncDlOpen,          /* xDlOpen */
  asyncDlError,         /* xDlError */
  asyncDlSym,           /* xDlSym */
  asyncDlClose,         /* xDlClose */
  asyncRandomness,      /* xRandomness */
  asyncSleep,           /* xSleep */
  asyncCurrentTime      /* xCurrentTime */
};
|
1352 |
|
1353 /* |
|
1354 ** Call this routine to enable or disable the |
|
1355 ** asynchronous IO features implemented in this file. |
|
1356 ** |
|
1357 ** This routine is not even remotely threadsafe. Do not call |
|
1358 ** this routine while any SQLite database connections are open. |
|
1359 */ |
|
1360 static void asyncEnable(int enable){ |
|
1361 if( enable ){ |
|
1362 if( !async_vfs.pAppData ){ |
|
1363 async_vfs.pAppData = (void *)sqlite3_vfs_find(0); |
|
1364 async_vfs.mxPathname = ((sqlite3_vfs *)async_vfs.pAppData)->mxPathname; |
|
1365 sqlite3_vfs_register(&async_vfs, 1); |
|
1366 } |
|
1367 }else{ |
|
1368 if( async_vfs.pAppData ){ |
|
1369 sqlite3_vfs_unregister(&async_vfs); |
|
1370 async_vfs.pAppData = 0; |
|
1371 } |
|
1372 } |
|
1373 } |
|
1374 |
|
1375 /* |
|
1376 ** This procedure runs in a separate thread, reading messages off of the |
|
1377 ** write queue and processing them one by one. |
|
1378 ** |
|
1379 ** If async.writerHaltNow is true, then this procedure exits |
|
1380 ** after processing a single message. |
|
1381 ** |
|
1382 ** If async.writerHaltWhenIdle is true, then this procedure exits when |
|
1383 ** the write queue is empty. |
|
1384 ** |
|
1385 ** If both of the above variables are false, this procedure runs |
|
1386 ** indefinately, waiting for operations to be added to the write queue |
|
1387 ** and processing them in the order in which they arrive. |
|
1388 ** |
|
1389 ** An artifical delay of async.ioDelay milliseconds is inserted before |
|
1390 ** each write operation in order to simulate the effect of a slow disk. |
|
1391 ** |
|
1392 ** Only one instance of this procedure may be running at a time. |
|
1393 */ |
|
1394 static void *asyncWriterThread(void *pIsStarted){ |
|
1395 sqlite3_vfs *pVfs = (sqlite3_vfs *)(async_vfs.pAppData); |
|
1396 AsyncWrite *p = 0; |
|
1397 int rc = SQLITE_OK; |
|
1398 int holdingMutex = 0; |
|
1399 |
|
1400 if( pthread_mutex_trylock(&async.writerMutex) ){ |
|
1401 return 0; |
|
1402 } |
|
1403 (*(int *)pIsStarted) = 1; |
|
1404 while( async.writerHaltNow==0 ){ |
|
1405 int doNotFree = 0; |
|
1406 sqlite3_file *pBase = 0; |
|
1407 |
|
1408 if( !holdingMutex ){ |
|
1409 pthread_mutex_lock(&async.queueMutex); |
|
1410 } |
|
1411 while( (p = async.pQueueFirst)==0 ){ |
|
1412 pthread_cond_broadcast(&async.emptySignal); |
|
1413 if( async.writerHaltWhenIdle ){ |
|
1414 pthread_mutex_unlock(&async.queueMutex); |
|
1415 break; |
|
1416 }else{ |
|
1417 ASYNC_TRACE(("IDLE\n")); |
|
1418 pthread_cond_wait(&async.queueSignal, &async.queueMutex); |
|
1419 ASYNC_TRACE(("WAKEUP\n")); |
|
1420 } |
|
1421 } |
|
1422 if( p==0 ) break; |
|
1423 holdingMutex = 1; |
|
1424 |
|
1425 /* Right now this thread is holding the mutex on the write-op queue. |
|
1426 ** Variable 'p' points to the first entry in the write-op queue. In |
|
1427 ** the general case, we hold on to the mutex for the entire body of |
|
1428 ** the loop. |
|
1429 ** |
|
1430 ** However in the cases enumerated below, we relinquish the mutex, |
|
1431 ** perform the IO, and then re-request the mutex before removing 'p' from |
|
1432 ** the head of the write-op queue. The idea is to increase concurrency with |
|
1433 ** sqlite threads. |
|
1434 ** |
|
1435 ** * An ASYNC_CLOSE operation. |
|
1436 ** * An ASYNC_OPENEXCLUSIVE operation. For this one, we relinquish |
|
1437 ** the mutex, call the underlying xOpenExclusive() function, then |
|
1438 ** re-aquire the mutex before seting the AsyncFile.pBaseRead |
|
1439 ** variable. |
|
1440 ** * ASYNC_SYNC and ASYNC_WRITE operations, if |
|
1441 ** SQLITE_ASYNC_TWO_FILEHANDLES was set at compile time and two |
|
1442 ** file-handles are open for the particular file being "synced". |
|
1443 */ |
|
1444 if( async.ioError!=SQLITE_OK && p->op!=ASYNC_CLOSE ){ |
|
1445 p->op = ASYNC_NOOP; |
|
1446 } |
|
1447 if( p->pFileData ){ |
|
1448 pBase = p->pFileData->pBaseWrite; |
|
1449 if( |
|
1450 p->op==ASYNC_CLOSE || |
|
1451 p->op==ASYNC_OPENEXCLUSIVE || |
|
1452 (pBase->pMethods && (p->op==ASYNC_SYNC || p->op==ASYNC_WRITE) ) |
|
1453 ){ |
|
1454 pthread_mutex_unlock(&async.queueMutex); |
|
1455 holdingMutex = 0; |
|
1456 } |
|
1457 if( !pBase->pMethods ){ |
|
1458 pBase = p->pFileData->pBaseRead; |
|
1459 } |
|
1460 } |
|
1461 |
|
1462 switch( p->op ){ |
|
1463 case ASYNC_NOOP: |
|
1464 break; |
|
1465 |
|
1466 case ASYNC_WRITE: |
|
1467 assert( pBase ); |
|
1468 ASYNC_TRACE(("WRITE %s %d bytes at %d\n", |
|
1469 p->pFileData->zName, p->nByte, p->iOffset)); |
|
1470 rc = pBase->pMethods->xWrite(pBase, (void *)(p->zBuf), p->nByte, p->iOffset); |
|
1471 break; |
|
1472 |
|
1473 case ASYNC_SYNC: |
|
1474 assert( pBase ); |
|
1475 ASYNC_TRACE(("SYNC %s\n", p->pFileData->zName)); |
|
1476 rc = pBase->pMethods->xSync(pBase, p->nByte); |
|
1477 break; |
|
1478 |
|
1479 case ASYNC_TRUNCATE: |
|
1480 assert( pBase ); |
|
1481 ASYNC_TRACE(("TRUNCATE %s to %d bytes\n", |
|
1482 p->pFileData->zName, p->iOffset)); |
|
1483 rc = pBase->pMethods->xTruncate(pBase, p->iOffset); |
|
1484 break; |
|
1485 |
|
1486 case ASYNC_CLOSE: { |
|
1487 AsyncFileData *pData = p->pFileData; |
|
1488 ASYNC_TRACE(("CLOSE %s\n", p->pFileData->zName)); |
|
1489 if( pData->pBaseWrite->pMethods ){ |
|
1490 pData->pBaseWrite->pMethods->xClose(pData->pBaseWrite); |
|
1491 } |
|
1492 if( pData->pBaseRead->pMethods ){ |
|
1493 pData->pBaseRead->pMethods->xClose(pData->pBaseRead); |
|
1494 } |
|
1495 |
|
1496 /* Unlink AsyncFileData.lock from the linked list of AsyncFileLock |
|
1497 ** structures for this file. Obtain the async.lockMutex mutex |
|
1498 ** before doing so. |
|
1499 */ |
|
1500 pthread_mutex_lock(&async.lockMutex); |
|
1501 rc = unlinkAsyncFile(pData); |
|
1502 pthread_mutex_unlock(&async.lockMutex); |
|
1503 |
|
1504 if( !holdingMutex ){ |
|
1505 pthread_mutex_lock(&async.queueMutex); |
|
1506 holdingMutex = 1; |
|
1507 } |
|
1508 assert_mutex_is_held(&async.queueMutex); |
|
1509 async.pQueueFirst = p->pNext; |
|
1510 sqlite3_free(pData); |
|
1511 doNotFree = 1; |
|
1512 break; |
|
1513 } |
|
1514 |
|
1515 case ASYNC_UNLOCK: { |
|
1516 AsyncFileData *pData = p->pFileData; |
|
1517 int eLock = p->nByte; |
|
1518 pthread_mutex_lock(&async.lockMutex); |
|
1519 pData->lock.eAsyncLock = MIN( |
|
1520 pData->lock.eAsyncLock, MAX(pData->lock.eLock, eLock) |
|
1521 ); |
|
1522 assert(pData->lock.eAsyncLock>=pData->lock.eLock); |
|
1523 rc = getFileLock(pData->pLock); |
|
1524 pthread_mutex_unlock(&async.lockMutex); |
|
1525 break; |
|
1526 } |
|
1527 |
|
1528 case ASYNC_DELETE: |
|
1529 ASYNC_TRACE(("DELETE %s\n", p->zBuf)); |
|
1530 rc = pVfs->xDelete(pVfs, p->zBuf, (int)p->iOffset); |
|
1531 break; |
|
1532 |
|
1533 case ASYNC_OPENEXCLUSIVE: { |
|
1534 int flags = (int)p->iOffset; |
|
1535 AsyncFileData *pData = p->pFileData; |
|
1536 ASYNC_TRACE(("OPEN %s flags=%d\n", p->zBuf, (int)p->iOffset)); |
|
1537 assert(pData->pBaseRead->pMethods==0 && pData->pBaseWrite->pMethods==0); |
|
1538 rc = pVfs->xOpen(pVfs, pData->zName, pData->pBaseRead, flags, 0); |
|
1539 assert( holdingMutex==0 ); |
|
1540 pthread_mutex_lock(&async.queueMutex); |
|
1541 holdingMutex = 1; |
|
1542 break; |
|
1543 } |
|
1544 |
|
1545 default: assert(!"Illegal value for AsyncWrite.op"); |
|
1546 } |
|
1547 |
|
1548 /* If we didn't hang on to the mutex during the IO op, obtain it now |
|
1549 ** so that the AsyncWrite structure can be safely removed from the |
|
1550 ** global write-op queue. |
|
1551 */ |
|
1552 if( !holdingMutex ){ |
|
1553 pthread_mutex_lock(&async.queueMutex); |
|
1554 holdingMutex = 1; |
|
1555 } |
|
1556 /* ASYNC_TRACE(("UNLINK %p\n", p)); */ |
|
1557 if( p==async.pQueueLast ){ |
|
1558 async.pQueueLast = 0; |
|
1559 } |
|
1560 if( !doNotFree ){ |
|
1561 assert_mutex_is_held(&async.queueMutex); |
|
1562 async.pQueueFirst = p->pNext; |
|
1563 sqlite3_free(p); |
|
1564 } |
|
1565 assert( holdingMutex ); |
|
1566 |
|
1567 /* An IO error has occured. We cannot report the error back to the |
|
1568 ** connection that requested the I/O since the error happened |
|
1569 ** asynchronously. The connection has already moved on. There |
|
1570 ** really is nobody to report the error to. |
|
1571 ** |
|
1572 ** The file for which the error occured may have been a database or |
|
1573 ** journal file. Regardless, none of the currently queued operations |
|
1574 ** associated with the same database should now be performed. Nor should |
|
1575 ** any subsequently requested IO on either a database or journal file |
|
1576 ** handle for the same database be accepted until the main database |
|
1577 ** file handle has been closed and reopened. |
|
1578 ** |
|
1579 ** Furthermore, no further IO should be queued or performed on any file |
|
1580 ** handle associated with a database that may have been part of a |
|
1581 ** multi-file transaction that included the database associated with |
|
1582 ** the IO error (i.e. a database ATTACHed to the same handle at some |
|
1583 ** point in time). |
|
1584 */ |
|
1585 if( rc!=SQLITE_OK ){ |
|
1586 async.ioError = rc; |
|
1587 } |
|
1588 |
|
1589 if( async.ioError && !async.pQueueFirst ){ |
|
1590 pthread_mutex_lock(&async.lockMutex); |
|
1591 if( 0==async.pLock ){ |
|
1592 async.ioError = SQLITE_OK; |
|
1593 } |
|
1594 pthread_mutex_unlock(&async.lockMutex); |
|
1595 } |
|
1596 |
|
1597 /* Drop the queue mutex before continuing to the next write operation |
|
1598 ** in order to give other threads a chance to work with the write queue. |
|
1599 */ |
|
1600 if( !async.pQueueFirst || !async.ioError ){ |
|
1601 pthread_mutex_unlock(&async.queueMutex); |
|
1602 holdingMutex = 0; |
|
1603 if( async.ioDelay>0 ){ |
|
1604 pVfs->xSleep(pVfs, async.ioDelay); |
|
1605 }else{ |
|
1606 sched_yield(); |
|
1607 } |
|
1608 } |
|
1609 } |
|
1610 |
|
1611 pthread_mutex_unlock(&async.writerMutex); |
|
1612 return 0; |
|
1613 } |
|
1614 |
|
1615 /************************************************************************** |
|
1616 ** The remaining code defines a Tcl interface for testing the asynchronous |
|
1617 ** IO implementation in this file. |
|
1618 ** |
|
1619 ** To adapt the code to a non-TCL environment, delete or comment out |
|
1620 ** the code that follows. |
|
1621 */ |
|
1622 |
|
1623 /* |
|
1624 ** sqlite3async_enable ?YES/NO? |
|
1625 ** |
|
1626 ** Enable or disable the asynchronous I/O backend. This command is |
|
1627 ** not thread-safe. Do not call it while any database connections |
|
1628 ** are open. |
|
1629 */ |
|
1630 static int testAsyncEnable( |
|
1631 void * clientData, |
|
1632 Tcl_Interp *interp, |
|
1633 int objc, |
|
1634 Tcl_Obj *CONST objv[] |
|
1635 ){ |
|
1636 if( objc!=1 && objc!=2 ){ |
|
1637 Tcl_WrongNumArgs(interp, 1, objv, "?YES/NO?"); |
|
1638 return TCL_ERROR; |
|
1639 } |
|
1640 if( objc==1 ){ |
|
1641 Tcl_SetObjResult(interp, Tcl_NewBooleanObj(async_vfs.pAppData!=0)); |
|
1642 }else{ |
|
1643 int en; |
|
1644 if( Tcl_GetBooleanFromObj(interp, objv[1], &en) ) return TCL_ERROR; |
|
1645 asyncEnable(en); |
|
1646 } |
|
1647 return TCL_OK; |
|
1648 } |
|
1649 |
|
1650 /* |
|
1651 ** sqlite3async_halt "now"|"idle"|"never" |
|
1652 ** |
|
1653 ** Set the conditions at which the writer thread will halt. |
|
1654 */ |
|
1655 static int testAsyncHalt( |
|
1656 void * clientData, |
|
1657 Tcl_Interp *interp, |
|
1658 int objc, |
|
1659 Tcl_Obj *CONST objv[] |
|
1660 ){ |
|
1661 const char *zCond; |
|
1662 if( objc!=2 ){ |
|
1663 Tcl_WrongNumArgs(interp, 1, objv, "\"now\"|\"idle\"|\"never\""); |
|
1664 return TCL_ERROR; |
|
1665 } |
|
1666 zCond = Tcl_GetString(objv[1]); |
|
1667 if( strcmp(zCond, "now")==0 ){ |
|
1668 async.writerHaltNow = 1; |
|
1669 pthread_cond_broadcast(&async.queueSignal); |
|
1670 }else if( strcmp(zCond, "idle")==0 ){ |
|
1671 async.writerHaltWhenIdle = 1; |
|
1672 async.writerHaltNow = 0; |
|
1673 pthread_cond_broadcast(&async.queueSignal); |
|
1674 }else if( strcmp(zCond, "never")==0 ){ |
|
1675 async.writerHaltWhenIdle = 0; |
|
1676 async.writerHaltNow = 0; |
|
1677 }else{ |
|
1678 Tcl_AppendResult(interp, |
|
1679 "should be one of: \"now\", \"idle\", or \"never\"", (char*)0); |
|
1680 return TCL_ERROR; |
|
1681 } |
|
1682 return TCL_OK; |
|
1683 } |
|
1684 |
|
1685 /* |
|
1686 ** sqlite3async_delay ?MS? |
|
1687 ** |
|
1688 ** Query or set the number of milliseconds of delay in the writer |
|
1689 ** thread after each write operation. The default is 0. By increasing |
|
1690 ** the memory delay we can simulate the effect of slow disk I/O. |
|
1691 */ |
|
1692 static int testAsyncDelay( |
|
1693 void * clientData, |
|
1694 Tcl_Interp *interp, |
|
1695 int objc, |
|
1696 Tcl_Obj *CONST objv[] |
|
1697 ){ |
|
1698 if( objc!=1 && objc!=2 ){ |
|
1699 Tcl_WrongNumArgs(interp, 1, objv, "?MS?"); |
|
1700 return TCL_ERROR; |
|
1701 } |
|
1702 if( objc==1 ){ |
|
1703 Tcl_SetObjResult(interp, Tcl_NewIntObj(async.ioDelay)); |
|
1704 }else{ |
|
1705 int ioDelay; |
|
1706 if( Tcl_GetIntFromObj(interp, objv[1], &ioDelay) ) return TCL_ERROR; |
|
1707 async.ioDelay = ioDelay; |
|
1708 } |
|
1709 return TCL_OK; |
|
1710 } |
|
1711 |
|
1712 /* |
|
1713 ** sqlite3async_start |
|
1714 ** |
|
1715 ** Start a new writer thread. |
|
1716 */ |
|
1717 static int testAsyncStart( |
|
1718 void * clientData, |
|
1719 Tcl_Interp *interp, |
|
1720 int objc, |
|
1721 Tcl_Obj *CONST objv[] |
|
1722 ){ |
|
1723 pthread_t x; |
|
1724 int rc; |
|
1725 volatile int isStarted = 0; |
|
1726 rc = pthread_create(&x, 0, asyncWriterThread, (void *)&isStarted); |
|
1727 if( rc ){ |
|
1728 Tcl_AppendResult(interp, "failed to create the thread", 0); |
|
1729 return TCL_ERROR; |
|
1730 } |
|
1731 pthread_detach(x); |
|
1732 while( isStarted==0 ){ |
|
1733 sched_yield(); |
|
1734 } |
|
1735 return TCL_OK; |
|
1736 } |
|
1737 |
|
1738 /* |
|
1739 ** sqlite3async_wait |
|
1740 ** |
|
1741 ** Wait for the current writer thread to terminate. |
|
1742 ** |
|
1743 ** If the current writer thread is set to run forever then this |
|
1744 ** command would block forever. To prevent that, an error is returned. |
|
1745 */ |
|
1746 static int testAsyncWait( |
|
1747 void * clientData, |
|
1748 Tcl_Interp *interp, |
|
1749 int objc, |
|
1750 Tcl_Obj *CONST objv[] |
|
1751 ){ |
|
1752 int cnt = 10; |
|
1753 if( async.writerHaltNow==0 && async.writerHaltWhenIdle==0 ){ |
|
1754 Tcl_AppendResult(interp, "would block forever", (char*)0); |
|
1755 return TCL_ERROR; |
|
1756 } |
|
1757 |
|
1758 while( cnt-- && !pthread_mutex_trylock(&async.writerMutex) ){ |
|
1759 pthread_mutex_unlock(&async.writerMutex); |
|
1760 sched_yield(); |
|
1761 } |
|
1762 if( cnt>=0 ){ |
|
1763 ASYNC_TRACE(("WAIT\n")); |
|
1764 pthread_mutex_lock(&async.queueMutex); |
|
1765 pthread_cond_broadcast(&async.queueSignal); |
|
1766 pthread_mutex_unlock(&async.queueMutex); |
|
1767 pthread_mutex_lock(&async.writerMutex); |
|
1768 pthread_mutex_unlock(&async.writerMutex); |
|
1769 }else{ |
|
1770 ASYNC_TRACE(("NO-WAIT\n")); |
|
1771 } |
|
1772 return TCL_OK; |
|
1773 } |
|
1774 |
|
1775 |
|
1776 #endif /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */ |
|
1777 |
|
1778 /* |
|
1779 ** This routine registers the custom TCL commands defined in this |
|
1780 ** module. This should be the only procedure visible from outside |
|
1781 ** of this module. |
|
1782 */ |
|
1783 int Sqlitetestasync_Init(Tcl_Interp *interp){ |
|
1784 #if SQLITE_OS_UNIX && SQLITE_THREADSAFE |
|
1785 Tcl_CreateObjCommand(interp,"sqlite3async_enable",testAsyncEnable,0,0); |
|
1786 Tcl_CreateObjCommand(interp,"sqlite3async_halt",testAsyncHalt,0,0); |
|
1787 Tcl_CreateObjCommand(interp,"sqlite3async_delay",testAsyncDelay,0,0); |
|
1788 Tcl_CreateObjCommand(interp,"sqlite3async_start",testAsyncStart,0,0); |
|
1789 Tcl_CreateObjCommand(interp,"sqlite3async_wait",testAsyncWait,0,0); |
|
1790 Tcl_LinkVar(interp, "sqlite3async_trace", |
|
1791 (char*)&sqlite3async_trace, TCL_LINK_INT); |
|
1792 #endif /* SQLITE_OS_UNIX and SQLITE_THREADSAFE */ |
|
1793 return TCL_OK; |
|
1794 } |