|
1 # 2002 May 24 |
|
2 # |
|
3 # The author disclaims copyright to this source code. In place of |
|
4 # a legal notice, here is a blessing: |
|
5 # |
|
6 # May you do good and not evil. |
|
7 # May you find forgiveness for yourself and forgive others. |
|
8 # May you share freely, never taking more than you give. |
|
9 # |
|
10 #*********************************************************************** |
|
11 # This file implements regression tests for SQLite library. The focus of |
|
12 # this file is testing the SQLite routines used for converting between the |
|
13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and |
|
14 # UTF-16be). |
|
15 # |
|
16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $ |
|
17 |
|
# Load the shared test harness from the directory holding this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# Nothing in this file can run without UTF-16 support, so when that
# capability is missing end the script straight away.
ifcapable {!utf16} {
  finish_test
  return
}

# Layout of the tests in this file:
#
#    enc2-1.*:  Simple tests with a UTF-8 db.
#    enc2-2.*:  Simple tests with a UTF-16LE db.
#    enc2-3.*:  Simple tests with a UTF-16BE db.
#    enc2-4.*:  Attached databases must have the same text encoding as
#               the main database.
#    enc2-5.*:  Behaviour of the library when a collation sequence is
#               not available for the most desirable text encoding.
#    enc2-6.*:  The same kind of test for user functions.
#    enc2-7.*:  The VerifyCookie opcode protects against assuming the
#               wrong text encoding for the database.
#    enc2-8.*:  sqlite3_complete16().
#    enc2-9.*:  The encoding of an empty database may still be set after
#               the (empty) schema has been initialized.
#    enc2-10.*: Ticket #1987 - encoding changes are disallowed once the
#               encoding has been set.
#

# Close the "db" handle that exists at this point (presumably opened by
# tester.tcl); each section below opens the connections it needs.
db close
|
44 |
|
# Convert the UTF-16 byte string $str to its UTF-8 (Tcl string) form.
#
#   str      Raw UTF-16 bytes, optionally followed by a two-byte
#            0x00 0x00 terminator.
#   returns  The decoded text.
#
proc utf8 {str} {
  # Look at the raw bytes: a trailing pair of zero bytes is a terminator
  # and has to be removed before Tcl decodes the string.
  binary scan $str c* bytes
  set last   [lindex $bytes end]
  set penult [lindex $bytes end-1]
  if {$last==0 && $penult==0} {
    set str [binary format c* [lrange $bytes 0 end-2]]
  }

  return [encoding convertfrom unicode $str]
}
|
57 |
|
# SQL that seeds 'test.db' before each run of run_test_script (defined
# below). The script is run three times, once per text encoding, and
# every run starts from a database with exactly these contents.
set dbcontents {
  CREATE TABLE t1(a PRIMARY KEY, b, c);
  INSERT INTO t1 VALUES('one', 'I', 1);
}
|
# Check that the database in test.db can be opened and manipulated, and
# that values can be retrieved from it in various text encodings.
#
#   t    Prefix used for the name of every test run here ($t.1 ... $t.10).
#   enc  Value that "PRAGMA encoding" is expected to report at the end.
#
proc run_test_script {t enc} {

  # A fresh connection sees the single row the database was seeded with.
  do_test $t.1 {
    sqlite3 db test.db
    set DB [sqlite3_connection_pointer db]
    execsql {SELECT * FROM t1}
  } {one I 1}

  # Add one row.
  do_test $t.2 {
    execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
    execsql {SELECT * FROM t1}
  } {one I 1 two II 2}

  # Add several rows in a single script.
  do_test $t.3 {
    execsql {
      INSERT INTO t1 VALUES('three','III',3);
      INSERT INTO t1 VALUES('four','IV',4);
      INSERT INTO t1 VALUES('five','V',5);
    }
    execsql {SELECT * FROM t1}
  } {one I 1 two II 2 three III 3 four IV 4 five V 5}

  # Look rows up by column "a", the PRIMARY KEY, so the index is used.
  do_test $t.4 {
    execsql {
      SELECT * FROM t1 WHERE a = 'one';
    }
  } {one I 1}
  do_test $t.5 {
    execsql {
      SELECT * FROM t1 WHERE a = 'four';
    }
  } {four IV 4}
  ifcapable subquery {
    do_test $t.6 {
      execsql {
        SELECT * FROM t1 WHERE a IN ('one', 'two');
      }
    } {one I 1 two II 2}
  }

  # Step one prepared statement twice: the first row is fetched through
  # sqlite3_column_text, the second through sqlite3_column_text16 (whose
  # result is converted back with the utf8 proc for comparison).
  do_test $t.7 {
    set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
    sqlite3_step $STMT
    sqlite3_column_text $STMT 0
  } {four}

  do_test $t.8 {
    sqlite3_step $STMT
    utf8 [sqlite3_column_text16 $STMT 0]
  } {five}

  do_test $t.9 {
    sqlite3_finalize $STMT
  } SQLITE_OK

  ifcapable vacuum {
    execsql VACUUM
  }

  # After all of the above the database must still report the expected
  # encoding.
  do_test $t.10 {
    db eval {PRAGMA encoding}
  } $enc

}
|
138 |
|
# The three unicode encodings understood by SQLite.
set encodings {UTF-8 UTF-16le UTF-16be}

set sqlite_os_trace 0

# For each encoding: build a fresh test.db in that encoding, verify the
# encoding is reported and can no longer be changed, then run the common
# script against it. $i numbers the groups enc2-1, enc2-2, enc2-3.
set i 1
foreach enc $encodings {
  file delete -force test.db
  sqlite3 db test.db
  db eval "PRAGMA encoding = \"$enc\""
  execsql $dbcontents

  # The encoding that was requested is the one reported ...
  do_test enc2-$i.0.1 {
    db eval {PRAGMA encoding}
  } $enc

  # ... and, now that the database has content, asking for any other
  # encoding leaves the reported value unchanged.
  do_test enc2-$i.0.2 {
    db eval {PRAGMA encoding=UTF8}
    db eval {PRAGMA encoding}
  } $enc
  do_test enc2-$i.0.3 {
    db eval {PRAGMA encoding=UTF16le}
    db eval {PRAGMA encoding}
  } $enc
  do_test enc2-$i.0.4 {
    db eval {PRAGMA encoding=UTF16be}
    db eval {PRAGMA encoding}
  } $enc

  # run_test_script opens its own "db" connection, so close this one
  # first and close the script's connection afterwards.
  db close
  run_test_script enc2-$i $enc
  db close
  incr i
}
|
170 |
|
# Attaching a database whose text encoding differs from that of the main
# database must fail with an error.
ifcapable attach {
  # Main database: UTF-8.
  do_test enc2-4.1 {
    file delete -force test.db
    sqlite3 db test.db
    db eval "PRAGMA encoding = 'UTF-8'"
    db eval "CREATE TABLE abc(a, b, c);"
  } {}

  # Second database, on its own handle: UTF-16.
  do_test enc2-4.2 {
    file delete -force test2.db
    sqlite3 db2 test2.db
    db2 eval "PRAGMA encoding = 'UTF-16'"
    db2 eval "CREATE TABLE abc(a, b, c);"
  } {}

  # The ATTACH is issued on the UTF-8 handle and has to be rejected.
  do_test enc2-4.3 {
    catchsql {
      ATTACH 'test2.db' as aux;
    }
  } {1 {attached databases must use the same text encoding as main database}}

  db2 close
  db close
}
|
194 |
|
# The following tests - enc2-5.* - test that SQLite selects the correct
# collation sequence when more than one is available.

# Sort order imposed by test_collate: a value sorts by its position in
# this list.
set ::values [list one two three four five]

# Encoding label passed to the most recent test_collate call. Starts as
# INVALID so that a test can tell whether the collation ran at all.
set ::test_collate_enc INVALID

# Comparison callback for the "test_collate" collation used below.
#
#   enc      Encoding label supplied by the caller; recorded in
#            ::test_collate_enc so tests can check which variant ran.
#   lhs rhs  The two values to compare.
#   returns  (index of lhs in ::values) - (index of rhs in ::values), so
#            negative, zero or positive in the usual comparator sense.
#            A value that is not in ::values has index -1.
#
proc test_collate {enc lhs rhs} {
  set ::test_collate_enc $enc
  set l [lsearch -exact $::values $lhs]
  set r [lsearch -exact $::values $rhs]
  # Braced so the expression is parsed once instead of being substituted
  # a second time by expr.
  return [expr {$l - $r}]
}
|
208 |
|
# Collation selection on a fresh database for which no encoding is
# requested explicitly.
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]

# Rows are inserted out of order so that a correct result proves the
# ORDER BY really went through test_collate.
do_test enc2-5.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}

# Judging by the expected results, the three flags of add_test_collate
# enable the UTF-8, UTF-16LE and UTF-16BE variants in that order. Each
# test appends the encoding label seen by test_collate to the sorted
# rows.
do_test enc2-5.1 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
do_test enc2-5.2 {
  add_test_collate $DB 0 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.3 {
  add_test_collate $DB 0 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
|
236 |
|
# Same collation checks against a database created as UTF-16LE.
db close
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16LE'}

do_test enc2-5.4 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}

# With every variant registered the UTF-16LE one is used; with it
# removed UTF-16BE is used; with only the first flag set UTF-8 is used.
do_test enc2-5.5 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.6 {
  add_test_collate $DB 1 0 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.7 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
|
266 |
|
# Same collation checks against a database created as UTF-16BE.
db close
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}

do_test enc2-5.8 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
    INSERT INTO t5 VALUES('two');
    INSERT INTO t5 VALUES('five');
    INSERT INTO t5 VALUES('three');
    INSERT INTO t5 VALUES('four');
  }
} {}

# With every variant registered the UTF-16BE one is used; with it
# removed UTF-16LE is used; with only the first flag set UTF-8 is used.
do_test enc2-5.9 {
  add_test_collate $DB 1 1 1
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}
do_test enc2-5.10 {
  add_test_collate $DB 1 1 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-16LE}
do_test enc2-5.11 {
  add_test_collate $DB 1 0 0
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
  lappend res $::test_collate_enc
} {one two three four five UTF-8}
|
296 |
|
# Also test that a UTF-16 collation factory works.

# With all three flags cleared the collation is no longer available, so
# the query has to fail. (This test was previously named "enc2-5-12";
# the name now follows the enc2-5.N scheme used by its neighbours.)
do_test enc2-5.12 {
  add_test_collate $DB 0 0 0
  catchsql {
    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
  }
} {1 {no such collation sequence: test_collate}}

# After add_test_collate_needed the same query succeeds again, and the
# encoding seen by test_collate is UTF-16BE.
do_test enc2-5.13 {
  add_test_collate_needed $DB
  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
  lappend res $::test_collate_enc
} {one two three four five UTF-16BE}

# The name of the collation that was asked for is left in
# ::sqlite_last_needed_collation.
do_test enc2-5.14 {
  set ::sqlite_last_needed_collation
} test_collate
|
312 |
|
# Start again from an empty database and check that a collation named in
# a CREATE TABLE statement is also requested.
db close
file delete -force test.db

# Immediately after add_test_collate_needed on a new connection no
# collation name has been recorded.
do_test enc2-5.15 {
  sqlite3 db test.db
  set ::DB [sqlite3_connection_pointer db]
  add_test_collate_needed $::DB
  set ::sqlite_last_needed_collation
} {}

# Creating a column that uses the collation succeeds ...
do_test enc2-5.16 {
  execsql {CREATE TABLE t1(a varchar collate test_collate);}
} {}

# ... and leaves the collation's name in ::sqlite_last_needed_collation.
do_test enc2-5.17 {
  set ::sqlite_last_needed_collation
} {test_collate}
|
327 |
|
# The following tests - enc2-6.* - test that SQLite selects the correct
# user function when more than one is available.

# Body of the "test_function" SQL function used by the enc2-6.* tests.
#
#   enc      Encoding label supplied by the caller.
#   arg      The argument value.
#   returns  The label and the argument joined by a single space, so the
#            result shows which variant was invoked.
#
proc test_function {enc arg} {
  set result $enc
  append result " " $arg
  return $result
}
|
334 |
|
# User-function selection against a UTF-8 database.
db close
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-8'}

do_test enc2-6.0 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('one');
  }
} {}

# Judging by the expected results, the three flags of add_test_function
# enable the UTF-8, UTF-16LE and UTF-16BE variants in that order. With
# all of them registered the UTF-8 one is called.
do_test enc2-6.1 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-8 sqlite}}

# The connection is reopened before each of the remaining cases, so
# only the single variant registered in that case is present.
db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.2 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}

db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.3 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
|
367 |
|
# User-function selection against a UTF-16LE database.
db close
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16LE'}

# This setup step used to be named "enc2-6.3", the same name as the test
# just before it. It is now "enc2-6.3.1" so that every test name in the
# file is unique and a failure report identifies a single test.
do_test enc2-6.3.1 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('sqlite');
  }
} {}

# With all variants registered the UTF-16LE one is called.
do_test enc2-6.4 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}

# The connection is reopened before each of the remaining cases, so
# only the single variant registered in that case is present.
db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.5 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}

db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.6 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}
|
400 |
|
# User-function selection against a UTF-16BE database.
db close
file delete -force test.db
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
execsql {pragma encoding = 'UTF-16BE'}

do_test enc2-6.7 {
  execsql {
    CREATE TABLE t5(a);
    INSERT INTO t5 VALUES('sqlite');
  }
} {}

# With all variants registered the UTF-16BE one is called.
do_test enc2-6.8 {
  add_test_function $DB 1 1 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}

# The connection is reopened before each of the remaining cases, so
# only the single variant registered in that case is present.
db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.9 {
  add_test_function $DB 0 1 0
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16LE sqlite}}

db close
sqlite3 db test.db
set DB [sqlite3_connection_pointer db]
do_test enc2-6.10 {
  add_test_function $DB 0 0 1
  execsql {
    SELECT test_function('sqlite')
  }
} {{UTF-16BE sqlite}}

# Leave no connection or database file behind for the next section.
db close
file delete -force test.db
|
437 |
|
# The following tests - enc2-7.* - function as follows:
#
# 1: Open an empty database file assuming UTF-16 encoding.
# 2: Open the same database with a different handle assuming UTF-8. Create
#    a table using this handle.
# 3: Read the sqlite_master table from the first handle.
# 4: Ensure the first handle recognises the database encoding is UTF-8.
#
do_test enc2-7.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16';
    SELECT * FROM sqlite_master;
  }
} {}
do_test enc2-7.2 {
  set enc [execsql {
    PRAGMA encoding;
  }]
  string range $enc 0 end-2 ;# Chop off the "le" or "be"
} {UTF-16}
do_test enc2-7.3 {
  sqlite3 db2 test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    CREATE TABLE abc(a, b, c);
  } db2
} {}

# The expected rootpage of table abc is 3 when $AUTOVACUUM is true and 2
# otherwise. The expression is braced so that expr parses it once rather
# than re-substituting the value of $AUTOVACUUM.
do_test enc2-7.4 {
  execsql {
    SELECT * FROM sqlite_master;
  }
} "table abc abc [expr {$AUTOVACUUM ? 3 : 2}] {CREATE TABLE abc(a, b, c)}"
do_test enc2-7.5 {
  execsql {
    PRAGMA encoding;
  }
} {UTF-8}

db close
db2 close
|
479 |
|
# Convert the string $utf8 to UTF-16 and terminate it.
#
#   utf8     Text to convert.
#   returns  The text encoded with Tcl's "unicode" encoding, followed by
#            two 0x00 bytes.
#
proc utf16 {utf8} {
  set encoded [encoding convertto unicode $utf8]
  return "$encoded\x00\x00"
}
|
# sqlite3_complete16() reports 1 for a statement that ends with a
# semicolon and 0 for an unfinished one. The input is passed as a
# terminated UTF-16 string built by the utf16 proc.
ifcapable complete {
  do_test enc2-8.1 {
    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
  } {1}
  do_test enc2-8.2 {
    sqlite3_complete16 [utf16 "SELECT * FROM"]
  } {0}
}
|
493 |
|
# Test that the encoding of an empty database may still be set after the
# (empty) schema has been initialized.
file delete -force test.db

# Each test reopens test.db under the handle name "db".

# New, empty database: the requested encoding is reported.
do_test enc2-9.1 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-8}

# Nothing has been created yet, so another encoding can be selected.
do_test enc2-9.2 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16le';
    PRAGMA encoding;
  }
} {UTF-16le}

# Reading the (empty) schema first does not prevent the change either.
do_test enc2-9.3 {
  sqlite3 db test.db
  execsql {
    SELECT * FROM sqlite_master;
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-8}

# Select UTF-16le and create a table while that encoding is in effect.
do_test enc2-9.4 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-16le';
    CREATE TABLE abc(a, b, c);
    PRAGMA encoding;
  }
} {UTF-16le}

# Now that a table exists, a request for UTF-8 leaves the encoding at
# UTF-16le.
do_test enc2-9.5 {
  sqlite3 db test.db
  execsql {
    PRAGMA encoding = 'UTF-8';
    PRAGMA encoding;
  }
} {UTF-16le}
|
534 |
|
# Ticket #1987.
# Disallow encoding changes once the encoding has been set.
#
# A second "PRAGMA encoding" is issued between two CREATE TABLE
# statements; after reopening the database both tables must be listed in
# sqlite_master.
do_test enc2-10.1 {
  db close
  file delete -force test.db test.db-journal

  sqlite3 db test.db
  db eval {
    PRAGMA encoding=UTF16;
    CREATE TABLE t1(a);
    PRAGMA encoding=UTF8;
    CREATE TABLE t2(b);
  }
  db close

  sqlite3 db test.db
  db eval {
    SELECT name FROM sqlite_master
  }
} {t1 t2}

finish_test