persistentstorage/sqlite3api/TEST/TclScript/enc2.test
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 # 2002 May 24
       
     2 #
       
     3 # The author disclaims copyright to this source code.  In place of
       
     4 # a legal notice, here is a blessing:
       
     5 #
       
     6 #    May you do good and not evil.
       
     7 #    May you find forgiveness for yourself and forgive others.
       
     8 #    May you share freely, never taking more than you give.
       
     9 #
       
    10 #***********************************************************************
       
    11 # This file implements regression tests for SQLite library.  The focus of
       
    12 # this file is testing the SQLite routines used for converting between the
       
     13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
       
    14 # UTF-16be).
       
    15 #
       
    16 # $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
       
    17 
       
    18 set testdir [file dirname $argv0]
       
    19 source $testdir/tester.tcl
       
    20 
       
    21 # If UTF16 support is disabled, ignore the tests in this file
       
    22 #
       
    23 ifcapable {!utf16} {
       
    24   finish_test
       
    25   return
       
    26 }
       
    27 
       
    28 # The rough organisation of tests in this file is:
       
    29 #
       
    30 # enc2.1.*: Simple tests with a UTF-8 db.
       
    31 # enc2.2.*: Simple tests with a UTF-16LE db.
       
    32 # enc2.3.*: Simple tests with a UTF-16BE db.
       
    33 # enc2.4.*: Test that attached databases must have the same text encoding
       
    34 #           as the main database.
       
    35 # enc2.5.*: Test the behaviour of the library when a collation sequence is
       
    36 #           not available for the most desirable text encoding.
       
    37 # enc2.6.*: Similar test for user functions.
       
    38 # enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
       
    39 #           wrong text encoding for the database.
       
    40 # enc2.8.*: Test sqlite3_complete16()
       
    41 #
       
    42 
       
    43 db close
       
    44 
       
    45 # Return the UTF-8 representation of the supplied UTF-16 string $str. 
       
    46 proc utf8 {str} {
       
    47   # If $str ends in two 0x00 0x00 bytes, knock these off before
       
    48   # converting to UTF-8 using TCL.
       
    49   binary scan $str \c* vals
       
    50   if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
       
    51     set str [binary format \c* [lrange $vals 0 end-2]]
       
    52   }
       
    53 
       
    54   set r [encoding convertfrom unicode $str]
       
    55   return $r
       
    56 }
       
    57 
       
    58 #
       
    59 # This proc contains all the tests in this file. It is run
       
    60 # three times. Each time the file 'test.db' contains a database
       
    61 # with the following contents:
       
    62 set dbcontents {
       
    63   CREATE TABLE t1(a PRIMARY KEY, b, c);
       
    64   INSERT INTO t1 VALUES('one', 'I', 1);
       
    65 }
       
    66 # This proc tests that we can open and manipulate the test.db 
       
    67 # database, and that it is possible to retreive values in
       
    68 # various text encodings.
       
    69 #
       
    70 proc run_test_script {t enc} {
       
    71 
       
    72 # Open the database and pull out a (the) row.
       
    73 do_test $t.1 {
       
    74   sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
    75   execsql {SELECT * FROM t1}
       
    76 } {one I 1}
       
    77 
       
    78 # Insert some data
       
    79 do_test $t.2 {
       
    80   execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
       
    81   execsql {SELECT * FROM t1}
       
    82 } {one I 1 two II 2}
       
    83 
       
    84 # Insert some data 
       
    85 do_test $t.3 {
       
    86   execsql {
       
    87     INSERT INTO t1 VALUES('three','III',3);
       
    88     INSERT INTO t1 VALUES('four','IV',4);
       
    89     INSERT INTO t1 VALUES('five','V',5);
       
    90   }
       
    91   execsql {SELECT * FROM t1}
       
    92 } {one I 1 two II 2 three III 3 four IV 4 five V 5}
       
    93 
       
    94 # Use the index
       
    95 do_test $t.4 {
       
    96   execsql {
       
    97     SELECT * FROM t1 WHERE a = 'one';
       
    98   }
       
    99 } {one I 1}
       
   100 do_test $t.5 {
       
   101   execsql {
       
   102     SELECT * FROM t1 WHERE a = 'four';
       
   103   }
       
   104 } {four IV 4}
       
   105 ifcapable subquery {
       
   106   do_test $t.6 {
       
   107     execsql {
       
   108       SELECT * FROM t1 WHERE a IN ('one', 'two');
       
   109     }
       
   110   } {one I 1 two II 2}
       
   111 }
       
   112 
       
   113 # Now check that we can retrieve data in both UTF-16 and UTF-8
       
   114 do_test $t.7 {
       
   115   set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
       
   116   sqlite3_step $STMT
       
   117   sqlite3_column_text $STMT 0
       
   118 } {four}
       
   119 
       
   120 do_test $t.8 {
       
   121   sqlite3_step $STMT
       
   122   utf8 [sqlite3_column_text16 $STMT 0]
       
   123 } {five}
       
   124 
       
   125 do_test $t.9 {
       
   126   sqlite3_finalize $STMT
       
   127 } SQLITE_OK
       
   128 
       
   129 ifcapable vacuum {
       
   130   execsql VACUUM
       
   131 }
       
   132 
       
   133 do_test $t.10 {
       
   134   db eval {PRAGMA encoding}
       
   135 } $enc
       
   136 
       
   137 }
       
   138 
       
   139 # The three unicode encodings understood by SQLite.
       
   140 set encodings [list UTF-8 UTF-16le UTF-16be]
       
   141 
       
   142 set sqlite_os_trace 0
       
   143 set i 1
       
   144 foreach enc $encodings {
       
   145   file delete -force test.db
       
   146   sqlite3 db test.db
       
   147   db eval "PRAGMA encoding = \"$enc\""
       
   148   execsql $dbcontents
       
   149   do_test enc2-$i.0.1 {
       
   150     db eval {PRAGMA encoding}
       
   151   } $enc
       
   152   do_test enc2-$i.0.2 {
       
   153     db eval {PRAGMA encoding=UTF8}
       
   154     db eval {PRAGMA encoding}
       
   155   } $enc
       
   156   do_test enc2-$i.0.3 {
       
   157     db eval {PRAGMA encoding=UTF16le}
       
   158     db eval {PRAGMA encoding}
       
   159   } $enc
       
   160   do_test enc2-$i.0.4 {
       
   161     db eval {PRAGMA encoding=UTF16be}
       
   162     db eval {PRAGMA encoding}
       
   163   } $enc
       
   164 
       
   165   db close
       
   166   run_test_script enc2-$i $enc
       
   167   db close
       
   168   incr i
       
   169 }
       
   170 
       
   171 # Test that it is an error to try to attach a database with a different
       
   172 # encoding to the main database.
       
   173 ifcapable attach {
       
   174   do_test enc2-4.1 {
       
   175     file delete -force test.db
       
   176     sqlite3 db test.db
       
   177     db eval "PRAGMA encoding = 'UTF-8'"
       
   178     db eval "CREATE TABLE abc(a, b, c);"
       
   179   } {}
       
   180   do_test enc2-4.2 {
       
   181     file delete -force test2.db
       
   182     sqlite3 db2 test2.db
       
   183     db2 eval "PRAGMA encoding = 'UTF-16'"
       
   184     db2 eval "CREATE TABLE abc(a, b, c);"
       
   185   } {}
       
   186   do_test enc2-4.3 {
       
   187     catchsql {
       
   188       ATTACH 'test2.db' as aux;
       
   189     }
       
   190   } {1 {attached databases must use the same text encoding as main database}}
       
   191   db2 close
       
   192   db close
       
   193 }
       
   194 
       
   195 # The following tests - enc2-5.* - test that SQLite selects the correct
       
   196 # collation sequence when more than one is available.
       
   197 
       
   198 set ::values [list one two three four five]
       
   199 set ::test_collate_enc INVALID
       
   200 proc test_collate {enc lhs rhs} {
       
   201   set ::test_collate_enc $enc
       
   202   set l [lsearch -exact $::values $lhs]
       
   203   set r [lsearch -exact $::values $rhs]
       
   204   set res [expr $l - $r]
       
   205   # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
       
   206   return $res
       
   207 }
       
   208 
       
   209 file delete -force test.db
       
   210 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   211 do_test enc2-5.0 {
       
   212   execsql {
       
   213     CREATE TABLE t5(a);
       
   214     INSERT INTO t5 VALUES('one');
       
   215     INSERT INTO t5 VALUES('two');
       
   216     INSERT INTO t5 VALUES('five');
       
   217     INSERT INTO t5 VALUES('three');
       
   218     INSERT INTO t5 VALUES('four');
       
   219   }
       
   220 } {}
       
   221 do_test enc2-5.1 {
       
   222   add_test_collate $DB 1 1 1
       
   223   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
       
   224   lappend res $::test_collate_enc
       
   225 } {one two three four five UTF-8}
       
   226 do_test enc2-5.2 {
       
   227   add_test_collate $DB 0 1 0
       
   228   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   229   lappend res $::test_collate_enc
       
   230 } {one two three four five UTF-16LE}
       
   231 do_test enc2-5.3 {
       
   232   add_test_collate $DB 0 0 1
       
   233   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   234   lappend res $::test_collate_enc
       
   235 } {one two three four five UTF-16BE}
       
   236 
       
   237 db close
       
   238 file delete -force test.db
       
   239 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   240 execsql {pragma encoding = 'UTF-16LE'}
       
   241 do_test enc2-5.4 {
       
   242   execsql {
       
   243     CREATE TABLE t5(a);
       
   244     INSERT INTO t5 VALUES('one');
       
   245     INSERT INTO t5 VALUES('two');
       
   246     INSERT INTO t5 VALUES('five');
       
   247     INSERT INTO t5 VALUES('three');
       
   248     INSERT INTO t5 VALUES('four');
       
   249   }
       
   250 } {}
       
   251 do_test enc2-5.5 {
       
   252   add_test_collate $DB 1 1 1
       
   253   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   254   lappend res $::test_collate_enc
       
   255 } {one two three four five UTF-16LE}
       
   256 do_test enc2-5.6 {
       
   257   add_test_collate $DB 1 0 1
       
   258   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   259   lappend res $::test_collate_enc
       
   260 } {one two three four five UTF-16BE}
       
   261 do_test enc2-5.7 {
       
   262   add_test_collate $DB 1 0 0
       
   263   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   264   lappend res $::test_collate_enc
       
   265 } {one two three four five UTF-8}
       
   266 
       
   267 db close
       
   268 file delete -force test.db
       
   269 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   270 execsql {pragma encoding = 'UTF-16BE'}
       
   271 do_test enc2-5.8 {
       
   272   execsql {
       
   273     CREATE TABLE t5(a);
       
   274     INSERT INTO t5 VALUES('one');
       
   275     INSERT INTO t5 VALUES('two');
       
   276     INSERT INTO t5 VALUES('five');
       
   277     INSERT INTO t5 VALUES('three');
       
   278     INSERT INTO t5 VALUES('four');
       
   279   }
       
   280 } {}
       
   281 do_test enc2-5.9 {
       
   282   add_test_collate $DB 1 1 1
       
   283   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   284   lappend res $::test_collate_enc
       
   285 } {one two three four five UTF-16BE}
       
   286 do_test enc2-5.10 {
       
   287   add_test_collate $DB 1 1 0
       
   288   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   289   lappend res $::test_collate_enc
       
   290 } {one two three four five UTF-16LE}
       
   291 do_test enc2-5.11 {
       
   292   add_test_collate $DB 1 0 0
       
   293   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
       
   294   lappend res $::test_collate_enc
       
   295 } {one two three four five UTF-8}
       
   296 
       
   297 # Also test that a UTF-16 collation factory works.
       
   298 do_test enc2-5-12 {
       
   299   add_test_collate $DB 0 0 0
       
   300   catchsql {
       
   301     SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
       
   302   }
       
   303 } {1 {no such collation sequence: test_collate}}
       
   304 do_test enc2-5.13 {
       
   305   add_test_collate_needed $DB 
       
   306   set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
       
   307   lappend res $::test_collate_enc
       
   308 } {one two three four five UTF-16BE}
       
   309 do_test enc2-5.14 {
       
   310   set ::sqlite_last_needed_collation
       
   311 } test_collate
       
   312 
       
   313 db close
       
   314 file delete -force test.db
       
   315 
       
   316 do_test enc2-5.15 {
       
   317   sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
       
   318   add_test_collate_needed $::DB
       
   319   set ::sqlite_last_needed_collation
       
   320 } {}
       
   321 do_test enc2-5.16 {
       
   322   execsql {CREATE TABLE t1(a varchar collate test_collate);}
       
   323 } {}
       
   324 do_test enc2-5.17 {
       
   325   set ::sqlite_last_needed_collation
       
   326 } {test_collate}
       
   327 
       
   328 # The following tests - enc2-6.* - test that SQLite selects the correct
       
   329 # user function when more than one is available.
       
   330 
       
   331 proc test_function {enc arg} {
       
   332   return "$enc $arg"
       
   333 }
       
   334 
       
   335 db close
       
   336 file delete -force test.db
       
   337 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   338 execsql {pragma encoding = 'UTF-8'}
       
   339 do_test enc2-6.0 {
       
   340   execsql {
       
   341     CREATE TABLE t5(a);
       
   342     INSERT INTO t5 VALUES('one');
       
   343   }
       
   344 } {}
       
   345 do_test enc2-6.1 {
       
   346   add_test_function $DB 1 1 1
       
   347   execsql {
       
   348     SELECT test_function('sqlite')
       
   349   }
       
   350 } {{UTF-8 sqlite}}
       
   351 db close
       
   352 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   353 do_test enc2-6.2 {
       
   354   add_test_function $DB 0 1 0
       
   355   execsql {
       
   356     SELECT test_function('sqlite')
       
   357   }
       
   358 } {{UTF-16LE sqlite}}
       
   359 db close
       
   360 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   361 do_test enc2-6.3 {
       
   362   add_test_function $DB 0 0 1
       
   363   execsql {
       
   364     SELECT test_function('sqlite')
       
   365   }
       
   366 } {{UTF-16BE sqlite}}
       
   367 
       
   368 db close
       
   369 file delete -force test.db
       
   370 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   371 execsql {pragma encoding = 'UTF-16LE'}
       
   372 do_test enc2-6.3 {
       
   373   execsql {
       
   374     CREATE TABLE t5(a);
       
   375     INSERT INTO t5 VALUES('sqlite');
       
   376   }
       
   377 } {}
       
   378 do_test enc2-6.4 {
       
   379   add_test_function $DB 1 1 1
       
   380   execsql {
       
   381     SELECT test_function('sqlite')
       
   382   }
       
   383 } {{UTF-16LE sqlite}}
       
   384 db close
       
   385 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   386 do_test enc2-6.5 {
       
   387   add_test_function $DB 0 1 0
       
   388   execsql {
       
   389     SELECT test_function('sqlite')
       
   390   }
       
   391 } {{UTF-16LE sqlite}}
       
   392 db close
       
   393 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   394 do_test enc2-6.6 {
       
   395   add_test_function $DB 0 0 1
       
   396   execsql {
       
   397     SELECT test_function('sqlite')
       
   398   }
       
   399 } {{UTF-16BE sqlite}}
       
   400 
       
   401 db close
       
   402 file delete -force test.db
       
   403 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   404 execsql {pragma encoding = 'UTF-16BE'}
       
   405 do_test enc2-6.7 {
       
   406   execsql {
       
   407     CREATE TABLE t5(a);
       
   408     INSERT INTO t5 VALUES('sqlite');
       
   409   }
       
   410 } {}
       
   411 do_test enc2-6.8 {
       
   412   add_test_function $DB 1 1 1
       
   413   execsql {
       
   414     SELECT test_function('sqlite')
       
   415   }
       
   416 } {{UTF-16BE sqlite}}
       
   417 db close
       
   418 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   419 do_test enc2-6.9 {
       
   420   add_test_function $DB 0 1 0
       
   421   execsql {
       
   422     SELECT test_function('sqlite')
       
   423   }
       
   424 } {{UTF-16LE sqlite}}
       
   425 db close
       
   426 sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
       
   427 do_test enc2-6.10 {
       
   428   add_test_function $DB 0 0 1
       
   429   execsql {
       
   430     SELECT test_function('sqlite')
       
   431   }
       
   432 } {{UTF-16BE sqlite}}
       
   433 
       
   434 
       
   435 db close
       
   436 file delete -force test.db
       
   437 
       
   438 # The following tests - enc2-7.* - function as follows:
       
   439 #
       
   440 # 1: Open an empty database file assuming UTF-16 encoding.
       
   441 # 2: Open the same database with a different handle assuming UTF-8. Create
       
   442 #    a table using this handle.
       
   443 # 3: Read the sqlite_master table from the first handle. 
       
   444 # 4: Ensure the first handle recognises the database encoding is UTF-8.
       
   445 #
       
   446 do_test enc2-7.1 {
       
   447   sqlite3 db test.db
       
   448   execsql {
       
   449     PRAGMA encoding = 'UTF-16';
       
   450     SELECT * FROM sqlite_master;
       
   451   }
       
   452 } {}
       
   453 do_test enc2-7.2 {
       
   454   set enc [execsql {
       
   455     PRAGMA encoding;
       
   456   }]
       
   457   string range $enc 0 end-2 ;# Chop off the "le" or "be"
       
   458 } {UTF-16}
       
   459 do_test enc2-7.3 {
       
   460   sqlite3 db2 test.db
       
   461   execsql {
       
   462     PRAGMA encoding = 'UTF-8';
       
   463     CREATE TABLE abc(a, b, c);
       
   464   } db2
       
   465 } {}
       
   466 do_test enc2-7.4 {
       
   467   execsql {
       
   468     SELECT * FROM sqlite_master;
       
   469   }
       
   470 } "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
       
   471 do_test enc2-7.5 {
       
   472   execsql {
       
   473     PRAGMA encoding;
       
   474   }
       
   475 } {UTF-8}
       
   476 
       
   477 db close
       
   478 db2 close
       
   479 
       
   480 proc utf16 {utf8} {
       
   481   set utf16 [encoding convertto unicode $utf8]
       
   482   append utf16 "\x00\x00"
       
   483   return $utf16
       
   484 }
       
   485 ifcapable {complete} {
       
   486   do_test enc2-8.1 {
       
   487     sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
       
   488   } {1}
       
   489   do_test enc2-8.2 {
       
   490     sqlite3_complete16 [utf16 "SELECT * FROM"]
       
   491   } {0}
       
   492 }
       
   493 
       
   494 # Test that the encoding of an empty database may still be set after the
       
   495 # (empty) schema has been initialized.
       
   496 file delete -force test.db
       
   497 do_test enc2-9.1 {
       
   498   sqlite3 db test.db
       
   499   execsql {
       
   500     PRAGMA encoding = 'UTF-8';
       
   501     PRAGMA encoding;
       
   502   }
       
   503 } {UTF-8}
       
   504 do_test enc2-9.2 {
       
   505   sqlite3 db test.db
       
   506   execsql {
       
   507     PRAGMA encoding = 'UTF-16le';
       
   508     PRAGMA encoding;
       
   509   }
       
   510 } {UTF-16le}
       
   511 do_test enc2-9.3 {
       
   512   sqlite3 db test.db
       
   513   execsql {
       
   514     SELECT * FROM sqlite_master;
       
   515     PRAGMA encoding = 'UTF-8';
       
   516     PRAGMA encoding;
       
   517   }
       
   518 } {UTF-8}
       
   519 do_test enc2-9.4 {
       
   520   sqlite3 db test.db
       
   521   execsql {
       
   522     PRAGMA encoding = 'UTF-16le';
       
   523     CREATE TABLE abc(a, b, c);
       
   524     PRAGMA encoding;
       
   525   }
       
   526 } {UTF-16le}
       
   527 do_test enc2-9.5 {
       
   528   sqlite3 db test.db
       
   529   execsql {
       
   530     PRAGMA encoding = 'UTF-8';
       
   531     PRAGMA encoding;
       
   532   }
       
   533 } {UTF-16le}
       
   534 
       
   535 # Ticket #1987.
       
   536 # Disallow encoding changes once the encoding has been set.
       
   537 #
       
   538 do_test enc2-10.1 {
       
   539   db close
       
   540   file delete -force test.db test.db-journal
       
   541   sqlite3 db test.db
       
   542   db eval {
       
   543     PRAGMA encoding=UTF16;
       
   544     CREATE TABLE t1(a);
       
   545     PRAGMA encoding=UTF8;
       
   546     CREATE TABLE t2(b);
       
   547   }
       
   548   db close
       
   549   sqlite3 db test.db
       
   550   db eval {
       
   551     SELECT name FROM sqlite_master
       
   552   }
       
   553 } {t1 t2}
       
   554 
       
   555 finish_test