persistentstorage/sqlite3api/TEST/TclScript/enc.test
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 # 2002 May 24
       
     2 #
       
     3 # The author disclaims copyright to this source code.  In place of
       
     4 # a legal notice, here is a blessing:
       
     5 #
       
     6 #    May you do good and not evil.
       
     7 #    May you find forgiveness for yourself and forgive others.
       
     8 #    May you share freely, never taking more than you give.
       
     9 #
       
    10 #***********************************************************************
       
    11 # This file implements regression tests for SQLite library.  The focus of
       
    12 # this file is testing the SQLite routines used for converting between the
       
     13 # various supported unicode encodings (UTF-8, UTF-16, UTF-16le and
       
    14 # UTF-16be).
       
    15 #
       
    16 # $Id: enc.test,v 1.7 2007/05/23 16:23:09 danielk1977 Exp $
       
    17 
       
    18 set testdir [file dirname $argv0]
       
    19 source $testdir/tester.tcl
       
    20 
       
    21 # Skip this test if the build does not support multiple encodings.
       
    22 #
       
    23 ifcapable {!utf16} {
       
    24   finish_test
       
    25   return
       
    26 }
       
    27 
       
    28 proc do_bincmp_test {testname got expect} {
       
    29   binary scan $expect \c* expectvals
       
    30   binary scan $got \c* gotvals
       
    31   do_test $testname [list set dummy $gotvals] $expectvals
       
    32 }
       
    33 
       
    34 # $utf16 is a UTF-16 encoded string. Swap each pair of bytes around
       
    35 # to change the byte-order of the string.
       
    36 proc swap_byte_order {utf16} {
       
    37   binary scan $utf16 \c* ints
       
    38 
       
    39   foreach {a b} $ints {
       
    40     lappend ints2 $b
       
    41     lappend ints2 $a
       
    42   }
       
    43 
       
    44   return [binary format \c* $ints2]
       
    45 }
       
    46 
       
    47 #
       
    48 # Test that the SQLite routines for converting between UTF encodings
       
    49 # produce the same results as their TCL counterparts.
       
    50 #
       
    51 # $testname is the prefix to be used for the test names.
       
    52 # $str is a string to use for testing (encoded in UTF-8, as normal for TCL).
       
    53 #
       
    54 # The test procedure is:
       
    55 # 1. Convert the string from UTF-8 to UTF-16le and check that the TCL and
       
    56 #    SQLite routines produce the same results.
       
    57 #
       
    58 # 2. Convert the string from UTF-8 to UTF-16be and check that the TCL and
       
    59 #    SQLite routines produce the same results.
       
    60 #
       
    61 # 3. Use the SQLite routines to convert the native machine order UTF-16
       
    62 #    representation back to the original UTF-8. Check that the result
       
    63 #    matches the original representation.
       
    64 #
       
    65 # 4. Add a byte-order mark to each of the UTF-16 representations and
       
    66 #    check that the SQLite routines can convert them back to UTF-8.  For
       
    67 #    byte-order mark info, refer to section 3.10 of the unicode standard.
       
    68 #
       
    69 # 5. Take the byte-order marked UTF-16 strings from step 4 and ensure
       
    70 #    that SQLite can convert them both to native byte order UTF-16 
       
    71 #    strings, sans BOM.
       
    72 #
       
    73 # Coverage:
       
    74 #
       
    75 # sqlite_utf8to16be (step 2)
       
    76 # sqlite_utf8to16le (step 1)
       
    77 # sqlite_utf16to8 (steps 3, 4)
       
    78 # sqlite_utf16to16le (step 5)
       
    79 # sqlite_utf16to16be (step 5)
       
    80 #
       
    81 proc test_conversion {testname str} {
       
    82  
       
    83   # Step 1.
       
    84   set utf16le_sqlite3 [test_translate $str UTF8 UTF16LE]
       
    85   set utf16le_tcl [encoding convertto unicode $str]
       
    86   append utf16le_tcl "\x00\x00"
       
    87   if { $::tcl_platform(byteOrder)!="littleEndian" } {
       
    88     set utf16le_tcl [swap_byte_order $utf16le_tcl]
       
    89   }
       
    90   do_bincmp_test $testname.1 $utf16le_sqlite3 $utf16le_tcl
       
    91   set utf16le $utf16le_tcl
       
    92 
       
    93   # Step 2.
       
    94   set utf16be_sqlite3 [test_translate $str UTF8 UTF16BE]
       
    95   set utf16be_tcl [encoding convertto unicode $str]
       
    96   append utf16be_tcl "\x00\x00"
       
    97   if { $::tcl_platform(byteOrder)=="littleEndian" } {
       
    98     set utf16be_tcl [swap_byte_order $utf16be_tcl]
       
    99   }
       
   100   do_bincmp_test $testname.2 $utf16be_sqlite3 $utf16be_tcl
       
   101   set utf16be $utf16be_tcl
       
   102  
       
   103   # Step 3.
       
   104   if { $::tcl_platform(byteOrder)=="littleEndian" } {
       
   105     set utf16 $utf16le
       
   106   } else {
       
   107     set utf16 $utf16be
       
   108   }
       
   109   set utf8_sqlite3 [test_translate $utf16 UTF16 UTF8]
       
   110   do_bincmp_test $testname.3 $utf8_sqlite3 [binarize $str]
       
   111 
       
   112   # Step 4 (little endian).
       
   113   append utf16le_bom "\xFF\xFE" $utf16le
       
   114   set utf8_sqlite3 [test_translate $utf16le_bom UTF16 UTF8 1]
       
   115   do_bincmp_test $testname.4.le $utf8_sqlite3 [binarize $str]
       
   116 
       
   117   # Step 4 (big endian).
       
   118   append utf16be_bom "\xFE\xFF" $utf16be
       
   119   set utf8_sqlite3 [test_translate $utf16be_bom UTF16 UTF8]
       
   120   do_bincmp_test $testname.4.be $utf8_sqlite3 [binarize $str]
       
   121 
       
   122   # Step 5 (little endian to little endian).
       
   123   set utf16_sqlite3 [test_translate $utf16le_bom UTF16LE UTF16LE]
       
   124   do_bincmp_test $testname.5.le.le $utf16_sqlite3 $utf16le
       
   125 
       
   126   # Step 5 (big endian to big endian).
       
   127   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16BE]
       
   128   do_bincmp_test $testname.5.be.be $utf16_sqlite3 $utf16be
       
   129 
       
   130   # Step 5 (big endian to little endian).
       
   131   set utf16_sqlite3 [test_translate $utf16be_bom UTF16 UTF16LE]
       
   132   do_bincmp_test $testname.5.be.le $utf16_sqlite3 $utf16le
       
   133 
       
   134   # Step 5 (little endian to big endian).
       
   135   set utf16_sqlite3 [test_translate $utf16le_bom UTF16 UTF16BE]
       
   136   do_bincmp_test $testname.5.le.be $utf16_sqlite3 $utf16be
       
   137 }
       
   138 
       
   139 translate_selftest
       
   140 
       
   141 test_conversion enc-1 "hello world"
       
   142 test_conversion enc-2 "sqlite"
       
   143 test_conversion enc-3 ""
       
   144 test_conversion enc-X "\u0100"
       
   145 test_conversion enc-4 "\u1234"
       
   146 test_conversion enc-5 "\u4321abc"
       
   147 test_conversion enc-6 "\u4321\u1234"
       
   148 test_conversion enc-7 [string repeat "abcde\u00EF\u00EE\uFFFCabc" 100]
       
   149 test_conversion enc-8 [string repeat "\u007E\u007F\u0080\u0081" 100]
       
   150 test_conversion enc-9 [string repeat "\u07FE\u07FF\u0800\u0801\uFFF0" 100]
       
   151 test_conversion enc-10 [string repeat "\uE000" 100]
       
   152 
       
   153 proc test_collate {enc zLeft zRight} {
       
   154   return [string compare $zLeft $zRight]
       
   155 }
       
   156 add_test_collate $::DB 0 0 1
       
   157 do_test enc-11.1 {
       
   158   execsql {
       
   159     CREATE TABLE ab(a COLLATE test_collate, b);
       
   160     INSERT INTO ab VALUES(CAST (X'C388' AS TEXT), X'888800');
       
   161     INSERT INTO ab VALUES(CAST (X'C0808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808080808388' AS TEXT), X'888800');
       
   162     CREATE INDEX ab_i ON ab(a, b);
       
   163   }
       
   164 } {}
       
   165 do_test enc-11.2 {
       
   166   set cp200 "\u00C8"
       
   167   execsql {
       
   168     SELECT count(*) FROM ab WHERE a = $::cp200;
       
   169   }
       
   170 } {2}
       
   171 
       
   172 finish_test