persistentstorage/sqlite3api/TEST/TclScript/enc2.test
changeset 0 08ec8eefde2f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/persistentstorage/sqlite3api/TEST/TclScript/enc2.test	Fri Jan 22 11:06:30 2010 +0200
@@ -0,0 +1,555 @@
+# 2002 May 24
+#
+# The author disclaims copyright to this source code.  In place of
+# a legal notice, here is a blessing:
+#
+#    May you do good and not evil.
+#    May you find forgiveness for yourself and forgive others.
+#    May you share freely, never taking more than you give.
+#
+#***********************************************************************
+# This file implements regression tests for SQLite library.  The focus of
+# this file is testing the SQLite routines used for converting between the
+# various suported unicode encodings (UTF-8, UTF-16, UTF-16le and
+# UTF-16be).
+#
+# $Id: enc2.test,v 1.29 2007/10/09 08:29:32 danielk1977 Exp $
+
+set testdir [file dirname $argv0]
+source $testdir/tester.tcl
+
+# If UTF16 support is disabled, ignore the tests in this file
+#
+ifcapable {!utf16} {
+  finish_test
+  return
+}
+
+# The rough organisation of tests in this file is:
+#
+# enc2.1.*: Simple tests with a UTF-8 db.
+# enc2.2.*: Simple tests with a UTF-16LE db.
+# enc2.3.*: Simple tests with a UTF-16BE db.
+# enc2.4.*: Test that attached databases must have the same text encoding
+#           as the main database.
+# enc2.5.*: Test the behaviour of the library when a collation sequence is
+#           not available for the most desirable text encoding.
+# enc2.6.*: Similar test for user functions.
+# enc2.7.*: Test that the VerifyCookie opcode protects against assuming the
+#           wrong text encoding for the database.
+# enc2.8.*: Test sqlite3_complete16()
+#
+
+db close
+
+# Return the UTF-8 representation of the supplied UTF-16 string $str. 
+proc utf8 {str} {
+  # If $str ends in two 0x00 0x00 bytes, knock these off before
+  # converting to UTF-8 using TCL.
+  binary scan $str \c* vals
+  if {[lindex $vals end]==0 && [lindex $vals end-1]==0} {
+    set str [binary format \c* [lrange $vals 0 end-2]]
+  }
+
+  set r [encoding convertfrom unicode $str]
+  return $r
+}
+
+#
+# This proc contains all the tests in this file. It is run
+# three times. Each time the file 'test.db' contains a database
+# with the following contents:
+set dbcontents {
+  CREATE TABLE t1(a PRIMARY KEY, b, c);
+  INSERT INTO t1 VALUES('one', 'I', 1);
+}
+# This proc tests that we can open and manipulate the test.db 
+# database, and that it is possible to retreive values in
+# various text encodings.
+#
+proc run_test_script {t enc} {
+
+# Open the database and pull out a (the) row.
+do_test $t.1 {
+  sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+  execsql {SELECT * FROM t1}
+} {one I 1}
+
+# Insert some data
+do_test $t.2 {
+  execsql {INSERT INTO t1 VALUES('two', 'II', 2);}
+  execsql {SELECT * FROM t1}
+} {one I 1 two II 2}
+
+# Insert some data 
+do_test $t.3 {
+  execsql {
+    INSERT INTO t1 VALUES('three','III',3);
+    INSERT INTO t1 VALUES('four','IV',4);
+    INSERT INTO t1 VALUES('five','V',5);
+  }
+  execsql {SELECT * FROM t1}
+} {one I 1 two II 2 three III 3 four IV 4 five V 5}
+
+# Use the index
+do_test $t.4 {
+  execsql {
+    SELECT * FROM t1 WHERE a = 'one';
+  }
+} {one I 1}
+do_test $t.5 {
+  execsql {
+    SELECT * FROM t1 WHERE a = 'four';
+  }
+} {four IV 4}
+ifcapable subquery {
+  do_test $t.6 {
+    execsql {
+      SELECT * FROM t1 WHERE a IN ('one', 'two');
+    }
+  } {one I 1 two II 2}
+}
+
+# Now check that we can retrieve data in both UTF-16 and UTF-8
+do_test $t.7 {
+  set STMT [sqlite3_prepare $DB "SELECT a FROM t1 WHERE c>3;" -1 TAIL]
+  sqlite3_step $STMT
+  sqlite3_column_text $STMT 0
+} {four}
+
+do_test $t.8 {
+  sqlite3_step $STMT
+  utf8 [sqlite3_column_text16 $STMT 0]
+} {five}
+
+do_test $t.9 {
+  sqlite3_finalize $STMT
+} SQLITE_OK
+
+ifcapable vacuum {
+  execsql VACUUM
+}
+
+do_test $t.10 {
+  db eval {PRAGMA encoding}
+} $enc
+
+}
+
+# The three unicode encodings understood by SQLite.
+set encodings [list UTF-8 UTF-16le UTF-16be]
+
+set sqlite_os_trace 0
+set i 1
+foreach enc $encodings {
+  file delete -force test.db
+  sqlite3 db test.db
+  db eval "PRAGMA encoding = \"$enc\""
+  execsql $dbcontents
+  do_test enc2-$i.0.1 {
+    db eval {PRAGMA encoding}
+  } $enc
+  do_test enc2-$i.0.2 {
+    db eval {PRAGMA encoding=UTF8}
+    db eval {PRAGMA encoding}
+  } $enc
+  do_test enc2-$i.0.3 {
+    db eval {PRAGMA encoding=UTF16le}
+    db eval {PRAGMA encoding}
+  } $enc
+  do_test enc2-$i.0.4 {
+    db eval {PRAGMA encoding=UTF16be}
+    db eval {PRAGMA encoding}
+  } $enc
+
+  db close
+  run_test_script enc2-$i $enc
+  db close
+  incr i
+}
+
+# Test that it is an error to try to attach a database with a different
+# encoding to the main database.
+ifcapable attach {
+  do_test enc2-4.1 {
+    file delete -force test.db
+    sqlite3 db test.db
+    db eval "PRAGMA encoding = 'UTF-8'"
+    db eval "CREATE TABLE abc(a, b, c);"
+  } {}
+  do_test enc2-4.2 {
+    file delete -force test2.db
+    sqlite3 db2 test2.db
+    db2 eval "PRAGMA encoding = 'UTF-16'"
+    db2 eval "CREATE TABLE abc(a, b, c);"
+  } {}
+  do_test enc2-4.3 {
+    catchsql {
+      ATTACH 'test2.db' as aux;
+    }
+  } {1 {attached databases must use the same text encoding as main database}}
+  db2 close
+  db close
+}
+
+# The following tests - enc2-5.* - test that SQLite selects the correct
+# collation sequence when more than one is available.
+
+set ::values [list one two three four five]
+set ::test_collate_enc INVALID
+proc test_collate {enc lhs rhs} {
+  set ::test_collate_enc $enc
+  set l [lsearch -exact $::values $lhs]
+  set r [lsearch -exact $::values $rhs]
+  set res [expr $l - $r]
+  # puts "enc=$enc lhs=$lhs/$l rhs=$rhs/$r res=$res"
+  return $res
+}
+
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-5.0 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('one');
+    INSERT INTO t5 VALUES('two');
+    INSERT INTO t5 VALUES('five');
+    INSERT INTO t5 VALUES('three');
+    INSERT INTO t5 VALUES('four');
+  }
+} {}
+do_test enc2-5.1 {
+  add_test_collate $DB 1 1 1
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate;}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-8}
+do_test enc2-5.2 {
+  add_test_collate $DB 0 1 0
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16LE}
+do_test enc2-5.3 {
+  add_test_collate $DB 0 0 1
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16BE}
+
+db close
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+execsql {pragma encoding = 'UTF-16LE'}
+do_test enc2-5.4 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('one');
+    INSERT INTO t5 VALUES('two');
+    INSERT INTO t5 VALUES('five');
+    INSERT INTO t5 VALUES('three');
+    INSERT INTO t5 VALUES('four');
+  }
+} {}
+do_test enc2-5.5 {
+  add_test_collate $DB 1 1 1
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16LE}
+do_test enc2-5.6 {
+  add_test_collate $DB 1 0 1
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16BE}
+do_test enc2-5.7 {
+  add_test_collate $DB 1 0 0
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-8}
+
+db close
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+execsql {pragma encoding = 'UTF-16BE'}
+do_test enc2-5.8 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('one');
+    INSERT INTO t5 VALUES('two');
+    INSERT INTO t5 VALUES('five');
+    INSERT INTO t5 VALUES('three');
+    INSERT INTO t5 VALUES('four');
+  }
+} {}
+do_test enc2-5.9 {
+  add_test_collate $DB 1 1 1
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16BE}
+do_test enc2-5.10 {
+  add_test_collate $DB 1 1 0
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16LE}
+do_test enc2-5.11 {
+  add_test_collate $DB 1 0 0
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate}]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-8}
+
+# Also test that a UTF-16 collation factory works.
+do_test enc2-5-12 {
+  add_test_collate $DB 0 0 0
+  catchsql {
+    SELECT * FROM t5 ORDER BY 1 COLLATE test_collate
+  }
+} {1 {no such collation sequence: test_collate}}
+do_test enc2-5.13 {
+  add_test_collate_needed $DB 
+  set res [execsql {SELECT * FROM t5 ORDER BY 1 COLLATE test_collate; }]
+  lappend res $::test_collate_enc
+} {one two three four five UTF-16BE}
+do_test enc2-5.14 {
+  set ::sqlite_last_needed_collation
+} test_collate
+
+db close
+file delete -force test.db
+
+do_test enc2-5.15 {
+  sqlite3 db test.db; set ::DB [sqlite3_connection_pointer db]
+  add_test_collate_needed $::DB
+  set ::sqlite_last_needed_collation
+} {}
+do_test enc2-5.16 {
+  execsql {CREATE TABLE t1(a varchar collate test_collate);}
+} {}
+do_test enc2-5.17 {
+  set ::sqlite_last_needed_collation
+} {test_collate}
+
+# The following tests - enc2-6.* - test that SQLite selects the correct
+# user function when more than one is available.
+
+proc test_function {enc arg} {
+  return "$enc $arg"
+}
+
+db close
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+execsql {pragma encoding = 'UTF-8'}
+do_test enc2-6.0 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('one');
+  }
+} {}
+do_test enc2-6.1 {
+  add_test_function $DB 1 1 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-8 sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.2 {
+  add_test_function $DB 0 1 0
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16LE sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.3 {
+  add_test_function $DB 0 0 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16BE sqlite}}
+
+db close
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+execsql {pragma encoding = 'UTF-16LE'}
+do_test enc2-6.3 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('sqlite');
+  }
+} {}
+do_test enc2-6.4 {
+  add_test_function $DB 1 1 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16LE sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.5 {
+  add_test_function $DB 0 1 0
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16LE sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.6 {
+  add_test_function $DB 0 0 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16BE sqlite}}
+
+db close
+file delete -force test.db
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+execsql {pragma encoding = 'UTF-16BE'}
+do_test enc2-6.7 {
+  execsql {
+    CREATE TABLE t5(a);
+    INSERT INTO t5 VALUES('sqlite');
+  }
+} {}
+do_test enc2-6.8 {
+  add_test_function $DB 1 1 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16BE sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.9 {
+  add_test_function $DB 0 1 0
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16LE sqlite}}
+db close
+sqlite3 db test.db; set DB [sqlite3_connection_pointer db]
+do_test enc2-6.10 {
+  add_test_function $DB 0 0 1
+  execsql {
+    SELECT test_function('sqlite')
+  }
+} {{UTF-16BE sqlite}}
+
+
+db close
+file delete -force test.db
+
+# The following tests - enc2-7.* - function as follows:
+#
+# 1: Open an empty database file assuming UTF-16 encoding.
+# 2: Open the same database with a different handle assuming UTF-8. Create
+#    a table using this handle.
+# 3: Read the sqlite_master table from the first handle. 
+# 4: Ensure the first handle recognises the database encoding is UTF-8.
+#
+do_test enc2-7.1 {
+  sqlite3 db test.db
+  execsql {
+    PRAGMA encoding = 'UTF-16';
+    SELECT * FROM sqlite_master;
+  }
+} {}
+do_test enc2-7.2 {
+  set enc [execsql {
+    PRAGMA encoding;
+  }]
+  string range $enc 0 end-2 ;# Chop off the "le" or "be"
+} {UTF-16}
+do_test enc2-7.3 {
+  sqlite3 db2 test.db
+  execsql {
+    PRAGMA encoding = 'UTF-8';
+    CREATE TABLE abc(a, b, c);
+  } db2
+} {}
+do_test enc2-7.4 {
+  execsql {
+    SELECT * FROM sqlite_master;
+  }
+} "table abc abc [expr $AUTOVACUUM?3:2] {CREATE TABLE abc(a, b, c)}"
+do_test enc2-7.5 {
+  execsql {
+    PRAGMA encoding;
+  }
+} {UTF-8}
+
+db close
+db2 close
+
+proc utf16 {utf8} {
+  set utf16 [encoding convertto unicode $utf8]
+  append utf16 "\x00\x00"
+  return $utf16
+}
+ifcapable {complete} {
+  do_test enc2-8.1 {
+    sqlite3_complete16 [utf16 "SELECT * FROM t1;"]
+  } {1}
+  do_test enc2-8.2 {
+    sqlite3_complete16 [utf16 "SELECT * FROM"]
+  } {0}
+}
+
+# Test that the encoding of an empty database may still be set after the
+# (empty) schema has been initialized.
+file delete -force test.db
+do_test enc2-9.1 {
+  sqlite3 db test.db
+  execsql {
+    PRAGMA encoding = 'UTF-8';
+    PRAGMA encoding;
+  }
+} {UTF-8}
+do_test enc2-9.2 {
+  sqlite3 db test.db
+  execsql {
+    PRAGMA encoding = 'UTF-16le';
+    PRAGMA encoding;
+  }
+} {UTF-16le}
+do_test enc2-9.3 {
+  sqlite3 db test.db
+  execsql {
+    SELECT * FROM sqlite_master;
+    PRAGMA encoding = 'UTF-8';
+    PRAGMA encoding;
+  }
+} {UTF-8}
+do_test enc2-9.4 {
+  sqlite3 db test.db
+  execsql {
+    PRAGMA encoding = 'UTF-16le';
+    CREATE TABLE abc(a, b, c);
+    PRAGMA encoding;
+  }
+} {UTF-16le}
+do_test enc2-9.5 {
+  sqlite3 db test.db
+  execsql {
+    PRAGMA encoding = 'UTF-8';
+    PRAGMA encoding;
+  }
+} {UTF-16le}
+
+# Ticket #1987.
+# Disallow encoding changes once the encoding has been set.
+#
+do_test enc2-10.1 {
+  db close
+  file delete -force test.db test.db-journal
+  sqlite3 db test.db
+  db eval {
+    PRAGMA encoding=UTF16;
+    CREATE TABLE t1(a);
+    PRAGMA encoding=UTF8;
+    CREATE TABLE t2(b);
+  }
+  db close
+  sqlite3 db test.db
+  db eval {
+    SELECT name FROM sqlite_master
+  }
+} {t1 t2}
+
+finish_test