persistentstorage/sqlite3api/TEST/TclScript/fts3an.test
changeset 0 08ec8eefde2f
equal deleted inserted replaced
-1:000000000000 0:08ec8eefde2f
       
     1 # 2007 April 26
       
     2 #
       
     3 # The author disclaims copyright to this source code.
       
     4 #
       
     5 #*************************************************************************
       
     6 # This file implements tests for prefix-searching in the fts3
       
     7 # component of the SQLite library.
       
     8 #
       
     9 # $Id: fts3an.test,v 1.2 2007/12/13 21:54:11 drh Exp $
       
    10 #
       
    11 
       
    12 set testdir [file dirname $argv0]
       
    13 source $testdir/tester.tcl
       
    14 
       
     15 # If SQLITE_ENABLE_FTS3 is not defined, omit this file.
       
    16 ifcapable !fts3 {
       
    17   finish_test
       
    18   return
       
    19 }
       
    20 
       
    21 # A large string to prime the pump with.
       
    22 set text {
       
    23   Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
       
    24   iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
       
    25   sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
       
    26   aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
       
    27   ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
       
    28   at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
       
    29   ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
       
    30   luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
       
    31   lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
       
    32   potenti. Cum sociis natoque penatibus et magnis dis parturient
       
    33   montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
       
    34   suscipit nec, consequat quis, risus.
       
    35 }
       
    36 
       
    37 db eval {
       
    38   CREATE VIRTUAL TABLE t1 USING fts3(c);
       
    39 
       
    40   INSERT INTO t1(rowid, c) VALUES(1, $text);
       
    41   INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row');
       
    42 }
       
    43 
       
    44 # Exact match
       
    45 do_test fts3an-1.1 {
       
    46   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem'"
       
    47 } {1}
       
    48 
       
    49 # And a prefix
       
    50 do_test fts3an-1.2 {
       
    51   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore*'"
       
    52 } {1}
       
    53 
       
    54 # Prefix includes exact match
       
    55 do_test fts3an-1.3 {
       
    56   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lorem*'"
       
    57 } {1}
       
    58 
       
    59 # Make certain everything isn't considered a prefix!
       
    60 do_test fts3an-1.4 {
       
    61   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lore'"
       
    62 } {}
       
    63 
       
    64 # Prefix across multiple rows.
       
    65 do_test fts3an-1.5 {
       
    66   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo*'"
       
    67 } {1 2}
       
    68 
       
    69 # Likewise, with multiple hits in one document.
       
    70 do_test fts3an-1.6 {
       
    71   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'l*'"
       
    72 } {1 2}
       
    73 
       
    74 # Prefix which should only hit one document.
       
    75 do_test fts3an-1.7 {
       
    76   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lov*'"
       
    77 } {2}
       
    78 
       
     79 # A * that is not attached to the end of a term is dropped.
       
    80 do_test fts3an-1.8 {
       
    81   execsql "SELECT rowid FROM t1 WHERE t1 MATCH 'lo *'"
       
    82 } {}
       
    83 
       
    84 # Stand-alone * is dropped.
       
    85 do_test fts3an-1.9 {
       
    86   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '*'"
       
    87 } {}
       
    88 
       
    89 # Phrase-query prefix.
       
    90 do_test fts3an-1.10 {
       
    91   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r*\"'"
       
    92 } {2}
       
    93 do_test fts3an-1.11 {
       
    94   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"lovely r\"'"
       
    95 } {}
       
    96 
       
    97 # Phrase query with multiple prefix matches.
       
    98 do_test fts3an-1.12 {
       
    99   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l*\"'"
       
   100 } {1 2}
       
   101 
       
    102 # Phrase query with multiple prefix matches followed by an exact term.
       
   103 do_test fts3an-1.13 {
       
   104   execsql "SELECT rowid FROM t1 WHERE t1 MATCH '\"a* l* row\"'"
       
   105 } {2}
       
   106 
       
   107 
       
   108 
       
   109 
       
   110 # Test across updates (and, by implication, deletes).
       
   111 
       
   112 # Version of text without "lorem".
       
   113 regsub -all {[Ll]orem} $text '' ntext
       
   114 
       
   115 db eval {
       
   116   CREATE VIRTUAL TABLE t2 USING fts3(c);
       
   117 
       
   118   INSERT INTO t2(rowid, c) VALUES(1, $text);
       
   119   INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
       
   120   UPDATE t2 SET c = $ntext WHERE rowid = 1;
       
   121 }
       
   122 
       
   123 # Can't see lorem as an exact match.
       
   124 do_test fts3an-2.1 {
       
   125   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lorem'"
       
   126 } {}
       
   127 
       
   128 # Can't see a prefix of lorem, either.
       
   129 do_test fts3an-2.2 {
       
   130   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lore*'"
       
   131 } {}
       
   132 
       
   133 # Can see lovely in the other document.
       
   134 do_test fts3an-2.3 {
       
   135   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lo*'"
       
   136 } {2}
       
   137 
       
   138 # Can still see other hits.
       
   139 do_test fts3an-2.4 {
       
   140   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'l*'"
       
   141 } {1 2}
       
   142 
       
   143 # Prefix which should only hit one document.
       
   144 do_test fts3an-2.5 {
       
   145   execsql "SELECT rowid FROM t2 WHERE t2 MATCH 'lov*'"
       
   146 } {2}
       
   147 
       
   148 
       
   149 
       
   150 # Test with a segment which will have multiple levels in the tree.
       
   151 
       
   152 # Build a big document with lots of unique terms.
       
   153 set bigtext $text
       
   154 foreach c {a b c d e} {
       
   155   regsub -all {[A-Za-z]+} $bigtext "&$c" t
       
   156   append bigtext $t
       
   157 }
       
   158 
       
   159 # Populate a table with many copies of the big document, so that we
       
   160 # can test the number of hits found.  Populate $ret with the expected
       
   161 # hit counts for each row.  offsets() returns 4 elements for every
       
   162 # hit.  We'll have 6 hits for row 1, 1 for row 2, and 6*(2^5)==192 for
       
   163 # $bigtext.
       
   164 set ret {6 1}
       
   165 db eval {
       
   166   BEGIN;
       
   167   CREATE VIRTUAL TABLE t3 USING fts3(c);
       
   168 
       
   169   INSERT INTO t3(rowid, c) VALUES(1, $text);
       
   170   INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row');
       
   171 }
       
   172 for {set i 0} {$i<100} {incr i} {
       
   173   db eval {INSERT INTO t3(rowid, c) VALUES(3+$i, $bigtext)}
       
   174   lappend ret 192
       
   175 }
       
   176 db eval {COMMIT;}
       
   177 
       
   178 # Test that we get the expected number of hits.
       
   179 do_test fts3an-3.1 {
       
   180   set t {}
       
   181   db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
       
   182     set l [llength $o]
       
   183     lappend t [expr {$l/4}]
       
   184   }
       
   185   set t
       
   186 } $ret
       
   187 
       
    188 # TODO(shess) It would be useful to test a couple of edge cases, but I
       
   189 # don't know if we have the precision to manage it from here at this
       
   190 # time.  Prefix hits can cross leaves, which the code above _should_
       
   191 # hit by virtue of size.  There are two variations on this.  If the
       
   192 # tree is 2 levels high, the code will find the leaf-node extent
       
   193 # directly, but if it is higher, the code will have to follow two
       
   194 # separate interior branches down the tree.  Both should be tested.
       
   195 
       
   196 finish_test