|
1 # 2007 April 26 |
|
2 # |
|
3 # The author disclaims copyright to this source code. |
|
4 # |
|
5 #************************************************************************* |
|
6 # This file implements tests for prefix-searching in the fts3 |
|
7 # component of the SQLite library. |
|
8 # |
|
9 # $Id: fts3an.test,v 1.2 2007/12/13 21:54:11 drh Exp $ |
|
10 # |
|
11 |
|
set testdir [file dirname $argv0]
source [file join $testdir tester.tcl]

# These tests need the fts3 module.  If the build was made without
# SQLITE_ENABLE_FTS3 (the fts3 capability is absent), close out the test
# run here and skip the remainder of this file.
ifcapable !fts3 { finish_test ; return }
|
20 |
|
# Sample document used to seed the full-text tables.  It holds several
# words starting with "l" (Lorem, lorem, luctus, lectus, ...), which the
# prefix queries in this file look for.
set text {
  Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Maecenas
  iaculis mollis ipsum. Praesent rhoncus placerat justo. Duis non quam
  sed turpis posuere placerat. Curabitur et lorem in lorem porttitor
  aliquet. Pellentesque bibendum tincidunt diam. Vestibulum blandit
  ante nec elit. In sapien diam, facilisis eget, dictum sed, viverra
  at, felis. Vestibulum magna. Sed magna dolor, vestibulum rhoncus,
  ornare vel, vulputate sit amet, felis. Integer malesuada, tellus at
  luctus gravida, diam nunc porta nibh, nec imperdiet massa metus eu
  lectus. Aliquam nisi. Nunc fringilla nulla at lectus. Suspendisse
  potenti. Cum sociis natoque penatibus et magnis dis parturient
  montes, nascetur ridiculus mus. Pellentesque odio nulla, feugiat eu,
  suscipit nec, consequat quis, risus.
}

# Table t1: row 1 is the sample document, row 2 is a short document
# whose only "l" word is "lovely".
db eval {CREATE VIRTUAL TABLE t1 USING fts3(c)}
db eval {INSERT INTO t1(rowid, c) VALUES(1, $text)}
db eval {INSERT INTO t1(rowid, c) VALUES(2, 'Another lovely row')}
|
43 |
|
# Prefix-search checks against t1, one table row per test case:
#
#   tn        suffix of the test name (fts3an-1.<tn>)
#   what      human-readable description (documentation only)
#   pattern   text placed between single quotes after MATCH
#   expected  rowids the query must return
#
foreach {tn what pattern expected} {
  1  {Exact match}                                         lorem           {1}
  2  {And a prefix}                                        lore*           {1}
  3  {Prefix includes exact match}                         lorem*          {1}
  4  {Make certain everything isn't considered a prefix!}  lore            {}
  5  {Prefix across multiple rows}                         lo*             {1 2}
  6  {Likewise, with multiple hits in one document}        l*              {1 2}
  7  {Prefix which should only hit one document}           lov*            {2}
  8  {* not at end is dropped}                             {lo *}          {}
  9  {Stand-alone * is dropped}                            *               {}
  10 {Phrase-query prefix}                                 {"lovely r*"}   {2}
  11 {Same phrase without the prefix marker}               {"lovely r"}    {}
  12 {Phrase query with multiple prefix matches}           {"a* l*"}       {1 2}
  13 {Phrase query with multiple prefix matches}           {"a* l* row"}   {2}
} {
  set sql "SELECT rowid FROM t1 WHERE t1 MATCH '$pattern'"
  do_test fts3an-1.$tn [list execsql $sql] $expected
}
|
106 |
|
107 |
|
108 |
|
109 |
|
# Test across updates (and, by implication, deletes).

# Version of text without "lorem".  The replacement has to be {} (the
# empty string): Tcl does not treat single quotes as quoting characters,
# so a replacement written as '' would splice two literal apostrophes
# into the document instead of removing the word.
regsub -all {[Ll]orem} $text {} ntext

# Table t2 starts out identical to t1, then row 1 is overwritten with the
# lorem-free text so that the index has to forget the old terms.
db eval {
  CREATE VIRTUAL TABLE t2 USING fts3(c);

  INSERT INTO t2(rowid, c) VALUES(1, $text);
  INSERT INTO t2(rowid, c) VALUES(2, 'Another lovely row');
  UPDATE t2 SET c = $ntext WHERE rowid = 1;
}
|
122 |
|
# Prefix-search checks against t2 after row 1 was updated, one table row
# per test case:
#
#   tn        suffix of the test name (fts3an-2.<tn>)
#   what      human-readable description (documentation only)
#   pattern   text placed between single quotes after MATCH
#   expected  rowids the query must return
#
foreach {tn what pattern expected} {
  1 {Can't see lorem as an exact match}            lorem   {}
  2 {Can't see a prefix of lorem, either}          lore*   {}
  3 {Can see lovely in the other document}         lo*     {2}
  4 {Can still see other hits}                     l*      {1 2}
  5 {Prefix which should only hit one document}    lov*    {2}
} {
  set sql "SELECT rowid FROM t2 WHERE t2 MATCH '$pattern'"
  do_test fts3an-2.$tn [list execsql $sql] $expected
}
|
147 |
|
148 |
|
149 |
|
# Test with a segment which will have multiple levels in the tree.

# Grow the sample text into a big document with lots of unique terms.
# Each pass appends a copy of the document built so far in which every
# alphabetic word gets one more suffix letter, so the document doubles
# in size on every pass (five passes in total).
set bigtext $text
foreach suffix {a b c d e} {
  regsub -all {[A-Za-z]+} $bigtext "&$suffix" suffixed
  append bigtext $suffixed
}

# Fill t3 with the two small documents followed by 100 copies of the big
# one, all inside a single transaction.  $ret collects the number of
# hits expected per row, in rowid order: offsets() returns 4 elements
# for every hit, and there are 6 hits for row 1, 1 for row 2, and
# 6*(2^5)==192 for each copy of $bigtext.
set ret {6 1}
db eval {BEGIN}
db eval {CREATE VIRTUAL TABLE t3 USING fts3(c)}
db eval {INSERT INTO t3(rowid, c) VALUES(1, $text)}
db eval {INSERT INTO t3(rowid, c) VALUES(2, 'Another lovely row')}
for {set n 0} {$n < 100} {incr n} {
  db eval {INSERT INTO t3(rowid, c) VALUES(3+$n, $bigtext)}
  lappend ret 192
}
db eval {COMMIT}
|
177 |
|
# Check the per-row hit counts for the prefix l* against the expected
# list in $ret.  offsets() yields four list elements per hit, so the
# element count divided by four is the number of hits in that row.
do_test fts3an-3.1 {
  set hits {}
  db eval {SELECT offsets(t3) as o FROM t3 WHERE t3 MATCH 'l*'} {
    lappend hits [expr {[llength $o] / 4}]
  }
  set hits
} $ret

# TODO(shess) It would be useful to test a couple edge cases, but I
# don't know if we have the precision to manage it from here at this
# time.  Prefix hits can cross leaves, which the code above _should_
# hit by virtue of size.  There are two variations on this.  If the
# tree is 2 levels high, the code will find the leaf-node extent
# directly, but if it is higher, the code will have to follow two
# separate interior branches down the tree.  Both should be tested.

finish_test