64 } |
66 } |
65 |
67 |
66 |
68 |
67 void TestTokenization6(Itk::TestMgr * ) |
69 void TestTokenization6(Itk::TestMgr * ) |
68 { |
70 { |
69 Cpix::AnalyzerExp::Tokenizer tokenizer; |
71 char *xml_file = (char*)__FUNCTION__; |
|
72 assert_failed = 0; |
|
73 Cpix::AnalyzerExp::Tokenizer tokenizer; |
70 Tokens source(tokenizer, |
74 Tokens source(tokenizer, |
71 L"switch { " |
75 L"switch { " |
72 L"case '_docuid', '_mimetype': keywords;" |
76 L"case '_docuid', '_mimetype': keywords;" |
73 L"case '_baseappclass': whitespace>lowercase;" |
77 L"case '_baseappclass': whitespace>lowercase;" |
74 L"default: natural(en); " |
78 L"default: natural(en); " |
75 L"}"); |
79 L"}"); |
76 WhiteSpaceFilter |
80 WhiteSpaceFilter |
77 tokens(source); |
81 tokens(source); |
78 |
82 |
79 while (tokens) PrintToken(tokens++); |
83 while (tokens) PrintToken(tokens++); |
|
84 testResultXml(xml_file); |
80 } |
85 } |
81 |
86 |
82 void TestParsing(Itk::TestMgr* ) |
87 void TestParsing(Itk::TestMgr* ) |
83 { |
88 { |
84 Cpix::AnalyzerExp::Tokenizer tokenizer; |
89 Cpix::AnalyzerExp::Tokenizer tokenizer; |
85 |
90 char *xml_file = (char*)__FUNCTION__; |
|
91 assert_failed = 0; |
86 Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)"); |
92 Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)"); |
87 WhiteSpaceFilter tokens(source); |
93 WhiteSpaceFilter tokens(source); |
88 Lexer lexer(tokens); |
94 Lexer lexer(tokens); |
89 |
95 |
90 Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) "); |
96 Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) "); |
110 invoke = ParseInvokation(lexer3); |
116 invoke = ParseInvokation(lexer3); |
111 lexer3.eatEof(); |
117 lexer3.eatEof(); |
112 printf("Invoke identifier: %S\n", (invoke->id()).c_str()); |
118 printf("Invoke identifier: %S\n", (invoke->id()).c_str()); |
113 printf("%d parameters\n", invoke->params().size()); |
119 printf("%d parameters\n", invoke->params().size()); |
114 } catch (ParseException& e) { |
120 } catch (ParseException& e) { |
|
121 assert_failed = 1; |
115 printf("ParseException: %S\n", e.wWhat()); |
122 printf("ParseException: %S\n", e.wWhat()); |
116 } catch (LexException& e) { |
123 } catch (LexException& e) { |
|
124 assert_failed = 1; |
117 printf("LexException: %S\n", e.wWhat()); |
125 printf("LexException: %S\n", e.wWhat()); |
118 } |
126 } |
|
127 testResultXml(xml_file); |
119 } |
128 } |
120 |
129 |
121 void TestSwitch(Itk::TestMgr* ) |
130 void TestSwitch(Itk::TestMgr* ) |
122 { |
131 { |
123 Cpix::AnalyzerExp::Tokenizer tokenizer; |
132 Cpix::AnalyzerExp::Tokenizer tokenizer; |
124 |
133 char *xml_file = (char*)__FUNCTION__; |
|
134 assert_failed = 0; |
125 const wchar_t* text; |
135 const wchar_t* text; |
126 Tokens source(tokenizer, text = |
136 Tokens source(tokenizer, text = |
127 L"switch { " |
137 L"switch { " |
128 L"case '_docuid', '_mimetype': keywords;" |
138 L"case '_docuid', '_mimetype': keywords;" |
129 L"case '_baseappclass': whitespace>lowercase;" |
139 L"case '_baseappclass': whitespace>lowercase;" |
148 } |
158 } |
149 printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;"; |
159 printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;"; |
150 } |
160 } |
151 } catch (ParseException& e) { |
161 } catch (ParseException& e) { |
152 // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; |
162 // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; |
|
163 assert_failed = 1; |
153 e.setContext(text); |
164 e.setContext(text); |
154 printf("ParseException: %S\n", e.wWhat()); |
165 printf("ParseException: %S\n", e.wWhat()); |
155 } catch (LexException& e) { |
166 } catch (LexException& e) { |
156 // OBS wcout<<L"LexException: "<<e.describe(text)<<endl; |
167 // OBS wcout<<L"LexException: "<<e.describe(text)<<endl; |
|
168 assert_failed = 1; |
157 e.setContext(text); |
169 e.setContext(text); |
158 printf("LexException: %S\n", e.wWhat()); |
170 printf("LexException: %S\n", e.wWhat()); |
159 } |
171 } |
|
172 testResultXml(xml_file); |
160 } |
173 } |
161 |
174 |
162 void TestParsingErrors(Itk::TestMgr* ) |
175 void TestParsingErrors(Itk::TestMgr* ) |
163 { |
176 { |
|
177 char *xml_file = (char*)__FUNCTION__; |
|
178 assert_failed = 0; |
164 Cpix::AnalyzerExp::Tokenizer tokenizer; |
179 Cpix::AnalyzerExp::Tokenizer tokenizer; |
165 // eof |
180 // eof |
166 const wchar_t* text; |
181 const wchar_t* text; |
167 StdLexer eof(tokenizer, text = L"foobar(zap, foo, 'bar', 'raf', do, "); |
182 StdLexer eof(tokenizer, text = L"foobar(zap, foo, 'bar', 'raf', do, "); |
168 try { |
183 try { |
209 } catch (ParseException& e) { |
224 } catch (ParseException& e) { |
210 // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; |
225 // OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; |
211 e.setContext(text); |
226 e.setContext(text); |
212 printf("ParseException: %S\n", e.wWhat()); |
227 printf("ParseException: %S\n", e.wWhat()); |
213 } |
228 } |
214 |
229 testResultXml(xml_file); |
215 } |
230 } |
216 |
231 |
217 |
232 |
218 const char * CustomAnalyzerTestDocs[] = { |
233 const char * CustomAnalyzerTestDocs[] = { |
219 FILE_TEST_CORPUS_PATH "\\en\\1.txt", |
234 FILE_TEST_CORPUS_PATH "\\en\\1.txt", |
263 } |
278 } |
264 } |
279 } |
265 |
280 |
266 void TestCustomAnalyzers(Itk::TestMgr * testMgr) |
281 void TestCustomAnalyzers(Itk::TestMgr * testMgr) |
267 { |
282 { |
|
283 char *xml_file = (char*)__FUNCTION__; |
|
284 assert_failed = 0; |
268 TestCustomAnalyzer(testMgr, L"stdtokens"); |
285 TestCustomAnalyzer(testMgr, L"stdtokens"); |
269 TestCustomAnalyzer(testMgr, L"whitespace"); |
286 TestCustomAnalyzer(testMgr, L"whitespace"); |
270 TestCustomAnalyzer(testMgr, L"whitespace>lowercase"); |
287 TestCustomAnalyzer(testMgr, L"whitespace>lowercase"); |
271 TestCustomAnalyzer(testMgr, L"whitespace>accent"); |
288 TestCustomAnalyzer(testMgr, L"whitespace>accent"); |
272 TestCustomAnalyzer(testMgr, L"letter"); |
289 TestCustomAnalyzer(testMgr, L"letter"); |
275 TestCustomAnalyzer(testMgr, L"keyword>lowercase"); |
292 TestCustomAnalyzer(testMgr, L"keyword>lowercase"); |
276 TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)"); |
293 TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)"); |
277 TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)"); |
294 TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)"); |
278 TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')"); |
295 TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')"); |
279 TestCustomAnalyzer(testMgr, L"letter>length(2, 4)"); |
296 TestCustomAnalyzer(testMgr, L"letter>length(2, 4)"); |
|
297 testResultXml(xml_file); |
280 } |
298 } |
281 |
299 |
282 void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field) |
300 void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field) |
283 { |
301 { |
284 using namespace lucene::analysis; |
302 using namespace lucene::analysis; |
296 _CLDELETE( stream ); |
314 _CLDELETE( stream ); |
297 } |
315 } |
298 |
316 |
299 void TestSwitchAnalyzers(Itk::TestMgr * testMgr) |
317 void TestSwitchAnalyzers(Itk::TestMgr * testMgr) |
300 { |
318 { |
301 const wchar_t* sw = L"\n" |
319 char *xml_file = (char*)__FUNCTION__; |
|
320 assert_failed = 0; |
|
321 const wchar_t* sw = L"\n" |
302 L"switch {\n" |
322 L"switch {\n" |
303 L" case '_docuid': keyword;\n" |
323 L" case '_docuid': keyword;\n" |
304 L" case '_appclass': whitespace>lowercase;\n" |
324 L" case '_appclass': whitespace>lowercase;\n" |
305 L" case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n" |
325 L" case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n" |
306 L" default: letter>lowercase>stop('i');\n" |
326 L" default: letter>lowercase>stop('i');\n" |
308 TestAnalyzerWithField(testMgr, sw, L"_docuid"); |
328 TestAnalyzerWithField(testMgr, sw, L"_docuid"); |
309 TestAnalyzerWithField(testMgr, sw, L"_appclass"); |
329 TestAnalyzerWithField(testMgr, sw, L"_appclass"); |
310 TestAnalyzerWithField(testMgr, sw, L"Title"); |
330 TestAnalyzerWithField(testMgr, sw, L"Title"); |
311 TestAnalyzerWithField(testMgr, sw, L"message"); |
331 TestAnalyzerWithField(testMgr, sw, L"message"); |
312 TestAnalyzerWithField(testMgr, sw, L"field"); |
332 TestAnalyzerWithField(testMgr, sw, L"field"); |
|
333 testResultXml(xml_file); |
313 } |
334 } |
314 |
335 |
315 |
336 |
316 Itk::TesterBase * CreateAnalysisWhiteBoxTests() |
337 Itk::TesterBase * CreateAnalysisWhiteBoxTests() |
317 { |
338 { |
318 using namespace Itk; |
339 using namespace Itk; |
319 |
340 |
320 SuiteTester |
341 SuiteTester |
321 * analysisTests = new SuiteTester("whitebox"); |
342 * analysisTests = new SuiteTester("analysiswhitebox"); |
322 |
343 |
323 analysisTests->add("analyzer", |
344 analysisTests->add("analyzer", |
324 &TestCustomAnalyzers, |
345 &TestCustomAnalyzers, |
325 "analyzer"); |
346 "analyzer"); |
326 analysisTests->add("switchAnalyzer", |
347 analysisTests->add("switchanalyzer", |
327 &TestSwitchAnalyzers, |
348 &TestSwitchAnalyzers, |
328 "switchAnalyzer"); |
349 "switchanalyzer"); |
329 analysisTests->add("tokenization", |
350 analysisTests->add("tokenization", |
330 TestTokenization6, |
351 TestTokenization6, |
331 "tokenization"); |
352 "tokenization"); |
332 analysisTests->add("parsing", |
353 analysisTests->add("parsing", |
333 TestParsing, |
354 TestParsing, |