author | hgs |
Mon, 28 Jun 2010 10:34:53 +0530 | |
changeset 8 | 6547bf8ca13a |
parent 7 | a5fbfefd615f |
permissions | -rw-r--r-- |
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
#include <wchar.h> |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
#include <stddef.h> |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
#include <iostream> |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
#include "cpixidxdb.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
#include "itk.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
#include "config.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
#include "testutils.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
// For testing custom analyzer |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
#include "CLucene.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
#include "CLucene\analysis\AnalysisHeader.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
#include "CLucene\util\stringreader.h" |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
#include "analyzerexp.h" |
8 | 18 |
#include "customanalyzer.h" |
19 |
||
20 |
#include "localetestinfos.h" |
|
21 |
||
22 |
#include "spi/locale.h" |
|
23 |
#include "cpixstrtools.h" |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
using namespace Cpt::Lex; |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
using namespace Cpt::Parser; |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
using namespace Cpix::AnalyzerExp; |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
/**
 * Writes one lexer token to stdout as TYPE('TEXT').
 *
 * No separator or newline is emitted, so consecutive calls produce a
 * single unbroken line.
 */
void PrintToken(Cpt::Lex::Token token)
{
    printf("%S('%S')",
           token.type(),
           token.text());
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
|
8 | 34 |
/**
 * Tokenizes a "switch { ... }" analyzer definition and prints every token
 * that passes through the StdFilter, using PrintToken().
 */
void TestTokenization6(Itk::TestMgr * testMgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    // Raw token source over the analyzer definition text.
    Tokens source(tokenizer,
                  L"switch { "
                  L"case '_docuid', '_mimetype': keywords;"
                  L"case '_baseappclass': whitespace>lowercase;"
                  L"default: natural(en); "
                  L"}");

    // Filtered view over the raw tokens; this is what gets printed.
    StdFilter tokens(source);

    while (tokens) {
        PrintToken(tokens++);
    }
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
|
8 | 49 |
/**
 * Parses two invokation expressions and one piping expression, printing
 * the identifiers and the number of parameters / filters that were found.
 *
 * Any ParseException or LexException raised while parsing is caught and
 * its message is printed instead.
 */
void TestParsing(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    // First invokation: identifier and string-literal parameters.
    Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)");
    StdFilter tokens(source);
    Lexer lexer(tokens);

    // Piping expression handed directly to ParsePiping() below.
    const wchar_t* text = L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin) ";

    // Second invokation: includes integer and real-number parameters.
    Tokens source3(tokenizer, L"foobar(zap, 0, 0.0045, 4, 'a', 9223.031)");
    StdFilter tokens3(source3);
    Lexer lexer3(tokens3);

    try {
        auto_ptr<Invokation> invoke = ParseInvokation(lexer);
        lexer.eatEof();
        printf("Invoke identifier: %S\n", invoke->id());
        // size() is cast explicitly: "%d" consumes an int, and handing a
        // wider size type to a varargs function is a type mismatch.
        printf("%d parameters\n", static_cast<int>(invoke->params().size()));

        auto_ptr<Piping> piping = ParsePiping(text);
        printf("piping done.\n");
        // The tokenizer name is only printed when the piping's tokenizer
        // is a plain invokation.
        if (const Invokation* tokenizerInvokation =
                dynamic_cast<const Invokation*>(&piping->tokenizer())) {
            printf("Tokenizer: %S\n", tokenizerInvokation->id());
        }
        printf("%d filters\n", static_cast<int>(piping->filters().size()));

        invoke = ParseInvokation(lexer3);
        lexer3.eatEof();
        printf("Invoke identifier: %S\n", invoke->id());
        printf("%d parameters\n", static_cast<int>(invoke->params().size()));
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        printf("LexException: %S\n", e.wWhat());
    }
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
|
8 | 85 |
/**
 * Parses a "switch { ... }" analyzer definition and, when the resulting
 * tokenizer is a Switch, prints the labels of each case followed by a
 * placeholder line for the default branch.
 *
 * Parse and lex errors are reported with the definition text attached as
 * context.
 */
void TestSwitch(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    const wchar_t* text =
        L"switch { "
        L"case '_docuid', '_mimetype': keywords;"
        L"case '_baseappclass': whitespace>lowercase;"
        L"default: natural(en); "
        L"}";

    try {
        auto_ptr<Piping> sw = ParsePiping(text);

        // Nothing is printed unless the parsed tokenizer is a Switch.
        if (const Switch* s = dynamic_cast<const Switch*>(&sw->tokenizer())) {
            for (int caseIdx = 0; caseIdx < s->cases().size(); ++caseIdx) {
                const Case* c = s->cases()[caseIdx];
                printf("case ");
                for (int labelIdx = 0; labelIdx < c->cases().size(); ++labelIdx) {
                    printf("%S", c->cases()[labelIdx]);
                }
                // The body of the case is not printed, only a placeholder.
                printf(": ...\n");
            }
            // Likewise the default branch is only acknowledged.
            printf("default: ...\n");
        }
    } catch (ParseException& e) {
        // Attach the parsed text so the message can refer to it.
        e.setContext(text);
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        e.setContext(text);
        printf("LexException: %S\n", e.wWhat());
    }
}
|
121 |
||
122 |
/**
 * Parses a "config_switch { ... }" analyzer definition and, when the
 * resulting tokenizer is a ConfigSwitch, prints the labels of each case
 * followed by a placeholder line for the default branch.
 *
 * Parse and lex errors are reported with the definition text attached as
 * context.
 */
void TestConfigSwitch(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    const wchar_t* text =
        L"config_switch { "
        L"case 'indexing': korean;"
        L"case 'query': koreanquery;"
        L"case 'prefix': letter;"
        L"default: korean;"
        L"}";

    try {
        auto_ptr<Piping> sw = ParsePiping(text);

        // Nothing is printed unless the parsed tokenizer is a ConfigSwitch.
        if (const ConfigSwitch* s =
                dynamic_cast<const ConfigSwitch*>(&sw->tokenizer())) {
            for (int caseIdx = 0; caseIdx < s->cases().size(); ++caseIdx) {
                const Case* c = s->cases()[caseIdx];
                printf("case ");
                for (int labelIdx = 0; labelIdx < c->cases().size(); ++labelIdx) {
                    printf("%S", c->cases()[labelIdx]);
                }
                // The body of the case is not printed, only a placeholder.
                printf(": ...\n");
            }
            // Likewise the default branch is only acknowledged.
            printf("default: ...\n");
        }
    } catch (ParseException& e) {
        // Attach the parsed text so the message can refer to it.
        e.setContext(text);
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        e.setContext(text);
        printf("LexException: %S\n", e.wWhat());
    }
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
|
8 | 160 |
|
161 |
/**
 * Feeds four malformed expressions to ParsePiping() and prints the
 * exception message produced for each one.
 *
 * Each case catches only the exception types listed for it; anything else
 * propagates to the caller.
 */
void TestParsingErrors(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;
    const wchar_t* text;

    // Case 1: input ends in the middle of the parameter list.
    try {
        text = L"foobar(zap, foo, 'bar', 'raf', do, ";
        ParsePiping(text);
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    }

    // Case 2: string literal without a closing quote.
    try {
        text = L"foobar(zap, foo, 'bar', 'a, raboof)";
        ParsePiping(text);
    } catch (LexException& e) { // syntax error
        printf("LexException: %S\n", e.wWhat());
    } catch (ParseException& e) { // syntax error
        printf("ParseException: %S\n", e.wWhat());
    }

    // Case 3: character ('!') in front of an identifier.
    try {
        text = L"foobar(!zap, foo, 'bar', 'a', raboof)";
        ParsePiping(text);
    } catch (LexException& e) { // syntax error
        printf("LexException: %S\n", e.wWhat());
    }

    // Case 4: comma missing between two parameters.
    try {
        text = L"foobar(zap, foo, 'bar', 'a' raboof)";
        ParsePiping(text);
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    }
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
// NULL-terminated list of corpus files that the custom analyzer tests
// tokenize.
const char * CustomAnalyzerTestDocs[] = {
    // "en" corpus
    STEM_TEST_CORPUS_PATH "\\en\\1.txt",
    STEM_TEST_CORPUS_PATH "\\en\\2.txt",
    STEM_TEST_CORPUS_PATH "\\en\\3.txt",
    STEM_TEST_CORPUS_PATH "\\en\\4.txt",

    // "fi" corpus
    STEM_TEST_CORPUS_PATH "\\fi\\1.txt",
    STEM_TEST_CORPUS_PATH "\\fi\\2.txt",

    // "th" corpus
    LOC_TEST_CORPUS_PATH "\\th\\1.txt",
    LOC_TEST_CORPUS_PATH "\\th\\2.txt",

    // End-of-list marker
    NULL
};

// Encoding name handed to the file reader when a corpus file is opened.
const char DEFAULT_ENCODING[] = "UTF-8";
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
214 |
/**
 * Drains the given token stream and prints every term on one line.
 *
 * A token with position increment 0 is introduced by '|'; otherwise one
 * space is printed per position step. Each term is wrapped in single
 * quotes and the line is terminated with a newline once the stream is
 * exhausted.
 */
void PrintTokenStream(lucene::analysis::TokenStream* stream)
{
    lucene::analysis::Token token;

    while (stream->next(&token)) {
        const int increment = token.getPositionIncrement();

        if (increment == 0) {
            printf("|");
        } else {
            for (int remaining = increment; remaining > 0; --remaining) {
                printf(" ");
            }
        }

        printf("'%S'", token.termText());
    }

    printf("\n");
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
229 |
|
8 | 230 |
/**
 * Builds a CustomAnalyzer from the given definition and prints the token
 * stream it produces for each file of a NULL-terminated file list.
 *
 * @param testMgr     test manager supplied by the test framework
 * @param files       NULL-terminated array of file paths to tokenize
 * @param definition  analyzer definition string
 */
void TestCustomAnalyzer(Itk::TestMgr * testMgr,
                        const char** files,
                        const wchar_t* definition)
{
    using namespace lucene::analysis;
    using namespace lucene::util;
    using namespace Cpix;
    using namespace std;

    CustomAnalyzer analyzer(definition);
    printf("Analyzer \"%S\":\n", definition);

    for (const char** doc = files; *doc; ++doc)
    {
        const char* path = *doc;

        // The first character of the path is skipped in the printout.
        printf("File !%s tokenized:\n", (path + 1));
        FileReader file( path, DEFAULT_ENCODING );

        TokenStream* stream = analyzer.tokenStream( L"field", &file );
        PrintTokenStream( stream );

        // The stream is closed and deleted here once it has been printed.
        stream->close();
        _CLDELETE( stream );
    }

    printf("\n");
}
|
253 |
||
254 |
/**
 * Convenience overload: runs the given analyzer definition against the
 * default document list, CustomAnalyzerTestDocs.
 */
void TestCustomAnalyzer(Itk::TestMgr * testMgr, const wchar_t* definition)
{
    TestCustomAnalyzer(testMgr,
                       CustomAnalyzerTestDocs,
                       definition);
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
257 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
258 |
/**
 * Runs the default test documents through a series of custom analyzer
 * definitions, one TestCustomAnalyzer() call per definition.
 */
void TestCustomAnalyzers(Itk::TestMgr * testMgr)
{
    // Plain tokenizers, optionally followed by a single filter.
    TestCustomAnalyzer(testMgr, L"stdtokens");
    TestCustomAnalyzer(testMgr, L"whitespace");
    TestCustomAnalyzer(testMgr, L"whitespace>lowercase");
    TestCustomAnalyzer(testMgr, L"whitespace>accent");
    TestCustomAnalyzer(testMgr, L"letter");
    TestCustomAnalyzer(testMgr, L"letter>lowercase");
    TestCustomAnalyzer(testMgr, L"keyword");
    TestCustomAnalyzer(testMgr, L"keyword>lowercase");
//  TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>stem(en)"); // Does not work with NON-ASCII

    // Stop-word filters: by language and by explicit word list.
    TestCustomAnalyzer(testMgr, L"letter>lowercase>stop(en)");
    // The last stop word is written with a hex escape (\xe4 = a-umlaut) so
    // the literal does not depend on the encoding of this source file.
    // NOTE(review): the character was unreadable in the reviewed copy;
    // Finnish "n\xe4in" is assumed - confirm against the original file.
    TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'n\xe4in')");

    // Length and prefix filters.
    TestCustomAnalyzer(testMgr, L"letter>length(2, 4)");
    TestCustomAnalyzer(testMgr, L"standard>prefixes(1)");
    TestCustomAnalyzer(testMgr, L"standard>prefixes(2)");
    TestCustomAnalyzer(testMgr, L"standard>prefixes(3)");

    // Thai, CJK and n-gram pipelines.
    TestCustomAnalyzer(testMgr, L"stdtokens>stdfilter>lowercase>thai>stop(en)");
    TestCustomAnalyzer(testMgr, L"cjk>stop(en)");
    TestCustomAnalyzer(testMgr, L"ngram(1)>lowercase>stop(en)");
    TestCustomAnalyzer(testMgr, L"ngram(2)>lowercase>stop(en)");
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
280 |
|
8 | 281 |
/**
 * Runs the "natural" analyzer over the default test documents once for
 * each of a fixed list of locales, then switches the locale back to
 * cpix_LOCALE_AUTO.
 */
void TestTokenizationWithLocales(Itk::TestMgr * testMgr)
{
    // Locales are exercised in exactly this order.
    // NOTE(review): "jp" is kept as-is from the existing test; confirm it
    // is the identifier the locale layer expects for Japanese.
    static const char* const kLocales[] = { "en", "th", "ko", "zh", "jp" };
    const int kLocaleCount = sizeof(kLocales) / sizeof(kLocales[0]);

    cpix_Result result;

    for (int idx = 0; idx < kLocaleCount; ++idx) {
        printf("locale=%s\n", kLocales[idx]);
        cpix_SetLocale( &result, kLocales[idx] );
        TestCustomAnalyzer(testMgr, L"natural");
    }

    // Leave the locale on automatic selection once the loop is done.
    cpix_SetLocale( &result, cpix_LOCALE_AUTO );
}
|
305 |
||
306 |
template<typename T> |
|
307 |
void TestTokenizationWithLocale(Itk::TestMgr * testMgr) { |
|
308 |
cpix_Result result; |
|
309 |
cpix_SetLocale( &result, T::LOCALE ); |
|
310 |
TestCustomAnalyzer(testMgr, EnglishLocale::FILES, L"natural"); |
|
311 |
TestCustomAnalyzer(testMgr, T::FILES, L"natural"); |
|
312 |
cpix_SetLocale( &result, cpix_LOCALE_AUTO ); |
|
313 |
} |
|
314 |
||
315 |
||
316 |
template<typename T> |
|
317 |
void AddTokenizationWithLocaleTest(Itk::SuiteTester* suite) { |
|
318 |
suite->add(T::LOCALE, |
|
319 |
&TestTokenizationWithLocale<T>, |
|
320 |
T::LOCALE); |
|
321 |
} |
|
322 |
||
323 |
/**
 * Runs the "natural" analyzer definition with the locale set to
 * cpix_LOCALE_AUTO.
 *
 * @param testMgr test manager that is forwarded to TestCustomAnalyzer()
 */
void TestTokenizationWithCurrentLocale(Itk::TestMgr * testMgr) {
    cpix_Result status;

    // Select the automatic locale before tokenizing.
    cpix_SetLocale(&status, cpix_LOCALE_AUTO);

    TestCustomAnalyzer(testMgr, L"natural");
}
|
328 |
||
329 |
/**
 * Builds a CustomAnalyzer from the given definition, tokenizes the first
 * document of CustomAnalyzerTestDocs for the given field and prints the
 * resulting token stream.
 *
 * @param testMgr    test manager (not used by this function)
 * @param definition analyzer definition handed to the CustomAnalyzer
 * @param field      name of the field the token stream is requested for
 */
void TestAnalyzerWithField(Itk::TestMgr * testMgr, const wchar_t* definition, const wchar_t* field)
{
    using namespace lucene::analysis;
    using namespace lucene::util;
    using namespace Cpix;
    using namespace std;

    CustomAnalyzer customAnalyzer(definition);

    // The document path is printed with its first character skipped;
    // the field name is a wide string, hence the %S conversion.
    printf("File !%s tokenized for field %S:\n", (CustomAnalyzerTestDocs[0]+1), field);
    FileReader reader( CustomAnalyzerTestDocs[0], DEFAULT_ENCODING );

    TokenStream* tokens = customAnalyzer.tokenStream( field, &reader );
    PrintTokenStream( tokens );

    // Close the stream explicitly, then release it.
    tokens->close();
    _CLDELETE( tokens );
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
345 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
346 |
/**
 * Exercises a field-based "switch" analyzer definition by tokenizing the
 * same document for a series of field names, one call to
 * TestAnalyzerWithField() per field name.
 *
 * @param testMgr test manager that is forwarded to TestAnalyzerWithField()
 */
void TestSwitchAnalyzers(Itk::TestMgr * testMgr)
{
    const wchar_t* sw = L"\n"
        L"switch {\n"
        L" case '_docuid': keyword;\n"
        L" case '_appclass': whitespace>lowercase;\n"
        L" case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n"
        L" default: letter>lowercase>stop('i');\n"
        L"}";

    // Field names are tried in exactly this order.
    // NOTE(review): "Title" is capitalised while the definition lists
    // 'title' - presumably deliberate; confirm.
    static const wchar_t* const fieldNames[] = {
        L"_docuid",
        L"_appclass",
        L"Title",
        L"message",
        L"field"
    };
    const size_t fieldCount = sizeof(fieldNames) / sizeof(fieldNames[0]);

    for (size_t i = 0; i < fieldCount; ++i) {
        TestAnalyzerWithField(testMgr, sw, fieldNames[i]);
    }
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
361 |
|
8 | 362 |
/**
 * Exercises a "locale_switch" analyzer definition. The same definition is
 * run through TestCustomAnalyzer() with the locale set to "en", "th", "ca"
 * and "fail" in turn. A banner is printed before each run, and the locale
 * is switched back to cpix_LOCALE_AUTO at the end.
 *
 * The cpix_Result filled in by cpix_SetLocale() is not inspected here.
 *
 * @param testMgr test manager that is forwarded to TestCustomAnalyzer()
 */
void TestLocaleSwitchAnalyzers(Itk::TestMgr * testMgr)
{
    const wchar_t* definition = L"\n"
        L"locale_switch {\n"
        L" case 'en': stdtokens>stdfilter>lowercase>stop(en);\n"
        L" case 'th': stdtokens>stdfilter>lowercase>thai>stop(en);\n"
        L" case 'ca': stdtokens>stdfilter>lowercase>accent;\n"
        L" default: stdtokens>stdfilter>lowercase;\n"
        L"}";
    cpix_Result status;

    // 'en' case
    printf("locale=en:\n");
    cpix_SetLocale(&status, "en");
    TestCustomAnalyzer(testMgr, definition);
    printf("\n");

    // 'th' case
    printf("locale=th:\n");
    cpix_SetLocale(&status, "th");
    TestCustomAnalyzer(testMgr, definition);
    printf("\n");

    // 'ca' case
    printf("locale=ca:\n");
    cpix_SetLocale(&status, "ca");
    TestCustomAnalyzer(testMgr, definition);
    printf("\n");

    // "fail" is not listed in the definition; the banner labels this run
    // as the default one.
    printf("default locale:\n");
    cpix_SetLocale(&status, "fail");
    TestCustomAnalyzer(testMgr, definition);

    // Leave the locale selection in automatic mode.
    cpix_SetLocale(&status, cpix_LOCALE_AUTO);
}
|
389 |
||
390 |
||
391 |
/**
 * Creates the "loc" suite. It holds one test for the current locale,
 * followed by one TestTokenizationWithLocale<> test per locale trait
 * listed below.
 *
 * The current-locale test is named "currentlocale_" followed by the first
 * entry returned by Cpix::Spi::GetLanguageNames().
 *
 * @return the newly allocated suite
 */
Itk::TesterBase * CreateAnalysisWhiteBoxLocalizationTests() {
    using namespace Itk;

    SuiteTester * suite = new SuiteTester("loc");

    std::string testName("currentlocale_");

    // Append the first reported language name.
    // NOTE(review): presumably auto_array<char> converts the language
    // name to a narrow string - confirm.
    Cpt::auto_array<char> languageName( Cpix::Spi::GetLanguageNames()[0].c_str() );
    testName += languageName.get();

    suite->add(testName.c_str(),
               &TestTokenizationWithCurrentLocale,
               testName.c_str());

    // One test per explicitly supported locale, in this order.
    AddTokenizationWithLocaleTest<EnglishLocale>(suite);
    AddTokenizationWithLocaleTest<FrenchLocale>(suite);
    AddTokenizationWithLocaleTest<HebrewLocale>(suite);
    AddTokenizationWithLocaleTest<ThaiLocale>(suite);
    AddTokenizationWithLocaleTest<KoreanLocale>(suite);
    AddTokenizationWithLocaleTest<ChineseLocale>(suite);
    AddTokenizationWithLocaleTest<JapaneseLocale>(suite);

    return suite;
}
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
417 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
418 |
/**
 * Creates the "whitebox" analysis suite. It registers the analyzer,
 * tokenization and parsing tests and then appends the localization suite
 * built by CreateAnalysisWhiteBoxLocalizationTests().
 *
 * @return the newly allocated suite
 */
Itk::TesterBase * CreateAnalysisWhiteBoxTests()
{
    using namespace Itk;

    SuiteTester * suite = new SuiteTester("whitebox");

    // Analyzer construction tests
    suite->add("analyzer", &TestCustomAnalyzers, "analyzer");
    suite->add("switchAnalyzer", &TestSwitchAnalyzers, "switchAnalyzer");
    suite->add("localeSwitchAnalyzer", &TestLocaleSwitchAnalyzers, "localeSwitchAnalyzer");

    // Tokenization and definition parsing tests
    suite->add("tokenization", TestTokenization6, "tokenization");
    suite->add("parsing", TestParsing, "parsing");
    suite->add("parsing2", TestSwitch, "parsing2");
    suite->add("parsing3", TestConfigSwitch, "parsing3");
    suite->add("parsingerrors", TestParsingErrors, "parsingerrors");

    // Nested localization suite
    suite->add(CreateAnalysisWhiteBoxLocalizationTests());

    return suite;
}
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
453 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
454 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
455 |