22 |
22 |
23 using namespace Cpt::Lex; |
23 using namespace Cpt::Lex; |
24 using namespace Cpt::Parser; |
24 using namespace Cpt::Parser; |
25 using namespace std; |
25 using namespace std; |
26 |
26 |
// Token-type identifiers used by the tokenizer tests below.
// Each token type is a descriptive wide-string name; this replaces the
// earlier enum-based scheme that extended Cpt::Lex::TOKEN_LAST_RESERVED
// with integer ids (two revisions were interleaved here; this is the
// later, string-typed one).
// NOTE(review): assumes SymbolTokenizer and Token::type() take/return
// const wchar_t* token types in this revision of Cpt::Lex — confirm.
const wchar_t* TOKEN_LEFT_BRACKET  = L"left bracket";
const wchar_t* TOKEN_RIGHT_BRACKET = L"right bracket";
const wchar_t* TOKEN_COMMA         = L"comma";
const wchar_t* TOKEN_PIPE          = L"pipe";
const wchar_t* TOKEN_SWITCH        = L"switch";
const wchar_t* TOKEN_CASE          = L"case";
const wchar_t* TOKEN_DEFAULT       = L"default";
const wchar_t* TOKEN_LEFT_BRACE    = L"left brace";
const wchar_t* TOKEN_RIGHT_BRACE   = L"right brace";
const wchar_t* TOKEN_COLON         = L"colon";
const wchar_t* TOKEN_TERMINATOR    = L"terminator";
|
40 |
38 |
41 void PrintToken(Cpt::Lex::Token token) { |
39 void PrintToken(Cpt::Lex::Token token) { |
42 switch (token.type()) { |
40 wcout<<token.type()<<L"('"<<token.text()<<L"')"; |
43 case TOKEN_WS: wcout<<L"space"; break; |
41 } |
44 case TOKEN_ID: wcout<<"id"; break; |
42 |
45 case TOKEN_LIT: wcout<<"lit"; break; |
43 void TestTokenization(Itk::TestMgr * testMgr, |
46 case TOKEN_STRLIT: wcout<<"str-lit"; break; |
|
47 case TOKEN_REALLIT: wcout<<"real-lit"; break; |
|
48 case TOKEN_INTLIT: wcout<<"int-lit"; break; |
|
49 case TOKEN_LEFT_BRACKET: wcout<<"lbr"; break; |
|
50 case TOKEN_RIGHT_BRACKET: wcout<<"rbr"; break; |
|
51 case TOKEN_COMMA: wcout<<"comma"; break; |
|
52 case TOKEN_PIPE: wcout<<"pipe"; break; |
|
53 case TOKEN_SWITCH : wcout<<"sw"; break; |
|
54 case TOKEN_CASE : wcout<<"case"; break; |
|
55 case TOKEN_DEFAULT : wcout<<"default"; break; |
|
56 case TOKEN_LEFT_BRACE : wcout<<"lbc"; break; |
|
57 case TOKEN_RIGHT_BRACE : wcout<<"rbc"; break; |
|
58 case TOKEN_COLON : wcout<<"cl"; break; |
|
59 case TOKEN_TERMINATOR : wcout<<"tr"; break; |
|
60 |
|
61 default: wcout<<"unknown"; break; |
|
62 } |
|
63 wcout<<L"('"<<token.text()<<L"')"; |
|
64 } |
|
65 |
|
66 void TestTokenization(Itk::TestMgr * , |
|
67 const wchar_t * inputStr) |
44 const wchar_t * inputStr) |
68 { |
45 { |
69 WhitespaceTokenizer ws; |
46 WhitespaceTokenizer ws; |
|
47 LineCommentTokenizer line; |
|
48 SectionCommentTokenizer section; |
70 IdTokenizer ids; |
49 IdTokenizer ids; |
71 IntLitTokenizer ints; |
50 IntLitTokenizer ints; |
72 RealLitTokenizer reals; |
51 RealLitTokenizer reals; |
73 LitTokenizer lits('\''); |
52 LitTokenizer lits('\''); |
74 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
53 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
82 // (to check if those types are recognized correctly). So |
61 // (to check if those types are recognized correctly). So |
83 // basically, in test cases, lit will mean string literals, |
62 // basically, in test cases, lit will mean string literals, |
84 // and int-lit, real-lit will mean integer and real literals, |
63 // and int-lit, real-lit will mean integer and real literals, |
85 // respectively. |
64 // respectively. |
86 Tokenizer* tokenizers[] = { |
65 Tokenizer* tokenizers[] = { |
87 &ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0 |
66 &ws, &line, §ion, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0 |
88 }; |
67 }; |
89 MultiTokenizer tokenizer(tokenizers); |
68 MultiTokenizer tokenizer(tokenizers); |
90 |
69 |
91 Tokens |
70 Tokens |
92 source(tokenizer, |
71 source(tokenizer, |
93 inputStr); |
72 inputStr); |
94 WhiteSpaceFilter tokens(source); |
73 StdFilter tokens(source); |
95 |
74 |
96 while (tokens) PrintToken(tokens++); |
75 while (tokens) PrintToken(tokens++); |
97 cout<<endl; |
76 cout<<endl; |
98 } |
77 } |
99 |
78 |
119 |
98 |
120 |
99 |
121 void TestTokenization4(Itk::TestMgr * testMgr) |
100 void TestTokenization4(Itk::TestMgr * testMgr) |
122 { |
101 { |
123 TestTokenization(testMgr, |
102 TestTokenization(testMgr, |
124 L"'\\' ''\\\\' '\\a' '\\\n'"); |
103 L"'\\' ''\\\\' '\\a' '\\\n' // comment\n /*foobar*/"); |
125 } |
104 } |
126 |
105 |
127 |
106 |
128 void TestTokenization5(Itk::TestMgr * ) |
107 void TestTokenization5(Itk::TestMgr * testMgr) |
129 { |
108 { |
130 WhitespaceTokenizer |
109 WhitespaceTokenizer |
131 ws; |
110 ws; |
132 IdTokenizer |
111 IdTokenizer |
133 ids; |
112 ids; |
134 SymbolTokenizer |
113 SymbolTokenizer |
135 for_(0xf00, L"for"); |
114 for_(L"for", L"for"); |
136 SymbolTokenizer |
115 SymbolTokenizer |
137 if_(0xbeef, L"if"); |
116 if_(L"if", L"if"); |
138 Tokenizer* tokenizers[] = { |
117 Tokenizer* tokenizers[] = { |
139 &ws, &for_, &if_, &ids, 0 |
118 &ws, &for_, &if_, &ids, 0 |
140 }; |
119 }; |
141 |
120 |
142 MultiTokenizer |
121 MultiTokenizer |
143 tokenizer(tokenizers); |
122 tokenizer(tokenizers); |
144 |
123 |
145 Tokens |
124 Tokens |
146 source(tokenizer, |
125 source(tokenizer, |
147 L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals |
126 L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals |
148 WhiteSpaceFilter |
127 StdFilter |
149 tokens(source); |
128 tokens(source); |
150 |
129 |
151 while (tokens) PrintToken(tokens++); |
130 while (tokens) PrintToken(tokens++); |
152 cout<<endl; |
131 cout<<endl; |
153 } |
132 } |
154 |
133 |
155 void TestTokenizationErrors(Itk::TestMgr* ) |
134 void TestTokenization6(Itk::TestMgr * testMgr) |
|
135 { |
|
136 WhitespaceTokenizer |
|
137 ws; |
|
138 LineCommentTokenizer |
|
139 line; |
|
140 SectionCommentTokenizer |
|
141 section; |
|
142 IdTokenizer |
|
143 ids; |
|
144 IntLitTokenizer |
|
145 intLit; |
|
146 RealLitTokenizer |
|
147 realLit; |
|
148 SymbolTokenizer |
|
149 div(L"slash", L"/"); |
|
150 SymbolTokenizer |
|
151 mul(L"star", L"*"); |
|
152 SymbolTokenizer |
|
153 plus(L"plus", L"+"); |
|
154 SymbolTokenizer |
|
155 minus(L"minus", L"-"); |
|
156 SymbolTokenizer |
|
157 equal(L"equals", L"="); |
|
158 |
|
159 Tokenizer* tokenizers[] = { |
|
160 &ws, &line, §ion, &ids, &intLit, &realLit, &div, &mul, &plus, &minus, &equal, 0 |
|
161 }; |
|
162 |
|
163 MultiTokenizer |
|
164 tokenizer(tokenizers); |
|
165 |
|
166 const wchar_t* text = |
|
167 L"4 + 6 = 2 * 5\n" |
|
168 L"6 / 2 = 1*3 // true\n" |
|
169 L"3 / x /*important thingie*/ = 2 * y\n" |
|
170 L"6 / x * / * / /* non sense / * / */ // zap" |
|
171 L"//\n" |
|
172 L"//"; |
|
173 |
|
174 { |
|
175 cout<<"With whitespaces & comments visible"<<endl; |
|
176 Tokens |
|
177 tokens(tokenizer, text); |
|
178 |
|
179 while (tokens) PrintToken(tokens++); |
|
180 cout<<endl; |
|
181 } |
|
182 |
|
183 { |
|
184 cout<<"With whitespaces & comments filtered"<<endl; |
|
185 Tokens |
|
186 source(tokenizer, text); |
|
187 |
|
188 StdFilter tokens(source); |
|
189 |
|
190 while (tokens) PrintToken(tokens++); |
|
191 cout<<endl; |
|
192 } |
|
193 |
|
194 } |
|
195 |
|
196 void TestTokenizationErrors(Itk::TestMgr* mgr) |
156 { |
197 { |
157 WhitespaceTokenizer ws; |
198 WhitespaceTokenizer ws; |
158 IdTokenizer ids; |
199 IdTokenizer ids; |
159 LitTokenizer lits('\''); |
200 LitTokenizer lits('\''); |
160 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
201 SymbolTokenizer lb(TOKEN_LEFT_BRACKET, L"("); |
169 { |
210 { |
170 Tokens tokens(tokenizer, text = L"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en')"); |
211 Tokens tokens(tokenizer, text = L"stdtokens>lowercase>stopwords('a', 'an','the)>stem('en')"); |
171 try { |
212 try { |
172 while (tokens) PrintToken(tokens++); |
213 while (tokens) PrintToken(tokens++); |
173 } catch (LexException& exc) { |
214 } catch (LexException& exc) { |
174 /* OBS |
215 exc.setContext(text); |
175 wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; |
216 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
176 */ |
|
177 exc.setContext(text); |
|
178 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
|
179 } catch (exception& exc) { |
217 } catch (exception& exc) { |
180 cout<<endl<<"Exception: "<<exc.what()<<endl; |
218 cout<<endl<<"Exception: "<<exc.what()<<endl; |
181 } |
219 } |
182 } |
220 } |
183 { |
221 { |
184 Tokens tokens(tokenizer, text = L"fas-324we?`213ff3*21(+"); |
222 Tokens tokens(tokenizer, text = L"fas-324we?`213ff3*21(+"); |
185 try { |
223 try { |
186 while (tokens) PrintToken(tokens++); |
224 while (tokens) PrintToken(tokens++); |
187 } catch (LexException& exc) { |
225 } catch (LexException& exc) { |
188 /* OBS |
226 exc.setContext(text); |
189 wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl; |
227 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
190 */ |
|
191 exc.setContext(text); |
|
192 wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl; |
|
193 } catch (exception& exc) { |
228 } catch (exception& exc) { |
194 cout<<endl<<"Exception: "<<exc.what()<<endl; |
229 cout<<endl<<"Exception: "<<exc.what()<<endl; |
195 } |
230 } |
196 } |
231 } |
197 } |
232 } |
198 |
233 |
|
234 void TestWhitespaceSplitter(Itk::TestMgr* mgr) |
|
235 { |
|
236 { |
|
237 WhitespaceSplitter tokens(L"foobar foo bar foo\tbar _*4 4bar foo*bar foo\nbar foo\rbar foo\0bar"); |
|
238 while (tokens) printf(" \"%S\"", tokens++.text().c_str()); |
|
239 printf("\n"); |
|
240 } |
|
241 |
|
242 { |
|
243 WhitespaceSplitter tokens(L"foobar"); |
|
244 while (tokens) printf(" \"%S\"", tokens++.text().c_str()); |
|
245 printf("\n"); |
|
246 } |
|
247 |
|
248 { |
|
249 WhitespaceSplitter tokens(L" foobar \r\n"); |
|
250 while (tokens) printf(" \"%S\"", tokens++.text().c_str()); |
|
251 printf("\n"); |
|
252 } |
|
253 |
|
254 { |
|
255 WhitespaceSplitter tokens(L" "); |
|
256 while (tokens) printf(" \"%S\"", tokens++.text().c_str()); |
|
257 printf("\n"); |
|
258 } |
|
259 |
|
260 { |
|
261 WhitespaceSplitter tokens(L""); |
|
262 while (tokens) printf(" \"%S\"", tokens++.text().c_str()); |
|
263 printf("\n"); |
|
264 } |
|
265 |
|
266 } |
|
267 |
199 Itk::TesterBase * CreateParsingTests() |
268 Itk::TesterBase * CreateParsingTests() |
200 { |
269 { |
201 using namespace Itk; |
270 using namespace Itk; |
202 |
271 |
203 SuiteTester |
272 SuiteTester |
204 * parsingTests = new SuiteTester("parsing"); |
273 * parsingTests = new SuiteTester("parsing"); |
205 |
274 |
206 |
|
207 parsingTests->add("tokenization1", |
275 parsingTests->add("tokenization1", |
208 TestTokenization1, |
276 TestTokenization1, |
209 "tokenization1"); |
277 "tokenization1"); |
210 |
278 |
211 parsingTests->add("tokenization2", |
279 parsingTests->add("tokenization2", |