--- a/searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/util/tsrc/cpixtoolsunittest/src/parseunittest.cpp Mon Jun 28 10:34:53 2010 +0530
@@ -24,49 +24,28 @@
using namespace Cpt::Parser;
using namespace std;
-enum TokenType {
- TOKEN_LEFT_BRACKET = Cpt::Lex::TOKEN_LAST_RESERVED, // 8
- TOKEN_RIGHT_BRACKET,
- TOKEN_COMMA, // 10
- TOKEN_PIPE,
- TOKEN_SWITCH,
- TOKEN_CASE,
- TOKEN_DEFAULT,
- TOKEN_LEFT_BRACE, // 15
- TOKEN_RIGHT_BRACE,
- TOKEN_COLON,
- TOKEN_TERMINATOR
-};
+const wchar_t* TOKEN_LEFT_BRACKET = L"left bracket"; // Token type labels as wide strings. NOTE(review): presumably passed as the type argument of SymbolTokenizer (cf. for_(L"for", L"for")) - confirm.
+const wchar_t* TOKEN_RIGHT_BRACKET = L"right bracket";
+const wchar_t* TOKEN_COMMA = L"comma";
+const wchar_t* TOKEN_PIPE = L"pipe";
+const wchar_t* TOKEN_SWITCH = L"switch";
+const wchar_t* TOKEN_CASE = L"case";
+const wchar_t* TOKEN_DEFAULT = L"default";
+const wchar_t* TOKEN_LEFT_BRACE = L"left brace";
+const wchar_t* TOKEN_RIGHT_BRACE = L"right brace";
+const wchar_t* TOKEN_COLON = L"colon";
+const wchar_t* TOKEN_TERMINATOR = L"terminator";
void PrintToken(Cpt::Lex::Token token) {
- switch (token.type()) {
- case TOKEN_WS: wcout<<L"space"; break;
- case TOKEN_ID: wcout<<"id"; break;
- case TOKEN_LIT: wcout<<"lit"; break;
- case TOKEN_STRLIT: wcout<<"str-lit"; break;
- case TOKEN_REALLIT: wcout<<"real-lit"; break;
- case TOKEN_INTLIT: wcout<<"int-lit"; break;
- case TOKEN_LEFT_BRACKET: wcout<<"lbr"; break;
- case TOKEN_RIGHT_BRACKET: wcout<<"rbr"; break;
- case TOKEN_COMMA: wcout<<"comma"; break;
- case TOKEN_PIPE: wcout<<"pipe"; break;
- case TOKEN_SWITCH : wcout<<"sw"; break;
- case TOKEN_CASE : wcout<<"case"; break;
- case TOKEN_DEFAULT : wcout<<"default"; break;
- case TOKEN_LEFT_BRACE : wcout<<"lbc"; break;
- case TOKEN_RIGHT_BRACE : wcout<<"rbc"; break;
- case TOKEN_COLON : wcout<<"cl"; break;
- case TOKEN_TERMINATOR : wcout<<"tr"; break;
-
- default: wcout<<"unknown"; break;
- }
- wcout<<L"('"<<token.text()<<L"')";
+ wcout<<token.type()<<L"('"<<token.text()<<L"')"; // Writes <type>('<text>') to wcout; type() is streamed as-is, with no per-type label mapping.
}
-void TestTokenization(Itk::TestMgr * ,
+void TestTokenization(Itk::TestMgr * testMgr,
const wchar_t * inputStr)
{
WhitespaceTokenizer ws;
+ LineCommentTokenizer line;
+ SectionCommentTokenizer section;
IdTokenizer ids;
IntLitTokenizer ints;
RealLitTokenizer reals;
@@ -84,14 +63,14 @@
// and int-lit, real-lit will mean integer and real literals,
// respectively.
Tokenizer* tokenizers[] = {
- &ws, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
+ &ws, &line, &section, &lb, &rb, &cm, &pp, &ids, &ints, &reals, &lits, 0
};
MultiTokenizer tokenizer(tokenizers);
Tokens
source(tokenizer,
inputStr);
- WhiteSpaceFilter tokens(source);
+ StdFilter tokens(source);
while (tokens) PrintToken(tokens++);
cout<<endl;
@@ -107,7 +86,7 @@
void TestTokenization2(Itk::TestMgr * testMgr)
{
TestTokenization(testMgr,
- L"'foo' 0 1 -2 'bar' +234 -34");
+ L"'foo' 0 1 -2 'bar' +234 -34 // side note"); // Input ends with a line comment. NOTE(review): presumably dropped by the StdFilter in TestTokenization - confirm.
}
@@ -121,20 +100,20 @@
void TestTokenization4(Itk::TestMgr * testMgr)
{
TestTokenization(testMgr,
- L"'\\' ''\\\\' '\\a' '\\\n'");
+ L"'\\' ''\\\\' '\\a' '\\\n' // comment\n /*foobar*/"); // Quoted literals containing backslashes, followed by a line comment and a section comment.
}
-void TestTokenization5(Itk::TestMgr * )
+void TestTokenization5(Itk::TestMgr * testMgr)
{
WhitespaceTokenizer
ws;
IdTokenizer
ids;
SymbolTokenizer
- for_(0xf00, L"for");
+ for_(L"for", L"for");
SymbolTokenizer
- if_(0xbeef, L"if");
+ if_(L"if", L"if");
Tokenizer* tokenizers[] = {
&ws, &for_, &if_, &ids, 0
};
@@ -145,14 +124,76 @@
Tokens
source(tokenizer,
L"fo for fore forth ofor oforo i if ifdom ifer fif fifi forfi fifor"); // test escape in literals
- WhiteSpaceFilter
+ StdFilter
tokens(source);
while (tokens) PrintToken(tokens++);
cout<<endl;
}
-void TestTokenizationErrors(Itk::TestMgr* )
+// Tokenizes arithmetic-style text containing // and /* */ comments and prints
+// the tokens twice: first unfiltered, then through StdFilter.
+void TestTokenization6(Itk::TestMgr * testMgr)
+{
+    WhitespaceTokenizer
+        ws;
+    LineCommentTokenizer
+        line;
+    SectionCommentTokenizer
+        section;
+    IdTokenizer
+        ids;
+    IntLitTokenizer
+        intLit;
+    RealLitTokenizer
+        realLit;
+    SymbolTokenizer
+        div(L"slash", L"/");
+    SymbolTokenizer
+        mul(L"star", L"*");
+    SymbolTokenizer
+        plus(L"plus", L"+");
+    SymbolTokenizer
+        minus(L"minus", L"-");
+    SymbolTokenizer
+        equal(L"equals", L"=");
+    // Zero-terminated tokenizer list handed to MultiTokenizer.
+    Tokenizer* tokenizers[] = {
+        &ws, &line, &section, &ids, &intLit, &realLit, &div, &mul, &plus, &minus, &equal, 0
+    };
+
+    MultiTokenizer
+        tokenizer(tokenizers);
+    // NOTE(review): the "// zap" piece has no trailing "\n", so it is concatenated directly with the next "//\n" piece; presumably intentional.
+    const wchar_t* text =
+        L"4 + 6 = 2 * 5\n"
+        L"6 / 2 = 1*3 // true\n"
+        L"3 / x /*important thingie*/ = 2 * y\n"
+        L"6 / x * / * / /* non sense / * / */ // zap"
+        L"//\n"
+        L"//";
+    // Pass 1: print the raw token stream produced by the MultiTokenizer.
+    {
+        cout<<"With whitespaces & comments visible"<<endl;
+        Tokens
+            tokens(tokenizer, text);
+
+        while (tokens) PrintToken(tokens++);
+        cout<<endl;
+    }
+    // Pass 2: same input, but read through StdFilter before printing.
+    {
+        cout<<"With whitespaces & comments filtered"<<endl;
+        Tokens
+            source(tokenizer, text);
+
+        StdFilter tokens(source);
+        while (tokens) PrintToken(tokens++);
+        cout<<endl;
+    }
+}
+
+void TestTokenizationErrors(Itk::TestMgr* mgr)
{
WhitespaceTokenizer ws;
IdTokenizer ids;
@@ -171,11 +212,8 @@
try {
while (tokens) PrintToken(tokens++);
} catch (LexException& exc) {
- /* OBS
- wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl;
- */
- exc.setContext(text);
- wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
+ exc.setContext(text);
+ wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
} catch (exception& exc) {
cout<<endl<<"Exception: "<<exc.what()<<endl;
}
@@ -185,17 +223,48 @@
try {
while (tokens) PrintToken(tokens++);
} catch (LexException& exc) {
- /* OBS
- wcout<<endl<<L"LexException: "<<exc.describe(text)<<endl;
- */
- exc.setContext(text);
- wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
+ exc.setContext(text);
+ wcout<<endl<<L"LexException: "<<exc.wWhat()<<endl;
} catch (exception& exc) {
cout<<endl<<"Exception: "<<exc.what()<<endl;
}
}
}
+// Prints, one output line per input, the quoted pieces that WhitespaceSplitter yields; uses %ls (ISO C) rather than the non-standard %S.
+void TestWhitespaceSplitter(Itk::TestMgr* mgr)
+{
+    {   // Words separated by spaces, \t, \n and \r. NOTE(review): the literal also embeds a \0 before the last "bar"; confirm how the splitter treats it.
+        WhitespaceSplitter tokens(L"foobar foo bar foo\tbar _*4 4bar foo*bar foo\nbar foo\rbar foo\0bar");
+        while (tokens) printf(" \"%ls\"", tokens++.text().c_str());
+        printf("\n");
+    }
+    // A single word with no whitespace at all.
+    {
+        WhitespaceSplitter tokens(L"foobar");
+        while (tokens) printf(" \"%ls\"", tokens++.text().c_str());
+        printf("\n");
+    }
+    // A single word surrounded by leading and trailing whitespace.
+    {
+        WhitespaceSplitter tokens(L" foobar \r\n");
+        while (tokens) printf(" \"%ls\"", tokens++.text().c_str());
+        printf("\n");
+    }
+    // Input consisting of whitespace only.
+    {
+        WhitespaceSplitter tokens(L" ");
+        while (tokens) printf(" \"%ls\"", tokens++.text().c_str());
+        printf("\n");
+    }
+    // Empty input.
+    {
+        WhitespaceSplitter tokens(L"");
+        while (tokens) printf(" \"%ls\"", tokens++.text().c_str());
+        printf("\n");
+    }
+}
+
Itk::TesterBase * CreateParsingTests()
{
using namespace Itk;
@@ -203,7 +272,6 @@
SuiteTester
* parsingTests = new SuiteTester("parsing");
-
parsingTests->add("tokenization1",
TestTokenization1,
"tokenization1");
@@ -223,11 +291,19 @@
parsingTests->add("tokenization5",
TestTokenization5,
"tokenization5");
+
+ parsingTests->add("tokenization6",
+ TestTokenization6,
+ "tokenization6");
parsingTests->add("syntaxerrors",
TestTokenizationErrors,
"syntaxerrors");
-
+
+ parsingTests->add("whitespace",
+ TestWhitespaceSplitter,
+ "whitespace");
+
return parsingTests;
}