searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp
changeset 3 ae3f1779f6da
parent 0 671dee74050a
child 7 a5fbfefd615f
equal deleted inserted replaced
2:6c1a2771f4b7 3:ae3f1779f6da
    24 
    24 
    25 #include "itk.h"
    25 #include "itk.h"
    26 
    26 
    27 #include "config.h"
    27 #include "config.h"
    28 #include "testutils.h"
    28 #include "testutils.h"
       
    29 
       
    30 #include "std_log_result.h"
    29 
    31 
    30 // For testing custom analyzer
    32 // For testing custom analyzer
    31 #include "CLucene.h"
    33 #include "CLucene.h"
    32 #include "CLucene\analysis\AnalysisHeader.h"
    34 #include "CLucene\analysis\AnalysisHeader.h"
    33 #include "CLucene\util\stringreader.h"
    35 #include "CLucene\util\stringreader.h"
    64 }
    66 }
    65 
    67 
    66 
    68 
    67 void TestTokenization6(Itk::TestMgr * )
    69 void TestTokenization6(Itk::TestMgr * )
    68 {
    70 {
    69 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
    71     char *xml_file = (char*)__FUNCTION__;
       
    72         assert_failed = 0;
       
    73     Cpix::AnalyzerExp::Tokenizer tokenizer; 
    70 	Tokens source(tokenizer, 
    74 	Tokens source(tokenizer, 
    71 		L"switch { "
    75 		L"switch { "
    72 		  L"case '_docuid', '_mimetype': keywords;"
    76 		  L"case '_docuid', '_mimetype': keywords;"
    73 		  L"case '_baseappclass':        whitespace>lowercase;"
    77 		  L"case '_baseappclass':        whitespace>lowercase;"
    74 		  L"default: 					 natural(en); "
    78 		  L"default: 					 natural(en); "
    75 		L"}");
    79 		L"}");
    76     WhiteSpaceFilter 
    80     WhiteSpaceFilter 
    77         tokens(source); 
    81         tokens(source); 
    78 
    82 
    79     while (tokens) PrintToken(tokens++); 
    83     while (tokens) PrintToken(tokens++);
       
    84     testResultXml(xml_file);
    80 }
    85 }
    81 
    86 
    82 void TestParsing(Itk::TestMgr* )
    87 void TestParsing(Itk::TestMgr* )
    83 { 
    88 { 
    84 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
    89 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
    85 	
    90     char *xml_file = (char*)__FUNCTION__;
       
    91         assert_failed = 0;
    86 	Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)");
    92 	Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)");
    87 	WhiteSpaceFilter tokens(source);
    93 	WhiteSpaceFilter tokens(source);
    88 	Lexer lexer(tokens);
    94 	Lexer lexer(tokens);
    89 
    95 
    90 	Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin)  ");
    96 	Tokens source2(tokenizer, L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin)  ");
   110 		invoke = ParseInvokation(lexer3);
   116 		invoke = ParseInvokation(lexer3);
   111 		lexer3.eatEof(); 
   117 		lexer3.eatEof(); 
   112 		printf("Invoke identifier: %S\n", (invoke->id()).c_str()); 
   118 		printf("Invoke identifier: %S\n", (invoke->id()).c_str()); 
   113 		printf("%d parameters\n", invoke->params().size()); 
   119 		printf("%d parameters\n", invoke->params().size()); 
   114 	} catch (ParseException& e) {
   120 	} catch (ParseException& e) {
       
   121         assert_failed = 1;
   115 		printf("ParseException: %S\n", e.wWhat()); 
   122 		printf("ParseException: %S\n", e.wWhat()); 
   116 	} catch (LexException& e) {
   123 	} catch (LexException& e) {
       
   124         assert_failed = 1;	
   117 		printf("LexException: %S\n", e.wWhat()); 
   125 		printf("LexException: %S\n", e.wWhat()); 
   118 	}
   126 	}
       
   127 	testResultXml(xml_file);
   119 }
   128 }
   120 
   129 
   121 void TestSwitch(Itk::TestMgr* )
   130 void TestSwitch(Itk::TestMgr* )
   122 { 
   131 { 
   123 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
   132 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
   124 	
   133     char *xml_file = (char*)__FUNCTION__;
       
   134         assert_failed = 0;
   125 	const wchar_t* text; 
   135 	const wchar_t* text; 
   126 	Tokens source(tokenizer, text = 
   136 	Tokens source(tokenizer, text = 
   127 		L"switch { "
   137 		L"switch { "
   128 		  L"case '_docuid', '_mimetype': keywords;"
   138 		  L"case '_docuid', '_mimetype': keywords;"
   129 		  L"case '_baseappclass':        whitespace>lowercase;"
   139 		  L"case '_baseappclass':        whitespace>lowercase;"
   148 			}
   158 			}
   149 			printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;";
   159 			printf("default: ...\n");//<<s->def().tokenizer().id()<<"...;";
   150 		}
   160 		}
   151 	} catch (ParseException& e) {
   161 	} catch (ParseException& e) {
   152 		// OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; 
   162 		// OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; 
       
   163         assert_failed = 1;
   153 		e.setContext(text);
   164 		e.setContext(text);
   154 		printf("ParseException: %S\n", e.wWhat()); 
   165 		printf("ParseException: %S\n", e.wWhat()); 
   155 	} catch (LexException& e) {
   166 	} catch (LexException& e) {
   156 		// OBS wcout<<L"LexException: "<<e.describe(text)<<endl; 
   167 		// OBS wcout<<L"LexException: "<<e.describe(text)<<endl; 
       
   168         assert_failed = 1;
   157 		e.setContext(text);
   169 		e.setContext(text);
   158 		printf("LexException: %S\n", e.wWhat()); 
   170 		printf("LexException: %S\n", e.wWhat()); 
   159 	}
   171 	}
       
   172 	testResultXml(xml_file);
   160 }
   173 }
   161 
   174 
   162 void TestParsingErrors(Itk::TestMgr* )
   175 void TestParsingErrors(Itk::TestMgr* )
   163 {
   176 {
       
   177     char *xml_file = (char*)__FUNCTION__;
       
   178             assert_failed = 0;
   164 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
   179 	Cpix::AnalyzerExp::Tokenizer tokenizer; 
   165 	// eof
   180 	// eof
   166 	const wchar_t* text; 
   181 	const wchar_t* text; 
   167 	StdLexer eof(tokenizer, text = L"foobar(zap, foo, 'bar', 'raf', do, ");
   182 	StdLexer eof(tokenizer, text = L"foobar(zap, foo, 'bar', 'raf', do, ");
   168 	try {
   183 	try {
   209 	} catch (ParseException& e) {
   224 	} catch (ParseException& e) {
   210 		// OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; 
   225 		// OBS wcout<<L"ParseException: "<<e.describe(text)<<endl; 
   211 		e.setContext(text);
   226 		e.setContext(text);
   212 		printf("ParseException: %S\n", e.wWhat()); 
   227 		printf("ParseException: %S\n", e.wWhat()); 
   213 	} 
   228 	} 
   214 
   229 	testResultXml(xml_file);
   215 }
   230 }
   216 
   231 
   217 
   232 
   218 const char * CustomAnalyzerTestDocs[] = {
   233 const char * CustomAnalyzerTestDocs[] = {
   219     FILE_TEST_CORPUS_PATH "\\en\\1.txt",
   234     FILE_TEST_CORPUS_PATH "\\en\\1.txt",
   263 	}
   278 	}
   264 }
   279 }
   265 
   280 
   266 void TestCustomAnalyzers(Itk::TestMgr * testMgr)
   281 void TestCustomAnalyzers(Itk::TestMgr * testMgr)
   267 {
   282 {
       
   283     char *xml_file = (char*)__FUNCTION__;
       
   284         assert_failed = 0;
   268 	TestCustomAnalyzer(testMgr, L"stdtokens");
   285 	TestCustomAnalyzer(testMgr, L"stdtokens");
   269 	TestCustomAnalyzer(testMgr, L"whitespace");
   286 	TestCustomAnalyzer(testMgr, L"whitespace");
   270 	TestCustomAnalyzer(testMgr, L"whitespace>lowercase");
   287 	TestCustomAnalyzer(testMgr, L"whitespace>lowercase");
   271 	TestCustomAnalyzer(testMgr, L"whitespace>accent");
   288 	TestCustomAnalyzer(testMgr, L"whitespace>accent");
   272 	TestCustomAnalyzer(testMgr, L"letter");
   289 	TestCustomAnalyzer(testMgr, L"letter");
   275 	TestCustomAnalyzer(testMgr, L"keyword>lowercase");
   292 	TestCustomAnalyzer(testMgr, L"keyword>lowercase");
   276 	TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)"); 
   293 	TestCustomAnalyzer(testMgr, L"stdtokens>lowercase>accent>stem(en)"); 
   277 	TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)"); 
   294 	TestCustomAnalyzer(testMgr, L"letter>lowercase>accent>stop(en)"); 
   278 	TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')"); 
   295 	TestCustomAnalyzer(testMgr, L"letter>lowercase>stop('i', 'oh', 'nyt', 'näin')"); 
   279 	TestCustomAnalyzer(testMgr, L"letter>length(2, 4)");
   296 	TestCustomAnalyzer(testMgr, L"letter>length(2, 4)");
       
   297 	testResultXml(xml_file);
   280 }
   298 }
   281 
   299 
   282 void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field)
   300 void TestAnalyzerWithField(Itk::TestMgr * , const wchar_t* definition, const wchar_t* field)
   283 {
   301 {
   284 	using namespace lucene::analysis; 
   302 	using namespace lucene::analysis; 
   296 	_CLDELETE( stream ); 
   314 	_CLDELETE( stream ); 
   297 }
   315 }
   298 
   316 
   299 void TestSwitchAnalyzers(Itk::TestMgr * testMgr)
   317 void TestSwitchAnalyzers(Itk::TestMgr * testMgr)
   300 {
   318 {
   301 	const wchar_t* sw = L"\n"
   319     char *xml_file = (char*)__FUNCTION__;
       
   320         assert_failed = 0;
       
   321     const wchar_t* sw = L"\n"
   302 		L"switch {\n"
   322 		L"switch {\n"
   303 		L"    case '_docuid':          keyword;\n"
   323 		L"    case '_docuid':          keyword;\n"
   304 		L"    case '_appclass':        whitespace>lowercase;\n"
   324 		L"    case '_appclass':        whitespace>lowercase;\n"
   305 		L"    case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n"
   325 		L"    case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n"
   306 		L"    default:                 letter>lowercase>stop('i');\n"
   326 		L"    default:                 letter>lowercase>stop('i');\n"
   308 	TestAnalyzerWithField(testMgr, sw, L"_docuid");
   328 	TestAnalyzerWithField(testMgr, sw, L"_docuid");
   309 	TestAnalyzerWithField(testMgr, sw, L"_appclass");
   329 	TestAnalyzerWithField(testMgr, sw, L"_appclass");
   310 	TestAnalyzerWithField(testMgr, sw, L"Title"); 
   330 	TestAnalyzerWithField(testMgr, sw, L"Title"); 
   311 	TestAnalyzerWithField(testMgr, sw, L"message"); 
   331 	TestAnalyzerWithField(testMgr, sw, L"message"); 
   312 	TestAnalyzerWithField(testMgr, sw, L"field"); 
   332 	TestAnalyzerWithField(testMgr, sw, L"field"); 
       
   333 	testResultXml(xml_file);
   313 }
   334 }
   314 
   335 
   315 
   336 
   316 Itk::TesterBase * CreateAnalysisWhiteBoxTests()
   337 Itk::TesterBase * CreateAnalysisWhiteBoxTests()
   317 {
   338 {
   318     using namespace Itk;
   339     using namespace Itk;
   319 
   340 
   320     SuiteTester
   341     SuiteTester
   321         * analysisTests = new SuiteTester("whitebox");
   342         * analysisTests = new SuiteTester("analysiswhitebox");
   322     
   343     
   323     analysisTests->add("analyzer",
   344     analysisTests->add("analyzer",
   324 					   &TestCustomAnalyzers,
   345 					   &TestCustomAnalyzers,
   325 					   "analyzer");
   346 					   "analyzer");
   326     analysisTests->add("switchAnalyzer",
   347     analysisTests->add("switchanalyzer",
   327 					   &TestSwitchAnalyzers,
   348 					   &TestSwitchAnalyzers,
   328 					   "switchAnalyzer");
   349 					   "switchanalyzer");
   329     analysisTests->add("tokenization",
   350     analysisTests->add("tokenization",
   330     				   TestTokenization6,
   351     				   TestTokenization6,
   331     				   "tokenization");
   352     				   "tokenization");
   332   	analysisTests->add("parsing",
   353   	analysisTests->add("parsing",
   333                       TestParsing,
   354                       TestParsing,