searchengine/oss/loc/analysisunittest/src/evaluationtest.cpp
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #include "itk.h"
       
    19 
       
    20 #include "thaianalysis.h"
       
    21 
       
    22 #include "CLucene.h"
       
    23 
       
    24 #include <iostream>
       
    25 
       
    26 #include "evaluationtool.h"
       
    27 #include "analysisunittest.h"
       
    28 #include "CJKAnalyzer.h"
       
    29 #include "koreananalyzer.h"
       
    30 #include "ngram.h"
       
    31 #include "prefixfilter.h"
       
    32 
       
    33 #include "testutils.h"
       
    34 
       
    35 using namespace std; 
       
    36 using namespace analysis; 
       
    37 using namespace lucene::analysis; 
       
    38 using namespace evaluationtool; 
       
    39 
       
    40 
       
    41 
       
    42 void doEvaluate(Itk::TestMgr* testMgr, Analyzer& analyzer, const char* testName, const char* corpusFile, const char* evalFile, Analyzer* queryAnalyzer = NULL, Analyzer* prefixAnalyzer = NULL ) 
       
    43 {
       
    44 	Corpus corpus(corpusFile);
       
    45 	PreparedCorpus prepared(corpus, analyzer, queryAnalyzer, prefixAnalyzer);
       
    46 	EvaluationRecord record(evalFile);
       
    47 	
       
    48 	int failed = 0; 
       
    49 	int improved = 0;
       
    50 	
       
    51 	int timeMs = 0;
       
    52 	
       
    53 	for (int i = 0; i < record.length(); i++)
       
    54 	{
       
    55 		const wchar_t* query = record.query(i);
       
    56 		
       
    57 		if (!*query) continue; // skip empty queries
       
    58 
       
    59 		Results& ideal = record.ideal(i); 
       
    60 		Results& java = record.measured(i); // results for Java implementation
       
    61 		
       
    62 		Itk::Timestamp begin;
       
    63 		Itk::getTimestamp(&begin);
       
    64 		
       
    65 		Results results(prepared, query);
       
    66 		
       
    67 		Itk::Timestamp end;
       
    68 		Itk::getTimestamp(&end);
       
    69 
       
    70 		Evaluation control( ideal, java );
       
    71 		Evaluation eval( ideal, results );
       
    72 		
       
    73 		timeMs += Itk::getElapsedMs(&end, &begin); 
       
    74 		
       
    75 		wprintf(L"Q '%S' - ", query);
       
    76 		printTokens(queryAnalyzer?*queryAnalyzer:analyzer, query);
       
    77 	
       
    78 		wprintf(L"i:");
       
    79 		for (int i = 0; i < results.length(); i++) {
       
    80 			if (ideal.hit(i)) {
       
    81 				wprintf(L"X");
       
    82 			} else {
       
    83 				wprintf(L".");
       
    84 			}
       
    85 		}
       
    86 		wprintf(L"\n");
       
    87 		
       
    88 		wprintf(L"j:");
       
    89 		for (int i = 0; i < results.length(); i++) {
       
    90 			if (java.hit(i)) {
       
    91 				wprintf(L"X");
       
    92 			} else {
       
    93 				wprintf(L".");
       
    94 			}
       
    95 		}
       
    96 		wprintf(L"\n"); 
       
    97 
       
    98 		wprintf(L"c:");
       
    99 		for (int i = 0; i < results.length(); i++) {
       
   100 			if (eval.error(i) && !control.error(i)) {
       
   101 				wprintf(L"!");
       
   102 			} else if (!eval.error(i) && control.error(i)){
       
   103 				wprintf(L"+"); 
       
   104 			} else {
       
   105 				wprintf(L"."); 
       
   106 			}
       
   107 		}
       
   108 		wprintf(L"\n"); 
       
   109 		if ( eval.errors() == control.errors() ) {
       
   110 			wprintf(L"ok\n"); 
       
   111 		} else if ( eval.errors() < control.errors() ) {
       
   112 			wprintf(L"improved\n");
       
   113 			improved++; 
       
   114 		} else {
       
   115 			wprintf(L"more errors!\n");
       
   116 			failed++; 
       
   117 		}
       
   118 		wprintf(L"\n"); 
       
   119 	}
       
   120 	
       
   121     wprintf(L"Index size was %d KB\n", prepared.indexSize() / 1000);
       
   122 	wprintf(L"Improved in %d / %d\n", improved, record.length());
       
   123 	wprintf(L"Deteriorated in %d / %d\n", failed, record.length());
       
   124 
       
   125 	
       
   126 	std::string title;
       
   127 	title += testName; title += " search time";
       
   128     ITK_REPORT( testMgr, title.c_str(), "%d ms / query", timeMs / record.length());
       
   129     
       
   130     title = testName; title += " index size";
       
   131     ITK_REPORT( testMgr, title.c_str(), "%d KB", prepared.indexSize() / 1000);
       
   132 }
       
   133 
       
   134 void ThaiEvaluation(Itk::TestMgr* testMgr) 
       
   135 {
       
   136 	ThaiAnalyzer analyzer;
       
   137 	doEvaluate(testMgr, analyzer, "thai", CORPUS_DIR "thai/corpus.txt", CORPUS_DIR "thai/eval.txt");
       
   138 }
       
   139 
       
   140 void GalicianEvaluation(Itk::TestMgr* testMgr) {	
       
   141 //	GalicianAnalyzer analyzer;
       
   142 	standard::StandardAnalyzer analyzer;
       
   143 	doEvaluate(testMgr, analyzer, "galician", CORPUS_DIR "galician/corpus.txt", CORPUS_DIR "galician/eval.txt");
       
   144 }
       
   145 
       
   146 void KoreanCjkEvaluation(Itk::TestMgr* testMgr) {	
       
   147 	cjk::CJKAnalyzer analyzer;
       
   148 	doEvaluate(testMgr, analyzer, "korean_cjk", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt");
       
   149 }
       
   150 
       
   151 void KoreanBigramEvaluation(Itk::TestMgr* testMgr) {	
       
   152 	CjkNGramAnalyzer analyzer(2);
       
   153 	doEvaluate(testMgr, analyzer, "korean_2gram", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt");
       
   154 }
       
   155 
       
   156 void KoreanUnigramEvaluation(Itk::TestMgr* testMgr) {
       
   157     CjkNGramAnalyzer analyzer(1);
       
   158     doEvaluate(testMgr, analyzer, "korean_1gram", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt");
       
   159 }
       
   160 
       
   161 void KoreanJamuUnigramEvaluation(Itk::TestMgr* testMgr) {  
       
   162     JamuNGramAnalyzer analyzer(1);
       
   163     doEvaluate(testMgr, analyzer, "jamu_1gram", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt");
       
   164 }
       
   165 
       
   166 void KoreanJamuBigramEvaluation(Itk::TestMgr* testMgr) {    
       
   167     JamuNGramAnalyzer analyzer(2);
       
   168     doEvaluate(testMgr, analyzer, "jamu_2gram", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt");
       
   169 }
       
   170 
       
   171 void KoreanEvaluation(Itk::TestMgr* testMgr) {    
       
   172     KoreanAnalyzer analyzer;
       
   173     KoreanQueryAnalyzer queryAnalyzer;
       
   174     doEvaluate(testMgr, analyzer, "korean", CORPUS_DIR "korean/corpus.txt", CORPUS_DIR "korean/eval.txt", &queryAnalyzer);
       
   175 }
       
   176 
       
   177 void ChineseBigramEvaluation(Itk::TestMgr* testMgr) {	
       
   178 	CjkNGramAnalyzer analyzer(2);
       
   179 	doEvaluate(testMgr, analyzer, "chinese_2gram", CORPUS_DIR "chinese_prc/corpus.txt", CORPUS_DIR "chinese_prc/eval.txt");
       
   180 }
       
   181 
       
   182 void ChineseUnigramEvaluation(Itk::TestMgr* testMgr) {
       
   183 	CjkNGramAnalyzer analyzer(1);
       
   184 	doEvaluate(testMgr, analyzer, "chinese_1gram", CORPUS_DIR "chinese_prc/corpus.txt", CORPUS_DIR "chinese_prc/eval.txt");
       
   185 }
       
   186 
       
   187 void HebrewEvaluation(Itk::TestMgr* testMgr) {    
       
   188     HebrewAnalyzer analyzer;
       
   189     HebrewQueryAnalyzer queryAnalyzer;
       
   190     doEvaluate(testMgr, analyzer, "hebrew", CORPUS_DIR "hebrew/corpus.txt", CORPUS_DIR "hebrew/eval.txt", &queryAnalyzer, &queryAnalyzer);
       
   191 }
       
   192 
       
   193 void FrenchEvaluation(Itk::TestMgr* testMgr) {    
       
   194 	FrenchAnalyzer analyzer;
       
   195     doEvaluate(testMgr, analyzer, "french", CORPUS_DIR "french/corpus.txt", CORPUS_DIR "french/eval.txt", &analyzer, &analyzer);
       
   196 }
       
   197 
       
   198 Itk::TesterBase * CreateEvaluationTest() 
       
   199 {
       
   200 	using namespace Itk;
       
   201 	
       
   202 	SuiteTester
       
   203 		* testSuite = 
       
   204 			new SuiteTester( "evaluation" );
       
   205 	
       
   206 	testSuite->add( "thai", ThaiEvaluation, "thai" );
       
   207 	testSuite->add( "galician", GalicianEvaluation, "galician" );
       
   208 	testSuite->add( "korean_cjk", KoreanCjkEvaluation, "korean_cjk" );
       
   209     testSuite->add( "korean", KoreanEvaluation, "korean" );
       
   210 	testSuite->add( "korean_1gram", KoreanUnigramEvaluation, "korean_1gram" );
       
   211 	testSuite->add( "korean_2gram", KoreanBigramEvaluation, "korean_2gram" );
       
   212 	testSuite->add( "chinese_1gram", ChineseUnigramEvaluation, "chinese_1gram" );
       
   213 	testSuite->add( "chinese_2gram", ChineseBigramEvaluation, "chinese_2gram" );
       
   214 
       
   215     testSuite->add( "jamu_1gram", KoreanJamuUnigramEvaluation, "jamu_1gram" );
       
   216     testSuite->add( "jamu_2gram", KoreanJamuBigramEvaluation, "jamu_2gram" );
       
   217 
       
   218     testSuite->add( "hebrew", HebrewEvaluation, "hebrew" );
       
   219     testSuite->add( "french", FrenchEvaluation, "french" );
       
   220 
       
   221 	return testSuite;
       
   222 }
       
   223