searchengine/oss/loc/analysisunittest/inc/evaluationtool.h
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #ifndef EVALUATIONTOOL_H_
       
    19 #define EVALUATIONTOOL_H_
       
    20 
       
    21 #include <vector>
       
    22 #include <memory>
       
    23 #include <string>
       
    24 #include <bitset>
       
    25 
       
    26 #include "cpixmemtools.h"
       
    27 
       
    28 namespace lucene {
       
    29 	namespace analysis {
       
    30 		class Analyzer;
       
    31 	}
       
    32 	namespace store {
       
    33 		class FSDirectory;
       
    34 	}
       
    35 	namespace search {
       
    36 		class IndexSearcher;
       
    37 	}
       
    38 	namespace queryParser {
       
    39 		class QueryParser;
       
    40 	}
       
    41 	namespace util {
       
    42 		class Reader;
       
    43 	}
       
    44 }
       
    45 
       
    46 namespace evaluationtool 
       
    47 {
       
    48 	
       
    49 	/**
       
    50 	 * Loads and provides access to a corpus in foreign language
       
    51 	 */
       
    52 	class Corpus 
       
    53 	{
       
    54 		public:
       
    55 			Corpus(const char* file);
       
    56 			
       
    57 			const wchar_t* operator[](int i);
       
    58 			
       
    59 			int size(); 
       
    60 		
       
    61 		private: 
       
    62 		
       
    63 			std::vector<std::wstring> lines_; 
       
    64 	};
       
    65 	
       
     /* Bit capacity of the std::bitset hit sets used by PreparedCorpus::search() and Results. */
     66 #define MAXLINES 1024
       
    67 
       
    68 	/**
       
    69 	 * Contains the corpus in indexed form so that it can be searched
       
    70 	 */
       
    71 	class PreparedCorpus 
       
    72 	{
       
    73 	public:
       
    74 		
       
    75 		PreparedCorpus( Corpus& corpus, 
       
    76 				        lucene::analysis::Analyzer& analyzer, 
       
    77 				        lucene::analysis::Analyzer* queryAnalyzer = NULL,
       
    78 				        lucene::analysis::Analyzer* prefixAnalyzer = NULL );
       
    79 		
       
    80 		int size();
       
    81 		
       
    82 		int indexSize();
       
    83 		
       
    84 		void search(const wchar_t* query, std::bitset<MAXLINES>& hits ); 
       
    85 
       
    86 	public: 
       
    87 		
       
    88 		int size_;
       
    89 		
       
    90 		std::auto_ptr<lucene::store::FSDirectory> dir_;
       
    91 		
       
    92 		std::auto_ptr<lucene::queryParser::QueryParser> queryParser_;
       
    93 		
       
    94 		std::auto_ptr<lucene::search::IndexSearcher> searcher_;
       
    95 		
       
    96 		lucene::analysis::Analyzer* prefixAnalyzer_; 
       
    97 	};
       
    98 	
       
    99 	class Results 
       
   100 		{
       
   101 		public: 
       
   102 			Results();
       
   103 		
       
   104 			Results(std::bitset<MAXLINES>& hits, int lines);
       
   105 			
       
   106 			Results(PreparedCorpus& corpus, 
       
   107 					const wchar_t* query);
       
   108 		
       
   109 			bool hit(int i);
       
   110 			
       
   111 			void append(bool hit);
       
   112 						
       
   113 			int length(); 
       
   114 			
       
   115 		private:
       
   116 			
       
   117 			std::bitset<MAXLINES> hits_;
       
   118 			
       
   119 			int lines_; 
       
   120 			
       
   121 		};
       
   122 	
       
   123 	/**
       
   124 	 * An entry containing the ideal and measured results for a query.
       
   125 	 */
       
   126 	class EvaluationRecordEntry {
       
   127 	public:
       
   128 		EvaluationRecordEntry(const wchar_t* query, 
       
   129 							  Results& ideal, 
       
   130 							  Results& measured);
       
   131 		
       
   132 		EvaluationRecordEntry(lucene::util::Reader& reader);
       
   133 		
       
   134 		EvaluationRecordEntry();
       
   135 		
       
   136 		std::wstring query_; 
       
   137 		Results ideal_; 
       
   138 		Results measured_; 
       
   139 	};
       
   140 	
       
   141 	/** 
       
   142 	 * Contains ideal results and measured results for all queries. 
       
   143 	 */
       
   144 	class EvaluationRecord  {
       
   145 		public: 
       
   146 	
       
   147 			EvaluationRecord(const char* file);
       
   148 			
       
   149 			int length();
       
   150 			
       
   151 			const wchar_t* query(int i); 
       
   152 			
       
   153 			Results& ideal(int i); 
       
   154 			
       
   155 			Results& measured(int i); 
       
   156 		
       
   157 		private:
       
   158 			
       
   159 			std::vector<EvaluationRecordEntry> entries_; 
       
   160 		
       
   161 	};
       
   162 	
       
   163 	/**
       
   164 	 * Provides information of how the measured search compared
       
   165 	 * to the ideal one. 
       
   166 	 */
       
   167 	class Evaluation
       
   168 	{
       
   169 		public:
       
   170 		
       
   171 			Evaluation(Results& ideal, Results& measured);
       
   172 
       
   173 			bool falsePositive(int line);
       
   174 
       
   175 			bool falseNegative(int line);
       
   176 
       
   177 			bool error(int line);
       
   178 		
       
   179 			int errors();
       
   180 			
       
   181 			int falsePositives(); 
       
   182 			
       
   183 			int falseNegatives(); 
       
   184 			
       
   185 		
       
   186 		private: 
       
   187 		
       
   188 			Results& ideal_; 
       
   189 	
       
   190 			Results& measured_; 
       
   191 			
       
   192 	};
       
   193 
       
   194 }
       
   195 
       
   196 #endif /* EVALUATIONTOOL_H_ */