searchengine/oss/loc/analysisunittest/inc/evaluationtool.h
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: Declarations for the analysis evaluation tool: corpus
*              loading, the indexed (searchable) corpus, and records that
*              compare ideal and measured search results.
*
*/
hgs
parents:
diff changeset
    17
hgs
parents:
diff changeset
    18
#ifndef EVALUATIONTOOL_H_
hgs
parents:
diff changeset
    19
#define EVALUATIONTOOL_H_
hgs
parents:
diff changeset
    20
hgs
parents:
diff changeset
    21
#include <bitset>
#include <cstddef>
#include <memory>
#include <string>
#include <vector>

#include "cpixmemtools.h"
hgs
parents:
diff changeset
    27
hgs
parents:
diff changeset
    28
// Forward declarations of the CLucene types referenced by this header.
// The header only names them through references, raw pointers and
// std::auto_ptr members, so the CLucene headers are not included here.
//
// NOTE(review): std::auto_ptr deletes its pointee in its destructor, so any
// translation unit that destroys an object holding these auto_ptr members
// should see the complete CLucene class definitions -- confirm that every
// such user includes the CLucene headers.
namespace lucene {
	namespace analysis {
		class Analyzer;
	}
	namespace store {
		class FSDirectory;
	}
	namespace search {
		class IndexSearcher;
	}
	namespace queryParser {
		class QueryParser;
	}
	namespace util {
		class Reader;
	}
}
hgs
parents:
diff changeset
    45
hgs
parents:
diff changeset
    46
namespace evaluationtool 
hgs
parents:
diff changeset
    47
{
hgs
parents:
diff changeset
    48
	
hgs
parents:
diff changeset
    49
	/**
	 * Loads and provides access to a corpus in foreign language.
	 *
	 * The corpus is held in memory as a vector of wide-character strings
	 * (lines_). Only declarations are visible in this header; remarks
	 * marked "presumably" describe behavior implemented in the accompanying
	 * source file and should be confirmed there.
	 */
	class Corpus
	{
		public:
			/**
			 * @param file  presumably the path of the corpus file to load
			 *              (file format and encoding are not visible here
			 *              -- TODO confirm)
			 */
			Corpus(const char* file);

			/**
			 * @param i  index of the requested entry; presumably zero-based
			 *           and expected to be below size() (bounds handling is
			 *           not visible here)
			 * @return   wide-character text of entry i; presumably a pointer
			 *           into lines_, in which case it is only valid while
			 *           this Corpus is alive -- TODO confirm
			 */
			const wchar_t* operator[](int i);

			/** @return presumably the number of entries stored in lines_ */
			int size();

		private:

			/** Corpus contents, one std::wstring per entry. */
			std::vector<std::wstring> lines_;
	};
hgs
parents:
diff changeset
    65
	
hgs
parents:
diff changeset
    66
// Width, in bits, of the std::bitset hit sets used by
// PreparedCorpus::search() and by Results. Presumably this also caps the
// number of corpus lines that can be tracked -- TODO confirm against the
// implementation. Being a preprocessor macro, the name is not scoped by the
// enclosing namespace.
#define MAXLINES 1024
hgs
parents:
diff changeset
    67
hgs
parents:
diff changeset
    68
	/**
hgs
parents:
diff changeset
    69
	 * Contains the corpus in indexed form so that it can be searched
hgs
parents:
diff changeset
    70
	 */
hgs
parents:
diff changeset
    71
	class PreparedCorpus 
hgs
parents:
diff changeset
    72
	{
hgs
parents:
diff changeset
    73
	public:
hgs
parents:
diff changeset
    74
		
hgs
parents:
diff changeset
    75
		PreparedCorpus( Corpus& corpus, 
hgs
parents:
diff changeset
    76
				        lucene::analysis::Analyzer& analyzer, 
hgs
parents:
diff changeset
    77
				        lucene::analysis::Analyzer* queryAnalyzer = NULL,
hgs
parents:
diff changeset
    78
				        lucene::analysis::Analyzer* prefixAnalyzer = NULL );
hgs
parents:
diff changeset
    79
		
hgs
parents:
diff changeset
    80
		int size();
hgs
parents:
diff changeset
    81
		
hgs
parents:
diff changeset
    82
		int indexSize();
hgs
parents:
diff changeset
    83
		
hgs
parents:
diff changeset
    84
		void search(const wchar_t* query, std::bitset<MAXLINES>& hits ); 
hgs
parents:
diff changeset
    85
hgs
parents:
diff changeset
    86
	public: 
hgs
parents:
diff changeset
    87
		
hgs
parents:
diff changeset
    88
		int size_;
hgs
parents:
diff changeset
    89
		
hgs
parents:
diff changeset
    90
		std::auto_ptr<lucene::store::FSDirectory> dir_;
hgs
parents:
diff changeset
    91
		
hgs
parents:
diff changeset
    92
		std::auto_ptr<lucene::queryParser::QueryParser> queryParser_;
hgs
parents:
diff changeset
    93
		
hgs
parents:
diff changeset
    94
		std::auto_ptr<lucene::search::IndexSearcher> searcher_;
hgs
parents:
diff changeset
    95
		
hgs
parents:
diff changeset
    96
		lucene::analysis::Analyzer* prefixAnalyzer_; 
hgs
parents:
diff changeset
    97
	};
hgs
parents:
diff changeset
    98
	
hgs
parents:
diff changeset
    99
	class Results 
hgs
parents:
diff changeset
   100
		{
hgs
parents:
diff changeset
   101
		public: 
hgs
parents:
diff changeset
   102
			Results();
hgs
parents:
diff changeset
   103
		
hgs
parents:
diff changeset
   104
			Results(std::bitset<MAXLINES>& hits, int lines);
hgs
parents:
diff changeset
   105
			
hgs
parents:
diff changeset
   106
			Results(PreparedCorpus& corpus, 
hgs
parents:
diff changeset
   107
					const wchar_t* query);
hgs
parents:
diff changeset
   108
		
hgs
parents:
diff changeset
   109
			bool hit(int i);
hgs
parents:
diff changeset
   110
			
hgs
parents:
diff changeset
   111
			void append(bool hit);
hgs
parents:
diff changeset
   112
						
hgs
parents:
diff changeset
   113
			int length(); 
hgs
parents:
diff changeset
   114
			
hgs
parents:
diff changeset
   115
		private:
hgs
parents:
diff changeset
   116
			
hgs
parents:
diff changeset
   117
			std::bitset<MAXLINES> hits_;
hgs
parents:
diff changeset
   118
			
hgs
parents:
diff changeset
   119
			int lines_; 
hgs
parents:
diff changeset
   120
			
hgs
parents:
diff changeset
   121
		};
hgs
parents:
diff changeset
   122
	
hgs
parents:
diff changeset
   123
	/**
hgs
parents:
diff changeset
   124
	 * An entry containing the ideal and measured results for a query.
hgs
parents:
diff changeset
   125
	 */
hgs
parents:
diff changeset
   126
	class EvaluationRecordEntry {
hgs
parents:
diff changeset
   127
	public:
hgs
parents:
diff changeset
   128
		EvaluationRecordEntry(const wchar_t* query, 
hgs
parents:
diff changeset
   129
							  Results& ideal, 
hgs
parents:
diff changeset
   130
							  Results& measured);
hgs
parents:
diff changeset
   131
		
hgs
parents:
diff changeset
   132
		EvaluationRecordEntry(lucene::util::Reader& reader);
hgs
parents:
diff changeset
   133
		
hgs
parents:
diff changeset
   134
		EvaluationRecordEntry();
hgs
parents:
diff changeset
   135
		
hgs
parents:
diff changeset
   136
		std::wstring query_; 
hgs
parents:
diff changeset
   137
		Results ideal_; 
hgs
parents:
diff changeset
   138
		Results measured_; 
hgs
parents:
diff changeset
   139
	};
hgs
parents:
diff changeset
   140
	
hgs
parents:
diff changeset
   141
	/** 
hgs
parents:
diff changeset
   142
	 * Contains ideal results and measured results for all queries. 
hgs
parents:
diff changeset
   143
	 */
hgs
parents:
diff changeset
   144
	class EvaluationRecord  {
hgs
parents:
diff changeset
   145
		public: 
hgs
parents:
diff changeset
   146
	
hgs
parents:
diff changeset
   147
			EvaluationRecord(const char* file);
hgs
parents:
diff changeset
   148
			
hgs
parents:
diff changeset
   149
			int length();
hgs
parents:
diff changeset
   150
			
hgs
parents:
diff changeset
   151
			const wchar_t* query(int i); 
hgs
parents:
diff changeset
   152
			
hgs
parents:
diff changeset
   153
			Results& ideal(int i); 
hgs
parents:
diff changeset
   154
			
hgs
parents:
diff changeset
   155
			Results& measured(int i); 
hgs
parents:
diff changeset
   156
		
hgs
parents:
diff changeset
   157
		private:
hgs
parents:
diff changeset
   158
			
hgs
parents:
diff changeset
   159
			std::vector<EvaluationRecordEntry> entries_; 
hgs
parents:
diff changeset
   160
		
hgs
parents:
diff changeset
   161
	};
hgs
parents:
diff changeset
   162
	
hgs
parents:
diff changeset
   163
	/**
hgs
parents:
diff changeset
   164
	 * Provides information of how the measured search compared
hgs
parents:
diff changeset
   165
	 * to the ideal one. 
hgs
parents:
diff changeset
   166
	 */
hgs
parents:
diff changeset
   167
	class Evaluation
hgs
parents:
diff changeset
   168
	{
hgs
parents:
diff changeset
   169
		public:
hgs
parents:
diff changeset
   170
		
hgs
parents:
diff changeset
   171
			Evaluation(Results& ideal, Results& measured);
hgs
parents:
diff changeset
   172
hgs
parents:
diff changeset
   173
			bool falsePositive(int line);
hgs
parents:
diff changeset
   174
hgs
parents:
diff changeset
   175
			bool falseNegative(int line);
hgs
parents:
diff changeset
   176
hgs
parents:
diff changeset
   177
			bool error(int line);
hgs
parents:
diff changeset
   178
		
hgs
parents:
diff changeset
   179
			int errors();
hgs
parents:
diff changeset
   180
			
hgs
parents:
diff changeset
   181
			int falsePositives(); 
hgs
parents:
diff changeset
   182
			
hgs
parents:
diff changeset
   183
			int falseNegatives(); 
hgs
parents:
diff changeset
   184
			
hgs
parents:
diff changeset
   185
		
hgs
parents:
diff changeset
   186
		private: 
hgs
parents:
diff changeset
   187
		
hgs
parents:
diff changeset
   188
			Results& ideal_; 
hgs
parents:
diff changeset
   189
	
hgs
parents:
diff changeset
   190
			Results& measured_; 
hgs
parents:
diff changeset
   191
			
hgs
parents:
diff changeset
   192
	};
hgs
parents:
diff changeset
   193
hgs
parents:
diff changeset
   194
}
hgs
parents:
diff changeset
   195
hgs
parents:
diff changeset
   196
#endif /* EVALUATIONTOOL_H_ */