|
/*
 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
 * All rights reserved.
 * This component and the accompanying materials are made available
 * under the terms of "Eclipse Public License v1.0"
 * which accompanies this distribution, and is available
 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
 *
 * Initial Contributors:
 * Nokia Corporation - initial contribution.
 *
 * Contributors:
 *
 * Description:
 *
 */
|
17 |
|
18 #ifndef EVALUATIONTOOL_H_ |
|
19 #define EVALUATIONTOOL_H_ |
|
20 |
|
21 #include <vector> |
|
22 #include <memory> |
|
23 #include <string> |
|
24 #include <bitset> |
|
25 |
|
26 #include "cpixmemtools.h" |
|
27 |
|
/*
 * Forward declarations of the lucene types that the classes in this header
 * refer to through pointers, references and smart pointers. Only the names
 * are introduced here; code that needs the complete types must include the
 * corresponding lucene headers itself.
 */
namespace lucene {
    namespace analysis {
        class Analyzer;
    }
    namespace store {
        class FSDirectory;
    }
    namespace search {
        class IndexSearcher;
    }
    namespace queryParser {
        class QueryParser;
    }
    namespace util {
        class Reader;
    }
}
|
45 |
|
46 namespace evaluationtool |
|
47 { |
|
48 |
|
/**
 * Loads a corpus in a foreign language and provides access to it.
 *
 * The content is held as a sequence of wide strings (lines_).
 */
class Corpus
{
public:
    /**
     * @param file  name of the corpus file (presumably read during
     *              construction; confirm against the implementation)
     */
    Corpus(const char* file);

    /**
     * @param i  index of the requested entry
     * @return   wide-character string for entry i (presumably backed by
     *           lines_ and therefore valid only while this object is
     *           alive and unmodified; confirm against the implementation)
     */
    const wchar_t* operator[](int i);

    /**
     * @return  number of entries in the corpus (presumably the size of
     *          lines_; confirm against the implementation)
     */
    int size();

private:

    /** Corpus content, one wide string per element. */
    std::vector<std::wstring> lines_;
};
|
65 |
|
/** Number of bits in the std::bitset hit sets declared in this header. */
#define MAXLINES 1024
|
67 |
|
68 /** |
|
69 * Contains the corpus in indexed form so that it can be searched |
|
70 */ |
|
71 class PreparedCorpus |
|
72 { |
|
73 public: |
|
74 |
|
75 PreparedCorpus( Corpus& corpus, |
|
76 lucene::analysis::Analyzer& analyzer, |
|
77 lucene::analysis::Analyzer* queryAnalyzer = NULL, |
|
78 lucene::analysis::Analyzer* prefixAnalyzer = NULL ); |
|
79 |
|
80 int size(); |
|
81 |
|
82 int indexSize(); |
|
83 |
|
84 void search(const wchar_t* query, std::bitset<MAXLINES>& hits ); |
|
85 |
|
86 public: |
|
87 |
|
88 int size_; |
|
89 |
|
90 std::auto_ptr<lucene::store::FSDirectory> dir_; |
|
91 |
|
92 std::auto_ptr<lucene::queryParser::QueryParser> queryParser_; |
|
93 |
|
94 std::auto_ptr<lucene::search::IndexSearcher> searcher_; |
|
95 |
|
96 lucene::analysis::Analyzer* prefixAnalyzer_; |
|
97 }; |
|
98 |
|
99 class Results |
|
100 { |
|
101 public: |
|
102 Results(); |
|
103 |
|
104 Results(std::bitset<MAXLINES>& hits, int lines); |
|
105 |
|
106 Results(PreparedCorpus& corpus, |
|
107 const wchar_t* query); |
|
108 |
|
109 bool hit(int i); |
|
110 |
|
111 void append(bool hit); |
|
112 |
|
113 int length(); |
|
114 |
|
115 private: |
|
116 |
|
117 std::bitset<MAXLINES> hits_; |
|
118 |
|
119 int lines_; |
|
120 |
|
121 }; |
|
122 |
|
123 /** |
|
124 * An entry containing the ideal and measured results for a query. |
|
125 */ |
|
126 class EvaluationRecordEntry { |
|
127 public: |
|
128 EvaluationRecordEntry(const wchar_t* query, |
|
129 Results& ideal, |
|
130 Results& measured); |
|
131 |
|
132 EvaluationRecordEntry(lucene::util::Reader& reader); |
|
133 |
|
134 EvaluationRecordEntry(); |
|
135 |
|
136 std::wstring query_; |
|
137 Results ideal_; |
|
138 Results measured_; |
|
139 }; |
|
140 |
|
141 /** |
|
142 * Contains ideal results and measured results for all queries. |
|
143 */ |
|
144 class EvaluationRecord { |
|
145 public: |
|
146 |
|
147 EvaluationRecord(const char* file); |
|
148 |
|
149 int length(); |
|
150 |
|
151 const wchar_t* query(int i); |
|
152 |
|
153 Results& ideal(int i); |
|
154 |
|
155 Results& measured(int i); |
|
156 |
|
157 private: |
|
158 |
|
159 std::vector<EvaluationRecordEntry> entries_; |
|
160 |
|
161 }; |
|
162 |
|
163 /** |
|
164 * Provides information of how the measured search compared |
|
165 * to the ideal one. |
|
166 */ |
|
167 class Evaluation |
|
168 { |
|
169 public: |
|
170 |
|
171 Evaluation(Results& ideal, Results& measured); |
|
172 |
|
173 bool falsePositive(int line); |
|
174 |
|
175 bool falseNegative(int line); |
|
176 |
|
177 bool error(int line); |
|
178 |
|
179 int errors(); |
|
180 |
|
181 int falsePositives(); |
|
182 |
|
183 int falseNegatives(); |
|
184 |
|
185 |
|
186 private: |
|
187 |
|
188 Results& ideal_; |
|
189 |
|
190 Results& measured_; |
|
191 |
|
192 }; |
|
193 |
|
194 } |
|
195 |
|
196 #endif /* EVALUATIONTOOL_H_ */ |