|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include "cpixhits.h" |
|
20 #include "idxdb.h" |
|
21 #include "cpixexc.h" |
|
22 #include "iqrytype.h" |
|
23 #include "document.h" |
|
24 |
|
25 #include "common/cpixlog.h" |
|
26 |
|
27 namespace Cpix |
|
28 { |
|
29 |
|
30 /** |
|
31 * Interface IHits |
|
32 */ |
|
33 IHits::~IHits() |
|
34 { |
|
35 ; |
|
36 } |
|
37 |
|
38 |
|
39 void HitsBase::destroyWrappers() |
|
40 { |
|
41 WrapperMap::iterator |
|
42 i = wrappers_.begin(), |
|
43 end = wrappers_.end(); |
|
44 |
|
45 for (; i != end; ++i) |
|
46 { |
|
47 delete i->second; |
|
48 } |
|
49 |
|
50 wrappers_.clear(); |
|
51 } |
|
52 |
|
53 |
|
54 HitsBase::HitsBase(bool docsOwnedByClucene) |
|
55 : docsOwnedByClucene_(docsOwnedByClucene) |
|
56 { |
|
57 ; |
|
58 } |
|
59 |
|
60 |
|
61 void HitsBase::wrapDocument(int32_t index, |
|
62 lucene::document::Document * doc) |
|
63 { |
|
64 if (wrappers_.find(index) != wrappers_.end()) |
|
65 { |
|
66 delete wrappers_[index]; |
|
67 wrappers_[index] = NULL; |
|
68 } |
|
69 |
|
70 Cpix::Document |
|
71 * cpixDoc = new Cpix::Document(doc, |
|
72 docsOwnedByClucene_); |
|
73 wrappers_[index] = cpixDoc; |
|
74 } |
|
75 |
|
76 |
|
77 Cpix::Document * HitsBase::getDocument(int32_t index) |
|
78 { |
|
79 Cpix::Document |
|
80 * rv = NULL; |
|
81 |
|
82 if (wrappers_.find(index) == wrappers_.end()) |
|
83 { |
|
84 getDocument_(index); |
|
85 } |
|
86 |
|
87 rv = wrappers_[index]; |
|
88 |
|
89 return rv; |
|
90 } |
|
91 |
|
92 |
|
93 |
|
94 /** |
|
95 * Class ClhDocumentConsumer |
|
96 */ |
|
97 int32_t ClhDocumentConsumer::beginIndex() const |
|
98 { |
|
99 return beginIndex_; |
|
100 } |
|
101 |
|
102 |
|
103 int32_t ClhDocumentConsumer::endIndex() const |
|
104 { |
|
105 return endIndex_; |
|
106 } |
|
107 |
|
108 |
|
109 void ClhDocumentConsumer::useDocument(int32_t index, |
|
110 lucene::document::Document * document) |
|
111 { |
|
112 clHits_.wrapDocument(index, |
|
113 document); |
|
114 } |
|
115 |
|
116 |
|
117 void ClhDocumentConsumer::failedDocument(int32_t index, |
|
118 int clErrorNumber) |
|
119 { |
|
120 failedDocErrors_[index - beginIndex_] = clErrorNumber; |
|
121 } |
|
122 |
|
123 |
|
124 ClhDocumentConsumer::ClhDocumentConsumer(LuceneHits & clHits) |
|
125 : pageSize_(IdxDbMgr::instance()->getClHitsPageSize()), |
|
126 beginIndex_(UNUSED), |
|
127 endIndex_(UNUSED), |
|
128 clHits_(clHits) |
|
129 { |
|
130 failedDocErrors_.reserve(pageSize_); |
|
131 for (int i = 0; i < pageSize_; ++i) |
|
132 { |
|
133 failedDocErrors_.push_back(CL_ERR_UNKNOWN); |
|
134 } |
|
135 } |
|
136 |
|
137 |
|
138 void ClhDocumentConsumer::setPageBoundaryForIndex(int32_t index) |
|
139 { |
|
140 if (index >= clHits_.length()) |
|
141 { |
|
142 THROW_CPIXEXC("Not a valid hit doc: %d (hits length is %d)", |
|
143 index, |
|
144 clHits_.length()); |
|
145 } |
|
146 |
|
147 beginIndex_ = index - (index % pageSize_); |
|
148 endIndex_ = beginIndex_ + pageSize_; |
|
149 |
|
150 endIndex_ = std::min(clHits_.length(), |
|
151 endIndex_); |
|
152 |
|
153 for (int i = 0; i < pageSize_; ++i) |
|
154 { |
|
155 failedDocErrors_[i] = CL_ERR_UNKNOWN; |
|
156 } |
|
157 } |
|
158 |
|
159 |
|
160 void ClhDocumentConsumer::throwIfFailedDocIndex(int32_t index) |
|
161 { |
|
162 if (beginIndex_ == UNUSED) |
|
163 { |
|
164 // nothing has been fetched with this instance, so |
|
165 // nothing has failed yet |
|
166 return; |
|
167 } |
|
168 |
|
169 if (index < beginIndex_ || index >= endIndex_) |
|
170 { |
|
171 // we have not tried to load this document in this |
|
172 // batch |
|
173 return; |
|
174 } |
|
175 |
|
176 int32_t |
|
177 idx = index - beginIndex_; |
|
178 |
|
179 if (failedDocErrors_[idx] != CL_ERR_Success) |
|
180 { |
|
181 THROW_CPIXEXC("Failed to fetch hit document %d, CL error code: %d (cf. clucene debug/error.h)", |
|
182 index, |
|
183 failedDocErrors_[idx]); |
|
184 } |
|
185 else |
|
186 { |
|
187 THROW_CPIXEXC(PL_ERROR "Reloading a document should not be necessary %d", |
|
188 index); |
|
189 } |
|
190 } |
|
191 |
|
192 |
|
193 |
|
194 |
|
195 |
|
196 /** |
|
197 * Class LuceneHits wrapping lucene::document::Hits |
|
198 */ |
|
199 LuceneHits::LuceneHits(lucene::search::Hits * hits, |
|
200 lucene::search::Query * query, |
|
201 IdxDbHndl idxDb, |
|
202 Version idxDbVersion) |
|
203 : HitsBase(true), // docs in here are owned by clucene |
|
204 hits_(hits), |
|
205 length_(hits != NULL ? hits->length() : 0), |
|
206 query_(query), // increase reference count |
|
207 idxDb_(idxDb), |
|
208 idxDbVersion_(idxDbVersion), |
|
209 docConsumer_(NULL) |
|
210 { |
|
211 docConsumer_ = new ClhDocumentConsumer(*this); |
|
212 |
|
213 IdxDbMgr::instance()->incRefHndl(idxDb_); |
|
214 |
|
215 // pre-fetching the first page - assumption is that first |
|
216 // document to get will be #0 anyway, and no need for IPC + |
|
217 // ctxt switch with client by which time an idx operation may |
|
218 // have upset everything. |
|
219 // |
|
220 // TODO the dependent idxdb locks are released by this point, |
|
221 // so getDocument_ will have to re-acquire all locks, which is |
|
222 // not only OS operation but may bump into already modified |
|
223 // idx-es. Idea: IIdxDb::search could be reorganized into |
|
224 // fetchRecommitting() instead - the first fetch operations |
|
225 // would be performed while locks are still in place... |
|
226 if (length_ > 0) |
|
227 { |
|
228 getDocument_(0); |
|
229 } |
|
230 } |
|
231 |
|
232 LuceneHits::~LuceneHits() |
|
233 { |
|
234 delete hits_; |
|
235 hits_ = 0; |
|
236 |
|
237 destroyWrappers(); |
|
238 |
|
239 delete docConsumer_; |
|
240 |
|
241 IdxDbMgr::instance()->releaseHndl(idxDb_); |
|
242 } |
|
243 |
|
244 |
|
245 void LuceneHits::getDocument_(int32_t index) |
|
246 { |
|
247 // check if we have tried to fetch it before and failed |
|
248 docConsumer_->throwIfFailedDocIndex(index); |
|
249 |
|
250 // common usage pattern is to enumerate hits, never to access |
|
251 // them at true random manner. That means that we never need |
|
252 // wrappers for more than the current page - ie when about to |
|
253 // fetch the next page we can discard all other wrapped |
|
254 // documents. Merely an memory consumption optimization |
|
255 // measure, can be commented out. |
|
256 destroyWrappers(); |
|
257 |
|
258 // if we get here, then it means we don't have the page of hit |
|
259 // docs we need, but it may still throw if the hits is an |
|
260 // empty object or the hits itself was NULL |
|
261 docConsumer_->setPageBoundaryForIndex(index); |
|
262 |
|
263 IIdxDb* idx = IIdxDb::getPtr( idxDb_ ); |
|
264 |
|
265 lucene::search::Hits |
|
266 * currentHits = hits_; |
|
267 |
|
268 hits_ = 0; |
|
269 |
|
270 // the version number of idx that produced currentHits is |
|
271 // either still the same in which case we get back |
|
272 // currentHits, or it has changed and in that case currentHits |
|
273 // will be destroyed by this call and a new one will be |
|
274 // returned - rv will recieve the pointer to the fetched document |
|
275 hits_ = idx->fetchRecommitting(currentHits, |
|
276 &idxDbVersion_, |
|
277 query_, |
|
278 *docConsumer_); |
|
279 |
|
280 length_ = hits_->length(); |
|
281 |
|
282 if (wrappers_.find(index) == wrappers_.end()) |
|
283 { |
|
284 THROW_CPIXEXC("Failed to fetch doc %d: TODO reason", |
|
285 index); |
|
286 } |
|
287 } |
|
288 |
|
289 |
|
290 int32_t LuceneHits::length() |
|
291 { |
|
292 return length_; |
|
293 } |
|
294 |
|
295 |
|
296 /** |
|
297 * HitDocumentList (for suggestion feature) |
|
298 */ |
|
299 HitDocumentList::HitDocumentList() |
|
300 : HitsBase(false) // docs owned by this |
|
301 { |
|
302 ; |
|
303 } |
|
304 |
|
305 HitDocumentList::~HitDocumentList() |
|
306 { |
|
307 destroyWrappers(); |
|
308 for (std::vector<lucene::document::Document*>::iterator i = documents_.begin(); |
|
309 i != documents_.end(); |
|
310 i++) |
|
311 { |
|
312 _CLDELETE(*i); |
|
313 } |
|
314 } |
|
315 |
|
316 void HitDocumentList::getDocument_(int32_t index) |
|
317 { |
|
318 if (index >= 0 && index < documents_.size()) |
|
319 { |
|
320 wrapDocument(index, |
|
321 documents_[index]); |
|
322 } |
|
323 else |
|
324 { |
|
325 THROW_CPIXEXC(L"Hit document index '%d' out of bounds", |
|
326 index); |
|
327 } |
|
328 } |
|
329 |
|
330 int32_t HitDocumentList::length() |
|
331 { |
|
332 return documents_.size(); |
|
333 } |
|
334 |
|
335 |
|
336 void HitDocumentList::add(lucene::document::Document* document) |
|
337 { |
|
338 documents_.push_back(document); |
|
339 } |
|
340 |
|
341 |
|
342 void HitDocumentList::remove(int index) |
|
343 { |
|
344 documents_.erase(documents_.begin()+index); |
|
345 } |
|
346 |
|
347 |
|
348 |
|
349 |
|
350 |
|
351 } |