105 emit searchingStarted(); |
108 emit searchingStarted(); |
106 |
109 |
107 #if !defined(QT_NO_EXCEPTIONS) |
110 #if !defined(QT_NO_EXCEPTIONS) |
108 try { |
111 try { |
109 #endif |
112 #endif |
110 QCLuceneBooleanQuery booleanQuery; |
113 QCLuceneBooleanQuery booleanQueryTitle; |
|
114 QCLuceneBooleanQuery booleanQueryContent; |
111 QCLuceneStandardAnalyzer analyzer; |
115 QCLuceneStandardAnalyzer analyzer; |
112 if (!buildQuery(booleanQuery, queryList, analyzer)) { |
116 const QStringList& attribList = |
|
117 engine.filterAttributes(engine.currentFilter()); |
|
118 bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField, |
|
119 attribList, booleanQueryTitle, analyzer); |
|
120 bool contentQueryIsValid = buildQuery(queryList, ContentField, |
|
121 attribList, booleanQueryContent, analyzer); |
|
122 if (!titleQueryIsValid && !contentQueryIsValid) { |
113 emit searchingFinished(0); |
123 emit searchingFinished(0); |
114 return; |
124 return; |
115 } |
125 } |
116 |
126 |
117 const QStringList attribList = engine.filterAttributes(engine.currentFilter()); |
127 QCLuceneIndexSearcher indexSearcher(indexPath); |
118 if (!attribList.isEmpty()) { |
128 |
119 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") |
129 // QCLuceneHits object must be allocated on the heap, because |
120 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer); |
130 // there is no default constructor. |
121 |
131 QSharedPointer<QCLuceneHits> titleHits; |
122 if (!query) { |
132 QSharedPointer<QCLuceneHits> contentHits; |
|
133 if (titleQueryIsValid) { |
|
134 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( |
|
135 indexSearcher.search(booleanQueryTitle))); |
|
136 } |
|
137 if (contentQueryIsValid) { |
|
138 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( |
|
139 indexSearcher.search(booleanQueryContent))); |
|
140 } |
|
141 bool boost = true; |
|
142 if ((titleHits.isNull() || titleHits->length() == 0) |
|
143 && (contentHits.isNull() || contentHits->length() == 0)) { |
|
144 booleanQueryTitle = QCLuceneBooleanQuery(); |
|
145 booleanQueryContent = QCLuceneBooleanQuery(); |
|
146 titleQueryIsValid = |
|
147 buildTryHarderQuery(queryList, TitleTokenizedField, |
|
148 attribList, booleanQueryTitle, analyzer); |
|
149 contentQueryIsValid = |
|
150 buildTryHarderQuery(queryList, ContentField, attribList, |
|
151 booleanQueryContent, analyzer); |
|
152 if (!titleQueryIsValid && !contentQueryIsValid) { |
123 emit searchingFinished(0); |
153 emit searchingFinished(0); |
124 return; |
154 return; |
125 } |
155 } |
126 booleanQuery.add(query, true, true, false); |
156 if (titleQueryIsValid) { |
127 } |
157 titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( |
128 |
158 indexSearcher.search(booleanQueryTitle))); |
129 QCLuceneIndexSearcher indexSearcher(indexPath); |
|
130 QCLuceneHits hits = indexSearcher.search(booleanQuery); |
|
131 |
|
132 bool boost = true; |
|
133 QCLuceneBooleanQuery tryHarderQuery; |
|
134 if (hits.length() == 0) { |
|
135 if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) { |
|
136 if (!attribList.isEmpty()) { |
|
137 QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+") |
|
138 + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), |
|
139 analyzer); |
|
140 tryHarderQuery.add(query, true, true, false); |
|
141 } |
|
142 hits = indexSearcher.search(tryHarderQuery); |
|
143 boost = (hits.length() == 0); |
|
144 } |
159 } |
145 } |
160 if (contentQueryIsValid) { |
|
161 contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits( |
|
162 indexSearcher.search(booleanQueryContent))); |
|
163 } |
|
164 boost = false; |
|
165 } |
|
166 QList<QSharedPointer<QCLuceneHits> > cluceneHitsList; |
|
167 if (!titleHits.isNull()) |
|
168 cluceneHitsList.append(titleHits); |
|
169 if (!contentHits.isNull()) |
|
170 cluceneHitsList.append(contentHits); |
146 |
171 |
147 QSet<QString> pathSet; |
172 QSet<QString> pathSet; |
148 QCLuceneDocument document; |
173 QCLuceneDocument document; |
149 const QStringList namespaceList = engine.registeredDocumentations(); |
174 const QStringList namespaceList = engine.registeredDocumentations(); |
150 |
175 |
151 for (qint32 i = 0; i < hits.length(); i++) { |
176 foreach (QSharedPointer<QCLuceneHits> hits, cluceneHitsList) { |
152 document = hits.document(i); |
177 for (qint32 i = 0; i < hits->length(); i++) { |
153 const QString path = document.get(QLatin1String("path")); |
178 document = hits->document(i); |
154 if (!pathSet.contains(path) && namespaceList.contains( |
179 const QString path = document.get(PathField); |
155 document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) { |
180 if (!pathSet.contains(path) && namespaceList.contains( |
156 pathSet.insert(path); |
181 document.get(NamespaceField), Qt::CaseInsensitive)) { |
157 hitList.append(qMakePair(path, document.get(QLatin1String("title")))); |
182 pathSet.insert(path); |
|
183 hitList.append(qMakePair(path, document.get(TitleField))); |
|
184 } |
|
185 document.clear(); |
|
186 |
|
187 mutex.lock(); |
|
188 if (m_cancel) { |
|
189 mutex.unlock(); |
|
190 emit searchingFinished(0); |
|
191 return; |
|
192 } |
|
193 mutex.unlock(); |
158 } |
194 } |
159 document.clear(); |
|
160 |
|
161 mutex.lock(); |
|
162 if (m_cancel) { |
|
163 mutex.unlock(); |
|
164 emit searchingFinished(0); |
|
165 return; |
|
166 } |
|
167 mutex.unlock(); |
|
168 } |
195 } |
169 |
196 |
170 indexSearcher.close(); |
197 indexSearcher.close(); |
171 const int count = hitList.count(); |
198 const int count = hitList.count(); |
172 if ((count > 0) && boost) |
199 if ((count > 0) && boost) |
182 } |
209 } |
183 #endif |
210 #endif |
184 } |
211 } |
185 } |
212 } |
186 |
213 |
187 bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery, |
214 bool QHelpSearchIndexReaderClucene::buildQuery( |
188 QCLuceneStandardAnalyzer &analyzer) |
215 const QList<QHelpSearchQuery> &queries, const QString &fieldName, |
189 { |
216 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, |
190 const QLatin1String c("content"); |
217 QCLuceneAnalyzer &analyzer) |
191 const QLatin1String t("titleTokenized"); |
218 { |
192 |
219 bool queryIsValid = false; |
193 QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer); |
220 foreach (const QHelpSearchQuery &query, queries) { |
194 QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer); |
221 if (fieldName != ContentField && isNegativeQuery(query)) { |
195 if (query && query2) { |
222 queryIsValid = false; |
196 booleanQuery.add(query, true, false, false); |
223 break; |
197 booleanQuery.add(query2, true, false, false); |
224 } |
|
225 switch (query.fieldName) { |
|
226 case QHelpSearchQuery::FUZZY: |
|
227 if (addFuzzyQuery(query, fieldName, booleanQuery, analyzer)) |
|
228 queryIsValid = true; |
|
229 break; |
|
230 case QHelpSearchQuery::WITHOUT: |
|
231 if (fieldName != ContentField) |
|
232 return false; |
|
233 if (addWithoutQuery(query, fieldName, booleanQuery)) |
|
234 queryIsValid = true; |
|
235 break; |
|
236 case QHelpSearchQuery::PHRASE: |
|
237 if (addPhraseQuery(query, fieldName, booleanQuery)) |
|
238 queryIsValid = true; |
|
239 break; |
|
240 case QHelpSearchQuery::ALL: |
|
241 if (addAllQuery(query, fieldName, booleanQuery)) |
|
242 queryIsValid = true; |
|
243 break; |
|
244 case QHelpSearchQuery::DEFAULT: |
|
245 if (addDefaultQuery(query, fieldName, true, booleanQuery, analyzer)) |
|
246 queryIsValid = true; |
|
247 break; |
|
248 case QHelpSearchQuery::ATLEAST: |
|
249 if (addAtLeastQuery(query, fieldName, booleanQuery, analyzer)) |
|
250 queryIsValid = true; |
|
251 break; |
|
252 default: |
|
253 Q_ASSERT(!"Invalid field name"); |
|
254 } |
|
255 } |
|
256 |
|
257 if (queryIsValid && !filterAttributes.isEmpty()) { |
|
258 queryIsValid = |
|
259 addAttributesQuery(filterAttributes, booleanQuery, analyzer); |
|
260 } |
|
261 |
|
262 return queryIsValid; |
|
263 } |
|
264 |
|
265 bool QHelpSearchIndexReaderClucene::buildTryHarderQuery( |
|
266 const QList<QHelpSearchQuery> &queries, const QString &fieldName, |
|
267 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, |
|
268 QCLuceneAnalyzer &analyzer) |
|
269 { |
|
270 if (queries.isEmpty()) |
|
271 return false; |
|
272 const QHelpSearchQuery &query = queries.front(); |
|
273 if (query.fieldName != QHelpSearchQuery::DEFAULT) |
|
274 return false; |
|
275 if (isNegativeQuery(query)) |
|
276 return false; |
|
277 if (!addDefaultQuery(query, fieldName, false, booleanQuery, analyzer)) |
|
278 return false; |
|
279 if (filterAttributes.isEmpty()) |
198 return true; |
280 return true; |
199 } |
281 return addAttributesQuery(filterAttributes, booleanQuery, analyzer); |
200 |
282 } |
201 return false; |
283 |
202 } |
284 bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const |
203 |
285 { |
204 bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery, |
286 const QString &search = query.wordList.join(" "); |
205 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) |
287 return search.contains('!') || search.contains('-') |
206 { |
288 || search.contains(QLatin1String(" NOT ")); |
207 foreach (const QHelpSearchQuery query, queryList) { |
289 } |
208 switch (query.fieldName) { |
290 |
209 case QHelpSearchQuery::FUZZY: { |
291 bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query, |
210 const QLatin1String fuzzy("~"); |
292 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery, |
211 foreach (const QString &term, query.wordList) { |
293 QCLuceneAnalyzer &analyzer) |
212 if (term.isEmpty() |
294 { |
213 || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) { |
295 bool queryIsValid = false; |
214 return false; |
296 const QLatin1String fuzzy("~"); |
215 } |
297 foreach (const QString &term, query.wordList) { |
216 } |
298 if (!term.isEmpty()) { |
217 } break; |
299 QCLuceneQuery *lQuery = |
218 |
300 QCLuceneQueryParser::parse(term + fuzzy, fieldName, analyzer); |
219 case QHelpSearchQuery::WITHOUT: { |
301 if (lQuery != 0) { |
220 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
302 booleanQuery.add(lQuery, true, false, false); |
221 foreach (const QString &term, query.wordList) { |
303 queryIsValid = true; |
222 if (stopWords.contains(term, Qt::CaseInsensitive)) |
304 } |
223 continue; |
305 } |
224 |
306 } |
225 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( |
307 return queryIsValid; |
226 QLatin1String("content"), term.toLower())); |
308 } |
227 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( |
309 |
228 QLatin1String("titleTokenized"), term.toLower())); |
310 bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query, |
229 |
311 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) |
230 if (query && query2) { |
312 { |
231 booleanQuery.add(query, true, false, true); |
313 bool queryIsValid = false; |
232 booleanQuery.add(query2, true, false, true); |
314 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
233 } else { |
315 foreach (const QString &term, query.wordList) { |
234 return false; |
316 if (stopWords.contains(term, Qt::CaseInsensitive)) |
235 } |
317 continue; |
236 } |
318 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( |
237 } break; |
319 fieldName, term.toLower())); |
238 |
320 booleanQuery.add(lQuery, true, false, true); |
239 case QHelpSearchQuery::PHRASE: { |
321 queryIsValid = true; |
240 const QString &term = query.wordList.at(0).toLower(); |
322 } |
241 if (term.contains(QLatin1Char(' '))) { |
323 return queryIsValid; |
242 QStringList termList = term.split(QLatin1String(" ")); |
324 } |
243 QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); |
325 |
244 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
326 bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query, |
245 foreach (const QString &term, termList) { |
327 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) |
246 if (!stopWords.contains(term, Qt::CaseInsensitive)) |
328 { |
247 q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower())); |
329 bool queryIsValid = false; |
248 } |
330 const QString &term = query.wordList.at(0).toLower(); |
249 booleanQuery.add(q, true, true, false); |
331 if (term.contains(QLatin1Char(' '))) { |
250 } else { |
332 const QStringList termList = term.split(QLatin1String(" ")); |
251 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( |
333 QCLucenePhraseQuery *q = new QCLucenePhraseQuery(); |
252 QLatin1String("content"), term.toLower())); |
334 const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
253 QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm( |
335 foreach (const QString &term, termList) { |
254 QLatin1String("titleTokenized"), term.toLower())); |
336 if (!stopWords.contains(term, Qt::CaseInsensitive)) |
255 |
337 q->addTerm(QCLuceneTerm(fieldName, term.toLower())); |
256 if (query && query2) { |
338 } |
257 booleanQuery.add(query, true, true, false); |
339 if (!q->getTerms().isEmpty()) { |
258 booleanQuery.add(query2, true, false, false); |
340 booleanQuery.add(q, true, true, false); |
259 } else { |
341 queryIsValid = true; |
260 return false; |
342 } |
261 } |
343 } else { |
262 } |
344 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( |
263 } break; |
345 fieldName, term.toLower())); |
264 |
346 booleanQuery.add(lQuery, true, true, false); |
265 case QHelpSearchQuery::ALL: { |
347 queryIsValid = true; |
266 QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
348 } |
267 foreach (const QString &term, query.wordList) { |
349 return queryIsValid; |
268 if (stopWords.contains(term, Qt::CaseInsensitive)) |
350 } |
269 continue; |
351 |
270 |
352 bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query, |
271 QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm( |
353 const QString &fieldName, QCLuceneBooleanQuery &booleanQuery) |
272 QLatin1String("content"), term.toLower())); |
354 { |
273 |
355 bool queryIsValid = false; |
274 if (query) { |
356 const QStringList &stopWords = QCLuceneStopAnalyzer().englishStopWords(); |
275 booleanQuery.add(query, true, true, false); |
357 foreach (const QString &term, query.wordList) { |
276 } else { |
358 if (stopWords.contains(term, Qt::CaseInsensitive)) |
277 return false; |
359 continue; |
278 } |
360 QCLuceneQuery *lQuery = new QCLuceneTermQuery(QCLuceneTerm( |
279 } |
361 fieldName, term.toLower())); |
280 } break; |
362 booleanQuery.add(lQuery, true, true, false); |
281 |
363 queryIsValid = true; |
282 case QHelpSearchQuery::DEFAULT: { |
364 } |
283 foreach (const QString &term, query.wordList) { |
365 return queryIsValid; |
284 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), |
366 } |
285 QLatin1String("content"), analyzer); |
367 |
286 |
368 bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query, |
287 if (query) |
369 const QString &fieldName, bool allTermsRequired, |
288 booleanQuery.add(query, true, true, false); |
370 QCLuceneBooleanQuery &booleanQuery, |
289 } |
371 QCLuceneAnalyzer &analyzer) |
290 } break; |
372 { |
291 |
373 bool queryIsValid = false; |
292 case QHelpSearchQuery::ATLEAST: { |
374 foreach (const QString &term, query.wordList) { |
293 foreach (const QString &term, query.wordList) { |
375 QCLuceneQuery *lQuery = |
294 if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer)) |
376 QCLuceneQueryParser::parse(term.toLower(), fieldName, analyzer); |
295 return false; |
377 if (lQuery) { |
296 } |
378 booleanQuery.add(lQuery, true, allTermsRequired, false); |
297 } |
379 queryIsValid = true; |
298 } |
380 } |
299 } |
381 } |
300 |
382 return queryIsValid; |
|
383 } |
|
384 |
|
385 bool QHelpSearchIndexReaderClucene::addAtLeastQuery( |
|
386 const QHelpSearchQuery &query, const QString &fieldName, |
|
387 QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer) |
|
388 { |
|
389 bool queryIsValid = false; |
|
390 foreach (const QString &term, query.wordList) { |
|
391 if (!term.isEmpty()) { |
|
392 QCLuceneQuery *lQuery = |
|
393 QCLuceneQueryParser::parse(term, fieldName, analyzer); |
|
394 if (lQuery) { |
|
395 booleanQuery.add(lQuery, true, false, false); |
|
396 queryIsValid = true; |
|
397 } |
|
398 } |
|
399 } |
|
400 return queryIsValid; |
|
401 } |
|
402 |
|
403 bool QHelpSearchIndexReaderClucene::addAttributesQuery( |
|
404 const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery, |
|
405 QCLuceneAnalyzer &analyzer) |
|
406 { |
|
407 QCLuceneQuery* lQuery = QCLuceneQueryParser::parse(QLatin1String("+") |
|
408 + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer); |
|
409 if (!lQuery) |
|
410 return false; |
|
411 booleanQuery.add(lQuery, true, true, false); |
301 return true; |
412 return true; |
302 } |
|
303 |
|
304 bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery, |
|
305 const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer) |
|
306 { |
|
307 bool retVal = false; |
|
308 foreach (const QHelpSearchQuery query, queryList) { |
|
309 switch (query.fieldName) { |
|
310 default: break; |
|
311 case QHelpSearchQuery::DEFAULT: { |
|
312 foreach (const QString &term, query.wordList) { |
|
313 QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(), |
|
314 QLatin1String("content"), analyzer); |
|
315 |
|
316 if (query) { |
|
317 retVal = true; |
|
318 booleanQuery.add(query, true, false, false); |
|
319 } |
|
320 } |
|
321 } break; |
|
322 } |
|
323 } |
|
324 return retVal; |
|
325 } |
413 } |
326 |
414 |
327 void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine, |
415 void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine, |
328 QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList) |
416 QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList) |
329 { |
417 { |