--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp Mon Jan 11 14:00:40 2010 +0000
@@ -0,0 +1,394 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** All rights reserved.
+** Contact: Nokia Corporation (qt-info@nokia.com)
+**
+** This file is part of the Qt Assistant of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the Technology Preview License Agreement accompanying
+** this package.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** If you have questions regarding the use of this file, please contact
+** Nokia at qt-info@nokia.com.
+**
+**
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qhelpenginecore.h"
+#include "fulltextsearch/qsearchable_p.h"
+#include "fulltextsearch/qqueryparser_p.h"
+#include "fulltextsearch/qindexreader_p.h"
+#include "qhelpsearchindexreader_clucene_p.h"
+
+#include <QtCore/QDir>
+#include <QtCore/QSet>
+#include <QtCore/QString>
+#include <QtCore/QFileInfo>
+#include <QtCore/QStringList>
+#include <QtCore/QTextStream>
+#include <QtCore/QMutexLocker>
+
+QT_BEGIN_NAMESPACE
+
+namespace qt {
+ namespace fulltextsearch {
+ namespace clucene {
+
+QHelpSearchIndexReaderClucene::QHelpSearchIndexReaderClucene()
+ : QHelpSearchIndexReader()
+{
+ // nothing todo
+}
+
+QHelpSearchIndexReaderClucene::~QHelpSearchIndexReaderClucene()
+{
+}
+
+
+void QHelpSearchIndexReaderClucene::run()
+{
+ mutex.lock();
+
+ if (m_cancel) {
+ mutex.unlock();
+ return;
+ }
+
+ const QString collectionFile(this->m_collectionFile);
+ const QList<QHelpSearchQuery> &queryList = this->m_query;
+ const QString indexPath(m_indexFilesFolder);
+
+ mutex.unlock();
+
+ QHelpEngineCore engine(collectionFile, 0);
+ if (!engine.setupData())
+ return;
+
+ QFileInfo fInfo(indexPath);
+ if (fInfo.exists() && !fInfo.isWritable()) {
+ qWarning("Full Text Search, could not read index (missing permissions).");
+ return;
+ }
+
+ if(QCLuceneIndexReader::indexExists(indexPath)) {
+ mutex.lock();
+ if (m_cancel) {
+ mutex.unlock();
+ return;
+ }
+ mutex.unlock();
+
+ emit searchingStarted();
+
+#if !defined(QT_NO_EXCEPTIONS)
+ try {
+#endif
+ QCLuceneBooleanQuery booleanQuery;
+ QCLuceneStandardAnalyzer analyzer;
+ if (!buildQuery(booleanQuery, queryList, analyzer)) {
+ emit searchingFinished(0);
+ return;
+ }
+
+ const QStringList attribList = engine.filterAttributes(engine.currentFilter());
+ if (!attribList.isEmpty()) {
+ QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
+ + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer);
+
+ if (!query) {
+ emit searchingFinished(0);
+ return;
+ }
+ booleanQuery.add(query, true, true, false);
+ }
+
+ QCLuceneIndexSearcher indexSearcher(indexPath);
+ QCLuceneHits hits = indexSearcher.search(booleanQuery);
+
+ bool boost = true;
+ QCLuceneBooleanQuery tryHarderQuery;
+ if (hits.length() == 0) {
+ if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) {
+ if (!attribList.isEmpty()) {
+ QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
+ + attribList.join(QLatin1String(" +")), QLatin1String("attribute"),
+ analyzer);
+ tryHarderQuery.add(query, true, true, false);
+ }
+ hits = indexSearcher.search(tryHarderQuery);
+ boost = (hits.length() == 0);
+ }
+ }
+
+ QSet<QString> pathSet;
+ QCLuceneDocument document;
+ const QStringList namespaceList = engine.registeredDocumentations();
+
+ for (qint32 i = 0; i < hits.length(); i++) {
+ document = hits.document(i);
+ const QString path = document.get(QLatin1String("path"));
+ if (!pathSet.contains(path) && namespaceList.contains(
+ document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) {
+ pathSet.insert(path);
+ hitList.append(qMakePair(path, document.get(QLatin1String("title"))));
+ }
+ document.clear();
+
+ mutex.lock();
+ if (m_cancel) {
+ mutex.unlock();
+ emit searchingFinished(0);
+ return;
+ }
+ mutex.unlock();
+ }
+
+ indexSearcher.close();
+ const int count = hitList.count();
+ if ((count > 0) && boost)
+ boostSearchHits(engine, hitList, queryList);
+ emit searchingFinished(hitList.count());
+
+#if !defined(QT_NO_EXCEPTIONS)
+ } catch(...) {
+ mutex.lock();
+ hitList.clear();
+ mutex.unlock();
+ emit searchingFinished(0);
+ }
+#endif
+ }
+}
+
+bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery,
+ QCLuceneStandardAnalyzer &analyzer)
+{
+ const QLatin1String c("content");
+ const QLatin1String t("titleTokenized");
+
+ QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer);
+ QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer);
+ if (query && query2) {
+ booleanQuery.add(query, true, false, false);
+ booleanQuery.add(query2, true, false, false);
+ return true;
+ }
+
+ return false;
+}
+
+bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery,
+ const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+{
+ foreach (const QHelpSearchQuery query, queryList) {
+ switch (query.fieldName) {
+ case QHelpSearchQuery::FUZZY: {
+ const QLatin1String fuzzy("~");
+ foreach (const QString &term, query.wordList) {
+ if (term.isEmpty()
+ || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) {
+ return false;
+ }
+ }
+ } break;
+
+ case QHelpSearchQuery::WITHOUT: {
+ QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+
+ QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
+ QLatin1String("content"), term.toLower()));
+ QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
+ QLatin1String("titleTokenized"), term.toLower()));
+
+ if (query && query2) {
+ booleanQuery.add(query, true, false, true);
+ booleanQuery.add(query2, true, false, true);
+ } else {
+ return false;
+ }
+ }
+ } break;
+
+ case QHelpSearchQuery::PHRASE: {
+ const QString &term = query.wordList.at(0).toLower();
+ if (term.contains(QLatin1Char(' '))) {
+ QStringList termList = term.split(QLatin1String(" "));
+ QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
+ QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, termList) {
+ if (!stopWords.contains(term, Qt::CaseInsensitive))
+ q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower()));
+ }
+ booleanQuery.add(q, true, true, false);
+ } else {
+ QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
+ QLatin1String("content"), term.toLower()));
+ QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
+ QLatin1String("titleTokenized"), term.toLower()));
+
+ if (query && query2) {
+ booleanQuery.add(query, true, true, false);
+ booleanQuery.add(query2, true, false, false);
+ } else {
+ return false;
+ }
+ }
+ } break;
+
+ case QHelpSearchQuery::ALL: {
+ QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+ foreach (const QString &term, query.wordList) {
+ if (stopWords.contains(term, Qt::CaseInsensitive))
+ continue;
+
+ QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
+ QLatin1String("content"), term.toLower()));
+
+ if (query) {
+ booleanQuery.add(query, true, true, false);
+ } else {
+ return false;
+ }
+ }
+ } break;
+
+ case QHelpSearchQuery::DEFAULT: {
+ foreach (const QString &term, query.wordList) {
+ QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
+ QLatin1String("content"), analyzer);
+
+ if (query)
+ booleanQuery.add(query, true, true, false);
+ }
+ } break;
+
+ case QHelpSearchQuery::ATLEAST: {
+ foreach (const QString &term, query.wordList) {
+ if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,
+ const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+{
+ bool retVal = false;
+ foreach (const QHelpSearchQuery query, queryList) {
+ switch (query.fieldName) {
+ default: break;
+ case QHelpSearchQuery::DEFAULT: {
+ foreach (const QString &term, query.wordList) {
+ QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
+ QLatin1String("content"), analyzer);
+
+ if (query) {
+ retVal = true;
+ booleanQuery.add(query, true, false, false);
+ }
+ }
+ } break;
+ }
+ }
+ return retVal;
+}
+
+void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
+ QList<QHelpSearchEngine::SearchHit> &hitList, const QList<QHelpSearchQuery> &queryList)
+{
+ foreach (const QHelpSearchQuery query, queryList) {
+ if (query.fieldName != QHelpSearchQuery::DEFAULT)
+ continue;
+
+ QString joinedQuery = query.wordList.join(QLatin1String(" "));
+
+ QCLuceneStandardAnalyzer analyzer;
+ QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
+ joinedQuery, QLatin1String("content"), analyzer);
+
+ if (parsedQuery) {
+ joinedQuery = parsedQuery->toString();
+ delete parsedQuery;
+ }
+
+ int length = QString(QLatin1String("content:")).length();
+ int index = joinedQuery.indexOf(QLatin1String("content:"));
+
+ QString term;
+ int nextIndex = 0;
+ QStringList searchTerms;
+ while (index != -1) {
+ nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1);
+ term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
+ if (term.startsWith(QLatin1String("\""))
+ && term.endsWith(QLatin1String("\""))) {
+ searchTerms.append(term.remove(QLatin1String("\"")));
+ } else {
+ searchTerms += term.split(QLatin1Char(' '));
+ }
+ index = nextIndex;
+ }
+ searchTerms.removeDuplicates();
+
+ int count = qMin(75, hitList.count());
+ QMap<int, QHelpSearchEngine::SearchHit> hitMap;
+ for (int i = 0; i < count; ++i) {
+ const QHelpSearchEngine::SearchHit &hit = hitList.at(i);
+ QString data = QString::fromUtf8(engine.fileData(hit.first));
+
+ int counter = 0;
+ foreach (const QString &term, searchTerms)
+ counter += data.count(term, Qt::CaseInsensitive);
+ hitMap.insertMulti(counter, hit);
+ }
+
+ QList<QHelpSearchEngine::SearchHit> boostedList;
+ QMap<int, QHelpSearchEngine::SearchHit>::const_iterator it = hitMap.constEnd();
+ do {
+ --it;
+ boostedList.append(it.value());
+ } while (it != hitMap.constBegin());
+ boostedList += hitList.mid(count, hitList.count());
+ mutex.lock();
+ hitList = boostedList;
+ mutex.unlock();
+ }
+}
+
+ } // namespace clucene
+ } // namespace fulltextsearch
+} // namespace qt
+
+QT_END_NAMESPACE