tools/assistant/lib/qhelpsearchindexreader_clucene.cpp
changeset 30 5dc02b23752f
parent 18 2f34d5167611
child 37 758a864f9613
--- a/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp	Wed Jun 23 19:07:03 2010 +0300
+++ b/tools/assistant/lib/qhelpsearchindexreader_clucene.cpp	Tue Jul 06 15:10:48 2010 +0300
@@ -39,16 +39,19 @@
 **
 ****************************************************************************/
 
-#include "qhelpenginecore.h"
+#include "fulltextsearch/qindexreader_p.h"
+#include "fulltextsearch/qqueryparser_p.h"
 #include "fulltextsearch/qsearchable_p.h"
-#include "fulltextsearch/qqueryparser_p.h"
-#include "fulltextsearch/qindexreader_p.h"
+#include "qclucenefieldnames_p.h"
+#include "qhelpenginecore.h"
+
 #include "qhelpsearchindexreader_clucene_p.h"
 
 #include <QtCore/QDir>
 #include <QtCore/QSet>
 #include <QtCore/QString>
 #include <QtCore/QFileInfo>
+#include <QtCore/QSharedPointer>
 #include <QtCore/QStringList>
 #include <QtCore/QTextStream>
 #include <QtCore/QMutexLocker>
@@ -107,64 +110,88 @@
 #if !defined(QT_NO_EXCEPTIONS)
         try {
 #endif
-            QCLuceneBooleanQuery booleanQuery;
+            QCLuceneBooleanQuery booleanQueryTitle;
+            QCLuceneBooleanQuery booleanQueryContent;
             QCLuceneStandardAnalyzer analyzer;
-            if (!buildQuery(booleanQuery, queryList, analyzer)) {
+            const QStringList& attribList =
+                engine.filterAttributes(engine.currentFilter());
+            bool titleQueryIsValid = buildQuery(queryList, TitleTokenizedField,
+                                       attribList, booleanQueryTitle, analyzer);
+            bool contentQueryIsValid = buildQuery(queryList, ContentField,
+                                     attribList, booleanQueryContent, analyzer);
+            if (!titleQueryIsValid && !contentQueryIsValid) {
                 emit searchingFinished(0);
                 return;
             }
 
-            const QStringList attribList = engine.filterAttributes(engine.currentFilter());
-            if (!attribList.isEmpty()) {
-                QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
-                    + attribList.join(QLatin1String(" +")), QLatin1String("attribute"), analyzer);
+            QCLuceneIndexSearcher indexSearcher(indexPath);
 
-                if (!query) {
+            // QCLuceneHits object must be allocated on the heap, because
+            // there is no default constructor.
+            QSharedPointer<QCLuceneHits> titleHits;
+            QSharedPointer<QCLuceneHits> contentHits;
+            if (titleQueryIsValid) {
+                titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+                    indexSearcher.search(booleanQueryTitle)));
+            }
+            if (contentQueryIsValid) {
+                contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+                    indexSearcher.search(booleanQueryContent)));
+            }
+            bool boost = true;
+            if ((titleHits.isNull() || titleHits->length() == 0)
+                && (contentHits.isNull() || contentHits->length() == 0)) {
+                booleanQueryTitle = QCLuceneBooleanQuery();
+                booleanQueryContent = QCLuceneBooleanQuery();
+                titleQueryIsValid =
+                    buildTryHarderQuery(queryList, TitleTokenizedField,
+                                        attribList, booleanQueryTitle, analyzer);
+                contentQueryIsValid =
+                    buildTryHarderQuery(queryList, ContentField, attribList,
+                                        booleanQueryContent, analyzer);
+                if (!titleQueryIsValid && !contentQueryIsValid) {
                     emit searchingFinished(0);
                     return;
                 }
-                booleanQuery.add(query, true, true, false);
+                if (titleQueryIsValid) {
+                    titleHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+                        indexSearcher.search(booleanQueryTitle)));
+                }
+                if (contentQueryIsValid) {
+                    contentHits = QSharedPointer<QCLuceneHits>(new QCLuceneHits(
+                        indexSearcher.search(booleanQueryContent)));
+                }
+                boost = false;
             }
-
-            QCLuceneIndexSearcher indexSearcher(indexPath);
-            QCLuceneHits hits = indexSearcher.search(booleanQuery);
-
-            bool boost = true;
-            QCLuceneBooleanQuery tryHarderQuery;
-            if (hits.length() == 0) {
-                if (buildTryHarderQuery(tryHarderQuery, queryList, analyzer)) {
-                    if (!attribList.isEmpty()) {
-                        QCLuceneQuery* query = QCLuceneQueryParser::parse(QLatin1String("+")
-                            + attribList.join(QLatin1String(" +")), QLatin1String("attribute"),
-                            analyzer);
-                        tryHarderQuery.add(query, true, true, false);
-                    }
-                    hits = indexSearcher.search(tryHarderQuery);
-                    boost = (hits.length() == 0);
-                }
-            }
+            QList<QSharedPointer<QCLuceneHits> > cluceneHitsList;
+            if (!titleHits.isNull())
+                cluceneHitsList.append(titleHits);
+            if (!contentHits.isNull())
+                cluceneHitsList.append(contentHits);
 
             QSet<QString> pathSet;
             QCLuceneDocument document;
             const QStringList namespaceList = engine.registeredDocumentations();
 
-            for (qint32 i = 0; i < hits.length(); i++) {
-                document = hits.document(i);
-                const QString path = document.get(QLatin1String("path"));
-                if (!pathSet.contains(path) && namespaceList.contains(
-                    document.get(QLatin1String("namespace")), Qt::CaseInsensitive)) {
-                    pathSet.insert(path);
-                    hitList.append(qMakePair(path, document.get(QLatin1String("title"))));
+            foreach (QSharedPointer<QCLuceneHits> hits, cluceneHitsList) {
+                for (qint32 i = 0; i < hits->length(); i++) {
+                    document = hits->document(i);
+                    const QString path = document.get(PathField);
+                    if (!pathSet.contains(path) && namespaceList.contains(
+                            document.get(NamespaceField), Qt::CaseInsensitive)) {
+                        pathSet.insert(path);
+                        hitList.append(qMakePair(path, document.get(TitleField)));
+                    }
+                    document.clear();
+
+                    mutex.lock();
+                    if (m_cancel) {
+                        mutex.unlock();
+                        emit searchingFinished(0);
+                        return;
+                    }
+                    mutex.unlock();
                 }
-                document.clear();
-
-                mutex.lock();
-                if (m_cancel) {
-                    mutex.unlock();
-                    emit searchingFinished(0);
-                    return;
-                }
-                mutex.unlock();
             }
 
             indexSearcher.close();
@@ -184,144 +211,205 @@
     }
 }
 
-bool QHelpSearchIndexReaderClucene::defaultQuery(const QString &term, QCLuceneBooleanQuery &booleanQuery,
-    QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::buildQuery(
+    const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+    const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+    QCLuceneAnalyzer &analyzer)
 {
-    const QLatin1String c("content");
-    const QLatin1String t("titleTokenized");
-
-    QCLuceneQuery *query = QCLuceneQueryParser::parse(term, c, analyzer);
-    QCLuceneQuery *query2 = QCLuceneQueryParser::parse(term, t, analyzer);
-    if (query && query2) {
-        booleanQuery.add(query, true, false, false);
-        booleanQuery.add(query2, true, false, false);
-        return true;
+    // Translates every search query into clauses on fieldName. Returns false
+    // when nothing was added, or when a negated/WITHOUT query targets a field
+    // other than ContentField.
+    const bool onContentField = (fieldName == ContentField);
+    bool anyAdded = false;
+    foreach (const QHelpSearchQuery &query, queries) {
+        if (!onContentField && isNegativeQuery(query))
+            return false;
+        bool added = false;
+        switch (query.fieldName) {
+            case QHelpSearchQuery::FUZZY:
+                added = addFuzzyQuery(query, fieldName, booleanQuery, analyzer);
+                break;
+            case QHelpSearchQuery::WITHOUT:
+                if (!onContentField)
+                    return false;
+                added = addWithoutQuery(query, fieldName, booleanQuery);
+                break;
+            case QHelpSearchQuery::PHRASE:
+                added = addPhraseQuery(query, fieldName, booleanQuery);
+                break;
+            case QHelpSearchQuery::ALL:
+                added = addAllQuery(query, fieldName, booleanQuery);
+                break;
+            case QHelpSearchQuery::DEFAULT:
+                added = addDefaultQuery(query, fieldName, true, booleanQuery,
+                                        analyzer);
+                break;
+            case QHelpSearchQuery::ATLEAST:
+                added = addAtLeastQuery(query, fieldName, booleanQuery, analyzer);
+                break;
+            default:
+                Q_ASSERT(!"Invalid field name");
+        }
+        if (added)
+            anyAdded = true;
     }
 
-    return false;
+    // An otherwise valid query must also carry the filter attributes, if any.
+    if (!anyAdded)
+        return false;
+    if (filterAttributes.isEmpty())
+        return true;
+    return addAttributesQuery(filterAttributes, booleanQuery, analyzer);
 }
 
-bool QHelpSearchIndexReaderClucene::buildQuery(QCLuceneBooleanQuery &booleanQuery,
-    const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(
+    const QList<QHelpSearchQuery> &queries, const QString &fieldName,
+    const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+    QCLuceneAnalyzer &analyzer)
 {
-    foreach (const QHelpSearchQuery query, queryList) {
-        switch (query.fieldName) {
-            case QHelpSearchQuery::FUZZY: {
-                const QLatin1String fuzzy("~");
-                foreach (const QString &term, query.wordList) {
-                    if (term.isEmpty()
-                        || !defaultQuery(term.toLower() + fuzzy, booleanQuery, analyzer)) {
-                        return false;
-                    }
-                }
-            }   break;
-
-            case QHelpSearchQuery::WITHOUT: {
-                QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
-                foreach (const QString &term, query.wordList) {
-                    if (stopWords.contains(term, Qt::CaseInsensitive))
-                        continue;
-
-                    QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
-                        QLatin1String("content"), term.toLower()));
-                    QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
-                        QLatin1String("titleTokenized"), term.toLower()));
-
-                    if (query && query2) {
-                        booleanQuery.add(query, true, false, true);
-                        booleanQuery.add(query2, true, false, true);
-                    } else {
-                        return false;
-                    }
-                }
-            }   break;
+    // Relaxed retry: only a non-negative DEFAULT query in first position is used.
+    if (queries.isEmpty())
+        return false;
+    const QHelpSearchQuery &first = queries.first();
+    const bool eligible = first.fieldName == QHelpSearchQuery::DEFAULT
+        && !isNegativeQuery(first);
+    if (!eligible)
+        return false;
+    if (!addDefaultQuery(first, fieldName, false, booleanQuery, analyzer))
+        return false;
+    return filterAttributes.isEmpty()
+        || addAttributesQuery(filterAttributes, booleanQuery, analyzer);
+}
 
-            case QHelpSearchQuery::PHRASE: {
-                const QString &term = query.wordList.at(0).toLower();
-                if (term.contains(QLatin1Char(' '))) {
-                    QStringList termList = term.split(QLatin1String(" "));
-                    QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
-                    QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
-                    foreach (const QString &term, termList) {
-                        if (!stopWords.contains(term, Qt::CaseInsensitive))
-                            q->addTerm(QCLuceneTerm(QLatin1String("content"), term.toLower()));
-                    }
-                    booleanQuery.add(q, true, true, false);
-                } else {
-                    QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
-                        QLatin1String("content"), term.toLower()));
-                    QCLuceneQuery *query2 = new QCLuceneTermQuery(QCLuceneTerm(
-                        QLatin1String("titleTokenized"), term.toLower()));
-
-                    if (query && query2) {
-                        booleanQuery.add(query, true, true, false);
-                        booleanQuery.add(query2, true, false, false);
-                    } else {
-                        return false;
-                    }
-                }
-            }   break;
+bool QHelpSearchIndexReaderClucene::isNegativeQuery(const QHelpSearchQuery &query) const
+{   // Negative means the space-joined words contain '!', '-' or " NOT ".
+    const QString search = query.wordList.join(QLatin1String(" "));
+    return search.contains(QLatin1Char('!')) || search.contains(QLatin1Char('-'))
+        || search.contains(QLatin1String(" NOT "));
+}
 
-            case QHelpSearchQuery::ALL: {
-                QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
-                foreach (const QString &term, query.wordList) {
-                    if (stopWords.contains(term, Qt::CaseInsensitive))
-                        continue;
-
-                    QCLuceneQuery *query = new QCLuceneTermQuery(QCLuceneTerm(
-                        QLatin1String("content"), term.toLower()));
-
-                    if (query) {
-                        booleanQuery.add(query, true, true, false);
-                    } else {
-                        return false;
-                    }
-                }
-            }   break;
-
-            case QHelpSearchQuery::DEFAULT: {
-                foreach (const QString &term, query.wordList) {
-                    QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
-                        QLatin1String("content"), analyzer);
-
-                    if (query)
-                        booleanQuery.add(query, true, true, false);
-                }
-            }   break;
-
-            case QHelpSearchQuery::ATLEAST: {
-                foreach (const QString &term, query.wordList) {
-                    if (term.isEmpty() || !defaultQuery(term.toLower(), booleanQuery, analyzer))
-                        return false;
-                }
+// Parses "<word>~" for every non-empty word; returns false if none was added.
+bool QHelpSearchIndexReaderClucene::addFuzzyQuery(const QHelpSearchQuery &query,
+    const QString &fieldName, QCLuceneBooleanQuery &booleanQuery,
+    QCLuceneAnalyzer &analyzer)
+{
+    bool added = false;
+    foreach (const QString &word, query.wordList) {
+        if (!word.isEmpty()) {
+            QCLuceneQuery *parsed = QCLuceneQueryParser::parse(
+                word + QLatin1String("~"), fieldName, analyzer);
+            if (parsed != 0) {
+                booleanQuery.add(parsed, true, false, false);
+                added = true;
             }
         }
     }
+    return added;
+}
 
-    return true;
+// Adds a term query for each word that is not a stop word; false if none added.
+bool QHelpSearchIndexReaderClucene::addWithoutQuery(const QHelpSearchQuery &query,
+    const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+    const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+    bool added = false;
+    foreach (const QString &word, query.wordList) {
+        if (!stopWords.contains(word, Qt::CaseInsensitive)) {
+            const QCLuceneTerm luceneTerm(fieldName, word.toLower());
+            booleanQuery.add(new QCLuceneTermQuery(luceneTerm), true, false, true);
+            added = true;
+        }
+    }
+    return added;
+}
+
+// Adds a phrase or term query for the first word-list entry; false if none added.
+bool QHelpSearchIndexReaderClucene::addPhraseQuery(const QHelpSearchQuery &query,
+    const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
+{
+    if (query.wordList.isEmpty())   // at(0) below must not run on an empty list
+        return false;
+    const QString term = query.wordList.at(0).toLower();
+    if (!term.contains(QLatin1Char(' '))) {
+        booleanQuery.add(new QCLuceneTermQuery(QCLuceneTerm(fieldName, term)),
+            true, true, false);
+        return true;
+    }
+    const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+    QCLucenePhraseQuery *q = new QCLucenePhraseQuery();
+    foreach (const QString &word, term.split(QLatin1String(" "))) {
+        if (!stopWords.contains(word, Qt::CaseInsensitive))
+            q->addTerm(QCLuceneTerm(fieldName, word));
+    }
+    if (q->getTerms().isEmpty()) {
+        delete q;   // never handed to booleanQuery, so it must be freed here
+        return false;
+    }
+    booleanQuery.add(q, true, true, false);
+    return true;
 }
 
-bool QHelpSearchIndexReaderClucene::buildTryHarderQuery(QCLuceneBooleanQuery &booleanQuery,
-    const QList<QHelpSearchQuery> &queryList, QCLuceneStandardAnalyzer &analyzer)
+bool QHelpSearchIndexReaderClucene::addAllQuery(const QHelpSearchQuery &query,
+    const QString &fieldName, QCLuceneBooleanQuery &booleanQuery)
 {
-    bool retVal = false;
-    foreach (const QHelpSearchQuery query, queryList) {
-        switch (query.fieldName) {
-            default:    break;
-            case QHelpSearchQuery::DEFAULT: {
-                foreach (const QString &term, query.wordList) {
-                    QCLuceneQuery *query = QCLuceneQueryParser::parse(term.toLower(),
-                        QLatin1String("content"), analyzer);
+    // Every non-stop word becomes its own term query; false if none was added.
+    const QStringList stopWords = QCLuceneStopAnalyzer().englishStopWords();
+    bool added = false;
+    foreach (const QString &word, query.wordList) {
+        if (!stopWords.contains(word, Qt::CaseInsensitive)) {
+            const QCLuceneTerm luceneTerm(fieldName, word.toLower());
+            booleanQuery.add(new QCLuceneTermQuery(luceneTerm), true, true, false);
+            added = true;
+        }
+    }
+    return added;
+}
 
-                    if (query) {
-                        retVal = true;
-                        booleanQuery.add(query, true, false, false);
-                    }
-                }
-            }   break;
+// Parses each lower-cased word against fieldName and adds the results to
+// booleanQuery, passing allTermsRequired as the third argument of add().
+bool QHelpSearchIndexReaderClucene::addDefaultQuery(const QHelpSearchQuery &query,
+    const QString &fieldName, bool allTermsRequired,
+    QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer)
+{
+    bool added = false;
+    foreach (const QString &word, query.wordList) {
+        if (QCLuceneQuery *parsed = QCLuceneQueryParser::parse(word.toLower(),
+                fieldName, analyzer)) {
+            booleanQuery.add(parsed, true, allTermsRequired, false);
+            added = true;
         }
     }
-    return retVal;
+    return added;
+}
+
+// Adds a parsed query for every non-empty word; false if none could be added.
+bool QHelpSearchIndexReaderClucene::addAtLeastQuery(
+    const QHelpSearchQuery &query, const QString &fieldName,
+    QCLuceneBooleanQuery &booleanQuery, QCLuceneAnalyzer &analyzer)
+{
+    bool added = false;
+    foreach (const QString &word, query.wordList) {
+        if (word.isEmpty())
+            continue;
+        if (QCLuceneQuery *parsed =
+                QCLuceneQueryParser::parse(word, fieldName, analyzer)) {
+            booleanQuery.add(parsed, true, false, false);
+            added = true;
+        }
+    }
+    return added;
+}
+
+bool QHelpSearchIndexReaderClucene::addAttributesQuery(
+    const QStringList &filterAttributes, QCLuceneBooleanQuery &booleanQuery,
+    QCLuceneAnalyzer &analyzer)
+{
+    // The parsed text has the form "+attr1 +attr2 ..."; false if parsing fails.
+    QCLuceneQuery *parsed = QCLuceneQueryParser::parse(QLatin1String("+")
+        + filterAttributes.join(QLatin1String(" +")), AttributeField, analyzer);
+    if (parsed)
+        booleanQuery.add(parsed, true, true, false);
+    return parsed != 0;
 }
 
 void QHelpSearchIndexReaderClucene::boostSearchHits(const QHelpEngineCore &engine,
@@ -335,21 +423,22 @@
 
         QCLuceneStandardAnalyzer analyzer;
         QCLuceneQuery *parsedQuery = QCLuceneQueryParser::parse(
-            joinedQuery, QLatin1String("content"), analyzer);
+            joinedQuery, ContentField, analyzer);
 
         if (parsedQuery) {
             joinedQuery = parsedQuery->toString();
             delete parsedQuery;
         }
 
-        int length = QString(QLatin1String("content:")).length();
-        int index = joinedQuery.indexOf(QLatin1String("content:"));
+        const QString contentString(ContentField + QLatin1String(":"));
+        int length = contentString.length();
+        int index = joinedQuery.indexOf(contentString);
 
         QString term;
         int nextIndex = 0;
         QStringList searchTerms;
         while (index != -1) {
-            nextIndex = joinedQuery.indexOf(QLatin1String("content:"), index + 1);
+            nextIndex = joinedQuery.indexOf(contentString, index + 1);
             term = joinedQuery.mid(index + length, nextIndex - (length + index)).simplified();
             if (term.startsWith(QLatin1String("\""))
                 && term.endsWith(QLatin1String("\""))) {