tools/assistant/lib/qhelpsearchindexwriter_default.cpp
changeset 0 1918ee327afb
child 3 41300fa6a67c
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the Qt Assistant of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "qhelpsearchindexwriter_default_p.h"
       
    43 #include "qhelp_global.h"
       
    44 #include "qhelpenginecore.h"
       
    45 
       
    46 #include <QtCore/QDir>
       
    47 #include <QtCore/QSet>
       
    48 #include <QtCore/QUrl>
       
    49 #include <QtCore/QFile>
       
    50 #include <QtCore/QRegExp>
       
    51 #include <QtCore/QVariant>
       
    52 #include <QtCore/QFileInfo>
       
    53 #include <QtCore/QTextCodec>
       
    54 #include <QtCore/QTextStream>
       
    55 
       
    56 QT_BEGIN_NAMESPACE
       
    57 
       
    58 namespace qt {
       
    59     namespace fulltextsearch {
       
    60         namespace std {
       
    61 
       
    62 Writer::Writer(const QString &path)
       
    63     : indexPath(path)
       
    64     , indexFile(QString())
       
    65     , documentFile(QString())
       
    66 {
       
    67     // nothing todo
       
    68 }
       
    69 
       
    70 Writer::~Writer()
       
    71 {
       
    72     reset();
       
    73 }
       
    74 
       
    75 void Writer::reset()
       
    76 {
       
    77     for(QHash<QString, Entry*>::ConstIterator it =
       
    78         index.begin(); it != index.end(); ++it) {
       
    79             delete it.value();
       
    80     }
       
    81 
       
    82     index.clear();
       
    83     documentList.clear();
       
    84 }
       
    85 
       
    86 bool Writer::writeIndex() const
       
    87 {
       
    88     bool status;
       
    89     QFile idxFile(indexFile);
       
    90     if (!(status = idxFile.open(QFile::WriteOnly)))
       
    91         return status;
       
    92 
       
    93     QDataStream indexStream(&idxFile);
       
    94     for(QHash<QString, Entry*>::ConstIterator it =
       
    95         index.begin(); it != index.end(); ++it) {
       
    96         indexStream << it.key();
       
    97         indexStream << it.value()->documents.count();
       
    98         indexStream << it.value()->documents;
       
    99     }
       
   100     idxFile.close();
       
   101 
       
   102     QFile docFile(documentFile);
       
   103     if (!(status = docFile.open(QFile::WriteOnly)))
       
   104         return status;
       
   105 
       
   106     QDataStream docStream(&docFile);
       
   107     foreach(const QStringList list, documentList) {
       
   108         docStream << list.at(0);
       
   109         docStream << list.at(1);
       
   110     }
       
   111     docFile.close();
       
   112 
       
   113     return status;
       
   114 }
       
   115 
       
   116 void Writer::removeIndex() const
       
   117 {
       
   118     QFile idxFile(indexFile);
       
   119     if (idxFile.exists())
       
   120         idxFile.remove();
       
   121 
       
   122     QFile docFile(documentFile);
       
   123     if (docFile.exists())
       
   124         docFile.remove();
       
   125 }
       
   126 
       
   127 void Writer::setIndexFile(const QString &namespaceName, const QString &attributes)
       
   128 {
       
   129     QString extention = namespaceName + QLatin1String("@") + attributes;
       
   130     indexFile = indexPath + QLatin1String("/indexdb40.") + extention;
       
   131     documentFile = indexPath + QLatin1String("/indexdoc40.") + extention;
       
   132 }
       
   133 
       
   134 void Writer::insertInIndex(const QString &string, int docNum)
       
   135 {
       
   136     if (string == QLatin1String("amp") || string == QLatin1String("nbsp"))
       
   137         return;
       
   138 
       
   139     Entry *entry = 0;
       
   140     if (index.count())
       
   141         entry = index[string];
       
   142 
       
   143     if (entry) {
       
   144         if (entry->documents.last().docNumber != docNum)
       
   145             entry->documents.append(Document(docNum, 1));
       
   146         else
       
   147             entry->documents.last().frequency++;
       
   148     } else {
       
   149         index.insert(string, new Entry(docNum));
       
   150     }
       
   151 }
       
   152 
       
   153 void Writer::insertInDocumentList(const QString &title, const QString &url)
       
   154 {
       
   155     documentList.append(QStringList(title) << url);
       
   156 }
       
   157 
       
   158 
       
   159 QHelpSearchIndexWriter::QHelpSearchIndexWriter()
       
   160     : QThread()
       
   161     , m_cancel(false)
       
   162 {
       
   163     // nothing todo
       
   164 }
       
   165 
       
   166 QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
       
   167 {
       
   168     mutex.lock();
       
   169     this->m_cancel = true;
       
   170     waitCondition.wakeOne();
       
   171     mutex.unlock();
       
   172 
       
   173     wait();
       
   174 }
       
   175 
       
   176 void QHelpSearchIndexWriter::cancelIndexing()
       
   177 {
       
   178     mutex.lock();
       
   179     this->m_cancel = true;
       
   180     mutex.unlock();
       
   181 }
       
   182 
       
   183 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
       
   184                                          const QString &indexFilesFolder,
       
   185                                          bool reindex)
       
   186 {
       
   187     QMutexLocker lock(&mutex);
       
   188 
       
   189     this->m_cancel = false;
       
   190     this->m_reindex = reindex;
       
   191     this->m_collectionFile = collectionFile;
       
   192     this->m_indexFilesFolder = indexFilesFolder;
       
   193 
       
   194     start(QThread::NormalPriority);
       
   195 }
       
   196 
       
   197 void QHelpSearchIndexWriter::run()
       
   198 {
       
   199     mutex.lock();
       
   200 
       
   201     if (m_cancel) {
       
   202         mutex.unlock();
       
   203         return;
       
   204     }
       
   205 
       
   206     const bool reindex(this->m_reindex);
       
   207     const QLatin1String key("DefaultSearchNamespaces");
       
   208     const QString collectionFile(this->m_collectionFile);
       
   209     const QString indexPath = m_indexFilesFolder;
       
   210     
       
   211     mutex.unlock();
       
   212 
       
   213     QHelpEngineCore engine(collectionFile, 0);
       
   214     if (!engine.setupData())
       
   215         return;
       
   216 
       
   217     if (reindex)
       
   218         engine.setCustomValue(key, QLatin1String(""));
       
   219 
       
   220     const QStringList registeredDocs = engine.registeredDocumentations();
       
   221     const QStringList indexedNamespaces = engine.customValue(key).toString().
       
   222         split(QLatin1String("|"), QString::SkipEmptyParts);
       
   223 
       
   224     emit indexingStarted();
       
   225 
       
   226     QStringList namespaces;
       
   227     Writer writer(indexPath);
       
   228     foreach(const QString namespaceName, registeredDocs) {
       
   229         mutex.lock();
       
   230         if (m_cancel) {
       
   231             mutex.unlock();
       
   232             return;
       
   233         }
       
   234         mutex.unlock();
       
   235 
       
   236         // if indexed, continue
       
   237         namespaces.append(namespaceName);
       
   238         if (indexedNamespaces.contains(namespaceName))
       
   239             continue;
       
   240 
       
   241         const QList<QStringList> attributeSets =
       
   242             engine.filterAttributeSets(namespaceName);
       
   243 
       
   244         foreach (QStringList attributes, attributeSets) {
       
   245             // cleanup maybe old or unfinished files
       
   246             writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
       
   247             writer.removeIndex();
       
   248 
       
   249             QSet<QString> documentsSet;
       
   250             const QList<QUrl> docFiles = engine.files(namespaceName, attributes);
       
   251             foreach(QUrl url, docFiles) {
       
   252                 if (m_cancel)
       
   253                     return;
       
   254 
       
   255                 // get rid of duplicated files
       
   256                 if (url.hasFragment())
       
   257                     url.setFragment(QString());
       
   258                 
       
   259                 QString s = url.toString();
       
   260                 if (s.endsWith(QLatin1String(".html"))
       
   261                     || s.endsWith(QLatin1String(".htm"))
       
   262                     || s.endsWith(QLatin1String(".txt")))
       
   263                     documentsSet.insert(s);
       
   264             }
       
   265 
       
   266             int docNum = 0;
       
   267             const QStringList documentsList(documentsSet.toList());
       
   268             foreach(const QString url, documentsList) {
       
   269                 if (m_cancel)
       
   270                     return;
       
   271 
       
   272                 QByteArray data(engine.fileData(url));
       
   273                 if (data.isEmpty())
       
   274                     continue;
       
   275 
       
   276                 QTextStream s(data);
       
   277                 QString en = QHelpGlobal::charsetFromData(data);
       
   278                 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
       
   279 
       
   280                 QString text = s.readAll();
       
   281                 if (text.isNull())
       
   282                     continue;
       
   283 
       
   284                 QString title = QHelpGlobal::documentTitle(text);
       
   285 
       
   286                 int j = 0;
       
   287                 int i = 0;
       
   288                 bool valid = true;
       
   289                 const QChar *buf = text.unicode();
       
   290                 QChar str[64];
       
   291                 QChar c = buf[0];
       
   292 
       
   293                 while ( j < text.length() ) {
       
   294                     if (m_cancel)
       
   295                         return;
       
   296 
       
   297                     if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
       
   298                         valid = false;
       
   299                         if ( i > 1 )
       
   300                             writer.insertInIndex(QString(str,i), docNum);
       
   301                         i = 0;
       
   302                         c = buf[++j];
       
   303                         continue;
       
   304                     }
       
   305                     if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
       
   306                         valid = true;
       
   307                         c = buf[++j];
       
   308                         continue;
       
   309                     }
       
   310                     if ( !valid ) {
       
   311                         c = buf[++j];
       
   312                         continue;
       
   313                     }
       
   314                     if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
       
   315                         str[i] = c.toLower();
       
   316                         ++i;
       
   317                     } else {
       
   318                         if ( i > 1 )
       
   319                             writer.insertInIndex(QString(str,i), docNum);
       
   320                         i = 0;
       
   321                     }
       
   322                     c = buf[++j];
       
   323                 }
       
   324                 if ( i > 1 )
       
   325                     writer.insertInIndex(QString(str,i), docNum);
       
   326 
       
   327                 docNum++;
       
   328                 writer.insertInDocumentList(title, url);
       
   329             }
       
   330 
       
   331             if (writer.writeIndex()) {
       
   332                 engine.setCustomValue(key, addNamespace(
       
   333                     engine.customValue(key).toString(), namespaceName));
       
   334             }
       
   335 
       
   336             writer.reset();
       
   337         }
       
   338     }
       
   339 
       
   340     QStringListIterator qsli(indexedNamespaces);
       
   341     while (qsli.hasNext()) {
       
   342         const QString namespaceName = qsli.next();
       
   343         if (namespaces.contains(namespaceName))
       
   344             continue;
       
   345 
       
   346         const QList<QStringList> attributeSets =
       
   347             engine.filterAttributeSets(namespaceName);
       
   348 
       
   349         foreach (QStringList attributes, attributeSets) {
       
   350             writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@")));
       
   351             writer.removeIndex();
       
   352         }
       
   353 
       
   354         engine.setCustomValue(key, removeNamespace(
       
   355             engine.customValue(key).toString(), namespaceName));
       
   356     }
       
   357 
       
   358     emit indexingFinished();
       
   359 }
       
   360 
       
   361 QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
       
   362                                              const QString &namespaceName)
       
   363 {
       
   364     QString value = namespaces;
       
   365     if (!value.contains(namespaceName))
       
   366         value.append(namespaceName).append(QLatin1String("|"));
       
   367 
       
   368     return value;
       
   369 }
       
   370 
       
   371 QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
       
   372                                                 const QString &namespaceName)
       
   373 {
       
   374     QString value = namespaces;
       
   375     if (value.contains(namespaceName))
       
   376         value.remove(namespaceName + QLatin1String("|"));
       
   377 
       
   378     return value;
       
   379 }
       
   380 
       
   381         }   // namespace std
       
   382     }   // namespace fulltextsearch
       
   383 }   // namespace qt
       
   384 
       
   385 QT_END_NAMESPACE