tools/assistant/lib/qhelpsearchindexwriter_clucene.cpp
changeset 0 1918ee327afb
child 3 41300fa6a67c
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the Qt Assistant of the Qt Toolkit.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "qhelpenginecore.h"
       
    43 #include "qhelp_global.h"
       
    44 #include "fulltextsearch/qhits_p.h"
       
    45 #include "fulltextsearch/qquery_p.h"
       
    46 #include "fulltextsearch/qanalyzer_p.h"
       
    47 #include "fulltextsearch/qdocument_p.h"
       
    48 #include "fulltextsearch/qsearchable_p.h"
       
    49 #include "fulltextsearch/qindexreader_p.h"
       
    50 #include "fulltextsearch/qindexwriter_p.h"
       
    51 #include "qhelpsearchindexwriter_clucene_p.h"
       
    52 
       
    53 #include <QtCore/QDir>
       
    54 #include <QtCore/QString>
       
    55 #include <QtCore/QFileInfo>
       
    56 #include <QtCore/QTextCodec>
       
    57 #include <QtCore/QTextStream>
       
    58 
       
    59 #include <QtNetwork/QLocalSocket>
       
    60 #include <QtNetwork/QLocalServer>
       
    61 
       
    62 #include "private/qfunctions_p.h"
       
    63 
       
    64 QT_BEGIN_NAMESPACE
       
    65 
       
    66 namespace qt {
       
    67     namespace fulltextsearch {
       
    68         namespace clucene {
       
    69 
       
    70 // taken from qtexthtmlparser
       
    71 static const struct QTextHtmlEntity
       
    72 {
       
    73     const char *name;
       
    74     quint16 code;
       
    75 } entities[] = {
       
    76     { "AElig", 0x00c6 },
       
    77     { "AMP", 38 },
       
    78     { "Aacute", 0x00c1 },
       
    79     { "Acirc", 0x00c2 },
       
    80     { "Agrave", 0x00c0 },
       
    81     { "Alpha", 0x0391 },
       
    82     { "Aring", 0x00c5 },
       
    83     { "Atilde", 0x00c3 },
       
    84     { "Auml", 0x00c4 },
       
    85     { "Beta", 0x0392 },
       
    86     { "Ccedil", 0x00c7 },
       
    87     { "Chi", 0x03a7 },
       
    88     { "Dagger", 0x2021 },
       
    89     { "Delta", 0x0394 },
       
    90     { "ETH", 0x00d0 },
       
    91     { "Eacute", 0x00c9 },
       
    92     { "Ecirc", 0x00ca },
       
    93     { "Egrave", 0x00c8 },
       
    94     { "Epsilon", 0x0395 },
       
    95     { "Eta", 0x0397 },
       
    96     { "Euml", 0x00cb },
       
    97     { "GT", 62 },
       
    98     { "Gamma", 0x0393 },
       
    99     { "Iacute", 0x00cd },
       
   100     { "Icirc", 0x00ce },
       
   101     { "Igrave", 0x00cc },
       
   102     { "Iota", 0x0399 },
       
   103     { "Iuml", 0x00cf },
       
   104     { "Kappa", 0x039a },
       
   105     { "LT", 60 },
       
   106     { "Lambda", 0x039b },
       
   107     { "Mu", 0x039c },
       
   108     { "Ntilde", 0x00d1 },
       
   109     { "Nu", 0x039d },
       
   110     { "OElig", 0x0152 },
       
   111     { "Oacute", 0x00d3 },
       
   112     { "Ocirc", 0x00d4 },
       
   113     { "Ograve", 0x00d2 },
       
   114     { "Omega", 0x03a9 },
       
   115     { "Omicron", 0x039f },
       
   116     { "Oslash", 0x00d8 },
       
   117     { "Otilde", 0x00d5 },
       
   118     { "Ouml", 0x00d6 },
       
   119     { "Phi", 0x03a6 },
       
   120     { "Pi", 0x03a0 },
       
   121     { "Prime", 0x2033 },
       
   122     { "Psi", 0x03a8 },
       
   123     { "QUOT", 34 },
       
   124     { "Rho", 0x03a1 },
       
   125     { "Scaron", 0x0160 },
       
   126     { "Sigma", 0x03a3 },
       
   127     { "THORN", 0x00de },
       
   128     { "Tau", 0x03a4 },
       
   129     { "Theta", 0x0398 },
       
   130     { "Uacute", 0x00da },
       
   131     { "Ucirc", 0x00db },
       
   132     { "Ugrave", 0x00d9 },
       
   133     { "Upsilon", 0x03a5 },
       
   134     { "Uuml", 0x00dc },
       
   135     { "Xi", 0x039e },
       
   136     { "Yacute", 0x00dd },
       
   137     { "Yuml", 0x0178 },
       
   138     { "Zeta", 0x0396 },
       
   139     { "aacute", 0x00e1 },
       
   140     { "acirc", 0x00e2 },
       
   141     { "acute", 0x00b4 },
       
   142     { "aelig", 0x00e6 },
       
   143     { "agrave", 0x00e0 },
       
   144     { "alefsym", 0x2135 },
       
   145     { "alpha", 0x03b1 },
       
   146     { "amp", 38 },
       
   147     { "and", 0x22a5 },
       
   148     { "ang", 0x2220 },
       
   149     { "apos", 0x0027 },
       
   150     { "aring", 0x00e5 },
       
   151     { "asymp", 0x2248 },
       
   152     { "atilde", 0x00e3 },
       
   153     { "auml", 0x00e4 },
       
   154     { "bdquo", 0x201e },
       
   155     { "beta", 0x03b2 },
       
   156     { "brvbar", 0x00a6 },
       
   157     { "bull", 0x2022 },
       
   158     { "cap", 0x2229 },
       
   159     { "ccedil", 0x00e7 },
       
   160     { "cedil", 0x00b8 },
       
   161     { "cent", 0x00a2 },
       
   162     { "chi", 0x03c7 },
       
   163     { "circ", 0x02c6 },
       
   164     { "clubs", 0x2663 },
       
   165     { "cong", 0x2245 },
       
   166     { "copy", 0x00a9 },
       
   167     { "crarr", 0x21b5 },
       
   168     { "cup", 0x222a },
       
   169     { "curren", 0x00a4 },
       
   170     { "dArr", 0x21d3 },
       
   171     { "dagger", 0x2020 },
       
   172     { "darr", 0x2193 },
       
   173     { "deg", 0x00b0 },
       
   174     { "delta", 0x03b4 },
       
   175     { "diams", 0x2666 },
       
   176     { "divide", 0x00f7 },
       
   177     { "eacute", 0x00e9 },
       
   178     { "ecirc", 0x00ea },
       
   179     { "egrave", 0x00e8 },
       
   180     { "empty", 0x2205 },
       
   181     { "emsp", 0x2003 },
       
   182     { "ensp", 0x2002 },
       
   183     { "epsilon", 0x03b5 },
       
   184     { "equiv", 0x2261 },
       
   185     { "eta", 0x03b7 },
       
   186     { "eth", 0x00f0 },
       
   187     { "euml", 0x00eb },
       
   188     { "euro", 0x20ac },
       
   189     { "exist", 0x2203 },
       
   190     { "fnof", 0x0192 },
       
   191     { "forall", 0x2200 },
       
   192     { "frac12", 0x00bd },
       
   193     { "frac14", 0x00bc },
       
   194     { "frac34", 0x00be },
       
   195     { "frasl", 0x2044 },
       
   196     { "gamma", 0x03b3 },
       
   197     { "ge", 0x2265 },
       
   198     { "gt", 62 },
       
   199     { "hArr", 0x21d4 },
       
   200     { "harr", 0x2194 },
       
   201     { "hearts", 0x2665 },
       
   202     { "hellip", 0x2026 },
       
   203     { "iacute", 0x00ed },
       
   204     { "icirc", 0x00ee },
       
   205     { "iexcl", 0x00a1 },
       
   206     { "igrave", 0x00ec },
       
   207     { "image", 0x2111 },
       
   208     { "infin", 0x221e },
       
   209     { "int", 0x222b },
       
   210     { "iota", 0x03b9 },
       
   211     { "iquest", 0x00bf },
       
   212     { "isin", 0x2208 },
       
   213     { "iuml", 0x00ef },
       
   214     { "kappa", 0x03ba },
       
   215     { "lArr", 0x21d0 },
       
   216     { "lambda", 0x03bb },
       
   217     { "lang", 0x2329 },
       
   218     { "laquo", 0x00ab },
       
   219     { "larr", 0x2190 },
       
   220     { "lceil", 0x2308 },
       
   221     { "ldquo", 0x201c },
       
   222     { "le", 0x2264 },
       
   223     { "lfloor", 0x230a },
       
   224     { "lowast", 0x2217 },
       
   225     { "loz", 0x25ca },
       
   226     { "lrm", 0x200e },
       
   227     { "lsaquo", 0x2039 },
       
   228     { "lsquo", 0x2018 },
       
   229     { "lt", 60 },
       
   230     { "macr", 0x00af },
       
   231     { "mdash", 0x2014 },
       
   232     { "micro", 0x00b5 },
       
   233     { "middot", 0x00b7 },
       
   234     { "minus", 0x2212 },
       
   235     { "mu", 0x03bc },
       
   236     { "nabla", 0x2207 },
       
   237     { "nbsp", 0x00a0 },
       
   238     { "ndash", 0x2013 },
       
   239     { "ne", 0x2260 },
       
   240     { "ni", 0x220b },
       
   241     { "not", 0x00ac },
       
   242     { "notin", 0x2209 },
       
   243     { "nsub", 0x2284 },
       
   244     { "ntilde", 0x00f1 },
       
   245     { "nu", 0x03bd },
       
   246     { "oacute", 0x00f3 },
       
   247     { "ocirc", 0x00f4 },
       
   248     { "oelig", 0x0153 },
       
   249     { "ograve", 0x00f2 },
       
   250     { "oline", 0x203e },
       
   251     { "omega", 0x03c9 },
       
   252     { "omicron", 0x03bf },
       
   253     { "oplus", 0x2295 },
       
   254     { "or", 0x22a6 },
       
   255     { "ordf", 0x00aa },
       
   256     { "ordm", 0x00ba },
       
   257     { "oslash", 0x00f8 },
       
   258     { "otilde", 0x00f5 },
       
   259     { "otimes", 0x2297 },
       
   260     { "ouml", 0x00f6 },
       
   261     { "para", 0x00b6 },
       
   262     { "part", 0x2202 },
       
   263     { "percnt", 0x0025 },
       
   264     { "permil", 0x2030 },
       
   265     { "perp", 0x22a5 },
       
   266     { "phi", 0x03c6 },
       
   267     { "pi", 0x03c0 },
       
   268     { "piv", 0x03d6 },
       
   269     { "plusmn", 0x00b1 },
       
   270     { "pound", 0x00a3 },
       
   271     { "prime", 0x2032 },
       
   272     { "prod", 0x220f },
       
   273     { "prop", 0x221d },
       
   274     { "psi", 0x03c8 },
       
   275     { "quot", 34 },
       
   276     { "rArr", 0x21d2 },
       
   277     { "radic", 0x221a },
       
   278     { "rang", 0x232a },
       
   279     { "raquo", 0x00bb },
       
   280     { "rarr", 0x2192 },
       
   281     { "rceil", 0x2309 },
       
   282     { "rdquo", 0x201d },
       
   283     { "real", 0x211c },
       
   284     { "reg", 0x00ae },
       
   285     { "rfloor", 0x230b },
       
   286     { "rho", 0x03c1 },
       
   287     { "rlm", 0x200f },
       
   288     { "rsaquo", 0x203a },
       
   289     { "rsquo", 0x2019 },
       
   290     { "sbquo", 0x201a },
       
   291     { "scaron", 0x0161 },
       
   292     { "sdot", 0x22c5 },
       
   293     { "sect", 0x00a7 },
       
   294     { "shy", 0x00ad },
       
   295     { "sigma", 0x03c3 },
       
   296     { "sigmaf", 0x03c2 },
       
   297     { "sim", 0x223c },
       
   298     { "spades", 0x2660 },
       
   299     { "sub", 0x2282 },
       
   300     { "sube", 0x2286 },
       
   301     { "sum", 0x2211 },
       
   302     { "sup", 0x2283 },
       
   303     { "sup1", 0x00b9 },
       
   304     { "sup2", 0x00b2 },
       
   305     { "sup3", 0x00b3 },
       
   306     { "supe", 0x2287 },
       
   307     { "szlig", 0x00df },
       
   308     { "tau", 0x03c4 },
       
   309     { "there4", 0x2234 },
       
   310     { "theta", 0x03b8 },
       
   311     { "thetasym", 0x03d1 },
       
   312     { "thinsp", 0x2009 },
       
   313     { "thorn", 0x00fe },
       
   314     { "tilde", 0x02dc },
       
   315     { "times", 0x00d7 },
       
   316     { "trade", 0x2122 },
       
   317     { "uArr", 0x21d1 },
       
   318     { "uacute", 0x00fa },
       
   319     { "uarr", 0x2191 },
       
   320     { "ucirc", 0x00fb },
       
   321     { "ugrave", 0x00f9 },
       
   322     { "uml", 0x00a8 },
       
   323     { "upsih", 0x03d2 },
       
   324     { "upsilon", 0x03c5 },
       
   325     { "uuml", 0x00fc },
       
   326     { "weierp", 0x2118 },
       
   327     { "xi", 0x03be },
       
   328     { "yacute", 0x00fd },
       
   329     { "yen", 0x00a5 },
       
   330     { "yuml", 0x00ff },
       
   331     { "zeta", 0x03b6 },
       
   332     { "zwj", 0x200d },
       
   333     { "zwnj", 0x200c }
       
   334 };
       
   335 
       
   336 Q_STATIC_GLOBAL_OPERATOR bool operator<(const QString &entityStr, const QTextHtmlEntity &entity)
       
   337 {
       
   338     return entityStr < QLatin1String(entity.name);
       
   339 }
       
   340 
       
   341 Q_STATIC_GLOBAL_OPERATOR bool operator<(const QTextHtmlEntity &entity, const QString &entityStr)
       
   342 {
       
   343     return QLatin1String(entity.name) < entityStr;
       
   344 }
       
   345 
       
   346 static QChar resolveEntity(const QString &entity)
       
   347 {
       
   348     const QTextHtmlEntity *start = &entities[0];
       
   349     const QTextHtmlEntity *end = &entities[(sizeof(entities) / sizeof(entities[0]))];
       
   350     const QTextHtmlEntity *e = qBinaryFind(start, end, entity);
       
   351     if (e == end)
       
   352         return QChar();
       
   353     return e->code;
       
   354 }
       
   355 
       
   356 static const uint latin1Extended[0xA0 - 0x80] = {
       
   357     0x20ac, // 0x80
       
   358     0x0081, // 0x81 direct mapping
       
   359     0x201a, // 0x82
       
   360     0x0192, // 0x83
       
   361     0x201e, // 0x84
       
   362     0x2026, // 0x85
       
   363     0x2020, // 0x86
       
   364     0x2021, // 0x87
       
   365     0x02C6, // 0x88
       
   366     0x2030, // 0x89
       
   367     0x0160, // 0x8A
       
   368     0x2039, // 0x8B
       
   369     0x0152, // 0x8C
       
   370     0x008D, // 0x8D direct mapping
       
   371     0x017D, // 0x8E
       
   372     0x008F, // 0x8F directmapping
       
   373     0x0090, // 0x90 directmapping
       
   374     0x2018, // 0x91
       
   375     0x2019, // 0x92
       
   376     0x201C, // 0x93
       
   377     0X201D, // 0x94
       
   378     0x2022, // 0x95
       
   379     0x2013, // 0x96
       
   380     0x2014, // 0x97
       
   381     0x02DC, // 0x98
       
   382     0x2122, // 0x99
       
   383     0x0161, // 0x9A
       
   384     0x203A, // 0x9B
       
   385     0x0153, // 0x9C
       
   386     0x009D, // 0x9D direct mapping
       
   387     0x017E, // 0x9E
       
   388     0x0178  // 0x9F
       
   389 };
       
   390 // end taken from qtexthtmlparser
       
   391 
       
   392 class DocumentHelper
       
   393 {
       
   394 public:
       
   395     DocumentHelper(const QString &fileName, const QByteArray &data)
       
   396         : fileName(fileName) , data(readData(data)) {}
       
   397     ~DocumentHelper() {}
       
   398 
       
   399     bool addFieldsToDocument(QCLuceneDocument *document,
       
   400         const QString &namespaceName, const QString &attributes = QString())
       
   401     {
       
   402         if (!document)
       
   403             return false;
       
   404 
       
   405         if(!data.isEmpty()) {
       
   406             QString parsedData = parseData();
       
   407             QString parsedTitle = QHelpGlobal::documentTitle(data);
       
   408 
       
   409             if(!parsedData.isEmpty()) {
       
   410                 document->add(new QCLuceneField(QLatin1String("content"),
       
   411                     parsedData,QCLuceneField::INDEX_TOKENIZED));
       
   412                 document->add(new QCLuceneField(QLatin1String("path"), fileName,
       
   413                     QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
       
   414                 document->add(new QCLuceneField(QLatin1String("title"), parsedTitle,
       
   415                     QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
       
   416                 document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle,
       
   417                     QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
       
   418                 document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName,
       
   419                     QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED));
       
   420                 document->add(new QCLuceneField(QLatin1String("attribute"), attributes,
       
   421                     QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED));
       
   422                 return true;
       
   423             }
       
   424         }
       
   425 
       
   426         return false;
       
   427     }
       
   428 
       
   429 private:
       
   430     QString readData(const QByteArray &data)
       
   431     {
       
   432         QTextStream textStream(data);
       
   433         QByteArray charSet = QHelpGlobal::charsetFromData(data).toLatin1();
       
   434         textStream.setCodec(QTextCodec::codecForName(charSet.constData()));
       
   435 
       
   436         QString stream = textStream.readAll();
       
   437         if (stream.isNull() || stream.isEmpty())
       
   438             return QString();
       
   439 
       
   440         return stream;
       
   441     }
       
   442 
       
   443     QString parseData() const
       
   444     {
       
   445         const int length = data.length();
       
   446         const QChar *buf = data.unicode();
       
   447 
       
   448         QString parsedContent;
       
   449         parsedContent.reserve(length);
       
   450 
       
   451         bool valid = true;
       
   452         int j = 0, count = 0;
       
   453 
       
   454         QChar c;
       
   455         while (j < length) {
       
   456             c = buf[j++];
       
   457             if (c == QLatin1Char('<') || c == QLatin1Char('&')) {
       
   458                 if (count > 1 && c != QLatin1Char('&'))
       
   459                     parsedContent.append(QLatin1Char(' '));
       
   460                 else if (c == QLatin1Char('&')) {
       
   461                     // Note: this will modify the counter j, in case we sucessful parsed the entity
       
   462                     //       we will have modified the counter to stay 1 before the closing ';', so
       
   463                     //       the following if condition will be met with if (c == QLatin1Char(';'))
       
   464                     parsedContent.append(parseEntity(length, buf, j));
       
   465                 }
       
   466 
       
   467                 count = 0;
       
   468                 valid = false;
       
   469                 continue;
       
   470             }
       
   471             if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) {
       
   472                 valid = true;
       
   473                 continue;
       
   474             }
       
   475             if (!valid)
       
   476                 continue;
       
   477 
       
   478             if (c.isLetterOrNumber() || c.isPrint()) {
       
   479                 ++count;
       
   480                 parsedContent.append(c.toLower());
       
   481             } else {
       
   482                 if (count > 1)
       
   483                     parsedContent.append(QLatin1Char(' '));
       
   484                 count = 0;
       
   485             }
       
   486         }
       
   487 
       
   488         return parsedContent;
       
   489     }
       
   490 
       
   491     // taken from qtexthtmlparser
       
   492     // parses an entity after "&", and returns it
       
   493     QString parseEntity(int len, const QChar *buf, int &pos) const
       
   494     {
       
   495         int recover = pos;
       
   496         QString entity;
       
   497         while (pos < len) {
       
   498             QChar c = buf[pos++];
       
   499             if (c.isSpace() || pos - recover > 9) {
       
   500                 goto error;
       
   501             }
       
   502             if (c == QLatin1Char(';')) {
       
   503                 pos--;
       
   504                 break;
       
   505             }
       
   506             entity += c;
       
   507         }
       
   508         {
       
   509             QChar resolved = resolveEntity(entity);
       
   510             if (!resolved.isNull())
       
   511                 return QString(resolved);
       
   512         }
       
   513         if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) {
       
   514             entity.remove(0, 1); // removing leading #
       
   515 
       
   516             int base = 10;
       
   517             bool ok = false;
       
   518 
       
   519             if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity?
       
   520                 entity.remove(0, 1);
       
   521                 base = 16;
       
   522             }
       
   523 
       
   524             uint uc = entity.toUInt(&ok, base);
       
   525             if (ok) {
       
   526                 if (uc >= 0x80  && uc < 0x80 + (sizeof(latin1Extended) / sizeof(latin1Extended[0])))
       
   527                     uc = latin1Extended[uc - 0x80]; // windows latin 1 extended
       
   528                 QString str;
       
   529                 if (uc > 0xffff) {
       
   530                     // surrogate pair
       
   531                     uc -= 0x10000;
       
   532                     ushort high = uc/0x400 + 0xd800;
       
   533                     ushort low = uc%0x400 + 0xdc00;
       
   534                     str.append(QChar(high));
       
   535                     str.append(QChar(low));
       
   536                 } else {
       
   537                     str.append(QChar(uc));
       
   538                 }
       
   539                 return str;
       
   540             }
       
   541         }
       
   542     error:
       
   543         pos = recover;
       
   544         return QLatin1String(" ");
       
   545     }
       
   546     // end taken from qtexthtmlparser
       
   547 
       
   548 private:
       
   549     QString fileName;
       
   550     QString data;
       
   551 };
       
   552 
       
   553 
       
   554 QHelpSearchIndexWriter::QHelpSearchIndexWriter()
       
   555     : QThread(0)
       
   556     , m_cancel(false)
       
   557 {
       
   558     // nothing todo
       
   559 }
       
   560 
       
   561 QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
       
   562 {
       
   563     mutex.lock();
       
   564     this->m_cancel = true;
       
   565     waitCondition.wakeOne();
       
   566     mutex.unlock();
       
   567 
       
   568     wait();
       
   569 }
       
   570 
       
   571 void QHelpSearchIndexWriter::cancelIndexing()
       
   572 {
       
   573     mutex.lock();
       
   574     this->m_cancel = true;
       
   575     mutex.unlock();
       
   576 }
       
   577 
       
   578 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile,
       
   579     const QString &indexFilesFolder, bool reindex)
       
   580 {
       
   581     mutex.lock();
       
   582     this->m_cancel = false;
       
   583     this->m_reindex = reindex;
       
   584     this->m_collectionFile = collectionFile;
       
   585     this->m_indexFilesFolder = indexFilesFolder;
       
   586     mutex.unlock();
       
   587 
       
   588     start(QThread::NormalPriority);
       
   589 }
       
   590 
       
   591 void QHelpSearchIndexWriter::optimizeIndex()
       
   592 {
       
   593 #if !defined(QT_NO_EXCEPTIONS)
       
   594     try {
       
   595 #endif
       
   596         if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) {
       
   597             if (QCLuceneIndexReader::isLocked(m_indexFilesFolder))
       
   598                 return;
       
   599 
       
   600             QCLuceneStandardAnalyzer analyzer;
       
   601             QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false);
       
   602             writer.optimize();
       
   603             writer.close();
       
   604         }
       
   605 #if !defined(QT_NO_EXCEPTIONS)
       
   606     } catch (...) {
       
   607         qWarning("Full Text Search, could not optimize index.");
       
   608         return;
       
   609     }
       
   610 #endif
       
   611 }
       
   612 
       
   613 void QHelpSearchIndexWriter::run()
       
   614 {
       
   615     QMutexLocker mutexLocker(&mutex);
       
   616 
       
   617     if (m_cancel)
       
   618         return;
       
   619 
       
   620     const bool reindex = this->m_reindex;
       
   621     const QString collectionFile(this->m_collectionFile);
       
   622 
       
   623     mutexLocker.unlock();
       
   624 
       
   625     QHelpEngineCore engine(collectionFile, 0);
       
   626     if (!engine.setupData())
       
   627         return;
       
   628 
       
   629     const QLatin1String key("CluceneIndexedNamespaces");
       
   630     if (reindex)
       
   631         engine.setCustomValue(key, QLatin1String(""));
       
   632 
       
   633     QMap<QString, QDateTime> indexMap;
       
   634     const QLatin1String oldKey("CluceneSearchNamespaces");
       
   635     if (!engine.customValue(oldKey, QString()).isNull()) {
       
   636         // old style qhc file < 4.4.2, need to convert...
       
   637         const QStringList indexedNamespaces = engine.customValue(oldKey).
       
   638             toString().split(QLatin1String("|"), QString::SkipEmptyParts);
       
   639         foreach (const QString &nameSpace, indexedNamespaces)
       
   640             indexMap.insert(nameSpace, QDateTime());
       
   641         engine.removeCustomValue(oldKey);
       
   642     } else {
       
   643         QDataStream dataStream(engine.customValue(key).toByteArray());
       
   644         dataStream >> indexMap;
       
   645     }
       
   646 
       
   647     QString indexPath = m_indexFilesFolder;
       
   648 
       
   649     QFileInfo fInfo(indexPath);
       
   650     if (fInfo.exists() && !fInfo.isWritable()) {
       
   651         qWarning("Full Text Search, could not create index (missing permissions for '%s').", qPrintable(indexPath));
       
   652         return;
       
   653     }
       
   654 
       
   655     emit indexingStarted();
       
   656 
       
   657     QCLuceneIndexWriter *writer = 0;
       
   658     QCLuceneStandardAnalyzer analyzer;
       
   659     const QStringList registeredDocs = engine.registeredDocumentations();
       
   660 
       
   661     QLocalSocket localSocket;
       
   662     localSocket.connectToServer(QString(QLatin1String("QtAssistant%1"))
       
   663         .arg(QLatin1String(QT_VERSION_STR)));
       
   664 
       
   665     QLocalServer localServer;
       
   666     bool otherInstancesRunning = true;
       
   667     if (!localSocket.waitForConnected()) {
       
   668         otherInstancesRunning = false;
       
   669         localServer.listen(QString(QLatin1String("QtAssistant%1"))
       
   670             .arg(QLatin1String(QT_VERSION_STR)));
       
   671     }
       
   672 
       
   673 #if !defined(QT_NO_EXCEPTIONS)
       
   674     try {
       
   675 #endif
       
   676         // check if it's locked, and if the other instance is running
       
   677         if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath))
       
   678             QCLuceneIndexReader::unlock(indexPath);
       
   679 
       
   680         if (QCLuceneIndexReader::isLocked(indexPath)) {
       
   681             // poll unless indexing finished to fake progress
       
   682             while (QCLuceneIndexReader::isLocked(indexPath)) {
       
   683                 mutexLocker.relock();
       
   684                 if (m_cancel)
       
   685                     break;
       
   686                 mutexLocker.unlock();
       
   687                 this->sleep(1);
       
   688             }
       
   689             emit indexingFinished();
       
   690             return;
       
   691         }
       
   692 
       
   693         if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) {
       
   694             foreach(const QString &namespaceName, registeredDocs) {
       
   695                 mutexLocker.relock();
       
   696                 if (m_cancel) {
       
   697                     emit indexingFinished();
       
   698                     return;
       
   699                 }
       
   700                 mutexLocker.unlock();
       
   701 
       
   702                 if (!indexMap.contains(namespaceName)) {
       
   703                     // make sure we remove some partly indexed stuff
       
   704                     removeDocuments(indexPath, namespaceName);
       
   705                 } else {
       
   706                     QString path = engine.documentationFileName(namespaceName);
       
   707                     if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) {
       
   708                         // make sure we remove some outdated indexed stuff
       
   709                         indexMap.remove(namespaceName);
       
   710                         removeDocuments(indexPath, namespaceName);
       
   711                     }
       
   712 
       
   713                     if (indexMap.contains(namespaceName)) {
       
   714                         // make sure we really have content indexed for namespace
       
   715                         // NOTE: Extra variable just for GCC 3.3.5
       
   716                         QLatin1String key("namespace");
       
   717                         QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName));
       
   718                         QCLuceneIndexSearcher indexSearcher(indexPath);
       
   719                         QCLuceneHits hits = indexSearcher.search(query);
       
   720                         if (hits.length() <= 0)
       
   721                             indexMap.remove(namespaceName);
       
   722                     }
       
   723                 }
       
   724             }
       
   725             writer = new QCLuceneIndexWriter(indexPath, analyzer, false);
       
   726         } else {
       
   727             indexMap.clear();
       
   728             writer = new QCLuceneIndexWriter(indexPath, analyzer, true);
       
   729         }
       
   730 #if !defined(QT_NO_EXCEPTIONS)
       
   731     } catch (...) {
       
   732         qWarning("Full Text Search, could not create index writer in '%s'.",
       
   733             qPrintable(indexPath));
       
   734         return;
       
   735     }
       
   736 #endif
       
   737 
       
   738 #if !defined(QT_NO_EXCEPTIONS)
       
   739     try {
       
   740 #endif
       
   741         writer->setMergeFactor(100);
       
   742         writer->setMinMergeDocs(1000);
       
   743         writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH);
       
   744 #if !defined(QT_NO_EXCEPTIONS)
       
   745     } catch (...) {
       
   746         qWarning("Full Text Search, could not set writer properties.");
       
   747         return;
       
   748     }
       
   749 #endif
       
   750 
       
   751     QStringList namespaces;
       
   752     foreach(const QString &namespaceName, registeredDocs) {
       
   753         mutexLocker.relock();
       
   754         if (m_cancel) {
       
   755             closeIndexWriter(writer);
       
   756             emit indexingFinished();
       
   757             return;
       
   758         }
       
   759         mutexLocker.unlock();
       
   760 
       
   761         namespaces.append(namespaceName);
       
   762         if (indexMap.contains(namespaceName))
       
   763             continue;
       
   764 
       
   765         const QList<QStringList> attributeSets =
       
   766             engine.filterAttributeSets(namespaceName);
       
   767 
       
   768         if (attributeSets.isEmpty()) {
       
   769             const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName,
       
   770                 QStringList());
       
   771             if (!addDocuments(docFiles, engine, QStringList(), namespaceName,
       
   772                 writer, analyzer))
       
   773                 break;
       
   774         } else {
       
   775             bool bail = false;
       
   776             foreach (const QStringList &attributes, attributeSets) {
       
   777                 const QList<QUrl> docFiles = indexableFiles(&engine,
       
   778                     namespaceName, attributes);
       
   779                 if (!addDocuments(docFiles, engine, attributes, namespaceName,
       
   780                     writer, analyzer)) {
       
   781                     bail = true;
       
   782                     break;
       
   783                 }
       
   784             }
       
   785             if (bail)
       
   786                 break;
       
   787         }
       
   788 
       
   789         mutexLocker.relock();
       
   790         if (!m_cancel) {
       
   791             QString path(engine.documentationFileName(namespaceName));
       
   792             indexMap.insert(namespaceName, QFileInfo(path).lastModified());
       
   793             writeIndexMap(engine, indexMap);
       
   794         }
       
   795         mutexLocker.unlock();
       
   796     }
       
   797 
       
   798     closeIndexWriter(writer);
       
   799 
       
   800     mutexLocker.relock();
       
   801     if (!m_cancel) {
       
   802         mutexLocker.unlock();
       
   803 
       
   804         QStringList indexedNamespaces = indexMap.keys();
       
   805         foreach(const QString &namespaceName, indexedNamespaces) {
       
   806             mutexLocker.relock();
       
   807             if (m_cancel)
       
   808                 break;
       
   809             mutexLocker.unlock();
       
   810 
       
   811             if (!namespaces.contains(namespaceName)) {
       
   812                 indexMap.remove(namespaceName);
       
   813                 writeIndexMap(engine, indexMap);
       
   814                 removeDocuments(indexPath, namespaceName);
       
   815             }
       
   816         }
       
   817     }
       
   818     emit indexingFinished();
       
   819 }
       
   820 
       
   821 bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles,
       
   822     const QHelpEngineCore &engine, const QStringList &attributes,
       
   823     const QString &namespaceName, QCLuceneIndexWriter *writer,
       
   824     QCLuceneAnalyzer &analyzer)
       
   825 {
       
   826     QMutexLocker locker(&mutex);
       
   827     const QString attrList = attributes.join(QLatin1String(" "));
       
   828 
       
   829     locker.unlock();
       
   830     foreach(const QUrl &url, docFiles) {
       
   831         QCLuceneDocument document;
       
   832         DocumentHelper helper(url.toString(), engine.fileData(url));
       
   833         if (helper.addFieldsToDocument(&document, namespaceName, attrList)) {
       
   834 #if !defined(QT_NO_EXCEPTIONS)
       
   835             try {
       
   836 #endif
       
   837                 writer->addDocument(document, analyzer);
       
   838 #if !defined(QT_NO_EXCEPTIONS)
       
   839             } catch (...) {
       
   840                 qWarning("Full Text Search, could not properly add documents.");
       
   841                 return false;
       
   842             }
       
   843 #endif
       
   844         }
       
   845         locker.relock();
       
   846         if (m_cancel)
       
   847             return false;
       
   848         locker.unlock();
       
   849     }
       
   850     return true;
       
   851 }
       
   852 
       
   853 void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath,
       
   854     const QString &namespaceName)
       
   855 {
       
   856     if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath))
       
   857         return;
       
   858 
       
   859     QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath);
       
   860     reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"),
       
   861         namespaceName));
       
   862 
       
   863     reader.close();
       
   864 }
       
   865 
       
   866 bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore &engine,
       
   867     const QMap<QString, QDateTime> &indexMap)
       
   868 {
       
   869     QByteArray bArray;
       
   870 
       
   871     QDataStream data(&bArray, QIODevice::ReadWrite);
       
   872     data << indexMap;
       
   873 
       
   874     return engine.setCustomValue(QLatin1String("CluceneIndexedNamespaces"),
       
   875         bArray);
       
   876 }
       
   877 
       
   878 QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine,
       
   879     const QString &namespaceName, const QStringList &attributes) const
       
   880 {
       
   881     QList<QUrl> docFiles = helpEngine->files(namespaceName, attributes,
       
   882         QLatin1String("html"));
       
   883     docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("htm"));
       
   884     docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("txt"));
       
   885 
       
   886     return docFiles;
       
   887 }
       
   888 
       
   889 void QHelpSearchIndexWriter::closeIndexWriter(QCLuceneIndexWriter *writer)
       
   890 {
       
   891 #if !defined(QT_NO_EXCEPTIONS)
       
   892     try {
       
   893 #endif
       
   894         writer->close();
       
   895         delete writer;
       
   896 #if !defined(QT_NO_EXCEPTIONS)
       
   897     } catch (...) {
       
   898         qWarning("Full Text Search, could not properly close index writer.");
       
   899     }
       
   900 #endif
       
   901 }
       
   902 
       
   903         }   // namespace clucene
       
   904     }   // namespace fulltextsearch
       
   905 }   // namespace qt
       
   906 
       
   907 QT_END_NAMESPACE