|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the Qt Assistant of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #include "qhelpenginecore.h" |
|
43 #include "qhelp_global.h" |
|
44 #include "fulltextsearch/qhits_p.h" |
|
45 #include "fulltextsearch/qquery_p.h" |
|
46 #include "fulltextsearch/qanalyzer_p.h" |
|
47 #include "fulltextsearch/qdocument_p.h" |
|
48 #include "fulltextsearch/qsearchable_p.h" |
|
49 #include "fulltextsearch/qindexreader_p.h" |
|
50 #include "fulltextsearch/qindexwriter_p.h" |
|
51 #include "qhelpsearchindexwriter_clucene_p.h" |
|
52 |
|
53 #include <QtCore/QDir> |
|
54 #include <QtCore/QString> |
|
55 #include <QtCore/QFileInfo> |
|
56 #include <QtCore/QTextCodec> |
|
57 #include <QtCore/QTextStream> |
|
58 |
|
59 #include <QtNetwork/QLocalSocket> |
|
60 #include <QtNetwork/QLocalServer> |
|
61 |
|
62 #include "private/qfunctions_p.h" |
|
63 |
|
64 QT_BEGIN_NAMESPACE |
|
65 |
|
66 namespace qt { |
|
67 namespace fulltextsearch { |
|
68 namespace clucene { |
|
69 |
|
70 // taken from qtexthtmlparser |
|
71 static const struct QTextHtmlEntity |
|
72 { |
|
73 const char *name; |
|
74 quint16 code; |
|
75 } entities[] = { |
|
76 { "AElig", 0x00c6 }, |
|
77 { "AMP", 38 }, |
|
78 { "Aacute", 0x00c1 }, |
|
79 { "Acirc", 0x00c2 }, |
|
80 { "Agrave", 0x00c0 }, |
|
81 { "Alpha", 0x0391 }, |
|
82 { "Aring", 0x00c5 }, |
|
83 { "Atilde", 0x00c3 }, |
|
84 { "Auml", 0x00c4 }, |
|
85 { "Beta", 0x0392 }, |
|
86 { "Ccedil", 0x00c7 }, |
|
87 { "Chi", 0x03a7 }, |
|
88 { "Dagger", 0x2021 }, |
|
89 { "Delta", 0x0394 }, |
|
90 { "ETH", 0x00d0 }, |
|
91 { "Eacute", 0x00c9 }, |
|
92 { "Ecirc", 0x00ca }, |
|
93 { "Egrave", 0x00c8 }, |
|
94 { "Epsilon", 0x0395 }, |
|
95 { "Eta", 0x0397 }, |
|
96 { "Euml", 0x00cb }, |
|
97 { "GT", 62 }, |
|
98 { "Gamma", 0x0393 }, |
|
99 { "Iacute", 0x00cd }, |
|
100 { "Icirc", 0x00ce }, |
|
101 { "Igrave", 0x00cc }, |
|
102 { "Iota", 0x0399 }, |
|
103 { "Iuml", 0x00cf }, |
|
104 { "Kappa", 0x039a }, |
|
105 { "LT", 60 }, |
|
106 { "Lambda", 0x039b }, |
|
107 { "Mu", 0x039c }, |
|
108 { "Ntilde", 0x00d1 }, |
|
109 { "Nu", 0x039d }, |
|
110 { "OElig", 0x0152 }, |
|
111 { "Oacute", 0x00d3 }, |
|
112 { "Ocirc", 0x00d4 }, |
|
113 { "Ograve", 0x00d2 }, |
|
114 { "Omega", 0x03a9 }, |
|
115 { "Omicron", 0x039f }, |
|
116 { "Oslash", 0x00d8 }, |
|
117 { "Otilde", 0x00d5 }, |
|
118 { "Ouml", 0x00d6 }, |
|
119 { "Phi", 0x03a6 }, |
|
120 { "Pi", 0x03a0 }, |
|
121 { "Prime", 0x2033 }, |
|
122 { "Psi", 0x03a8 }, |
|
123 { "QUOT", 34 }, |
|
124 { "Rho", 0x03a1 }, |
|
125 { "Scaron", 0x0160 }, |
|
126 { "Sigma", 0x03a3 }, |
|
127 { "THORN", 0x00de }, |
|
128 { "Tau", 0x03a4 }, |
|
129 { "Theta", 0x0398 }, |
|
130 { "Uacute", 0x00da }, |
|
131 { "Ucirc", 0x00db }, |
|
132 { "Ugrave", 0x00d9 }, |
|
133 { "Upsilon", 0x03a5 }, |
|
134 { "Uuml", 0x00dc }, |
|
135 { "Xi", 0x039e }, |
|
136 { "Yacute", 0x00dd }, |
|
137 { "Yuml", 0x0178 }, |
|
138 { "Zeta", 0x0396 }, |
|
139 { "aacute", 0x00e1 }, |
|
140 { "acirc", 0x00e2 }, |
|
141 { "acute", 0x00b4 }, |
|
142 { "aelig", 0x00e6 }, |
|
143 { "agrave", 0x00e0 }, |
|
144 { "alefsym", 0x2135 }, |
|
145 { "alpha", 0x03b1 }, |
|
146 { "amp", 38 }, |
|
147 { "and", 0x22a5 }, |
|
148 { "ang", 0x2220 }, |
|
149 { "apos", 0x0027 }, |
|
150 { "aring", 0x00e5 }, |
|
151 { "asymp", 0x2248 }, |
|
152 { "atilde", 0x00e3 }, |
|
153 { "auml", 0x00e4 }, |
|
154 { "bdquo", 0x201e }, |
|
155 { "beta", 0x03b2 }, |
|
156 { "brvbar", 0x00a6 }, |
|
157 { "bull", 0x2022 }, |
|
158 { "cap", 0x2229 }, |
|
159 { "ccedil", 0x00e7 }, |
|
160 { "cedil", 0x00b8 }, |
|
161 { "cent", 0x00a2 }, |
|
162 { "chi", 0x03c7 }, |
|
163 { "circ", 0x02c6 }, |
|
164 { "clubs", 0x2663 }, |
|
165 { "cong", 0x2245 }, |
|
166 { "copy", 0x00a9 }, |
|
167 { "crarr", 0x21b5 }, |
|
168 { "cup", 0x222a }, |
|
169 { "curren", 0x00a4 }, |
|
170 { "dArr", 0x21d3 }, |
|
171 { "dagger", 0x2020 }, |
|
172 { "darr", 0x2193 }, |
|
173 { "deg", 0x00b0 }, |
|
174 { "delta", 0x03b4 }, |
|
175 { "diams", 0x2666 }, |
|
176 { "divide", 0x00f7 }, |
|
177 { "eacute", 0x00e9 }, |
|
178 { "ecirc", 0x00ea }, |
|
179 { "egrave", 0x00e8 }, |
|
180 { "empty", 0x2205 }, |
|
181 { "emsp", 0x2003 }, |
|
182 { "ensp", 0x2002 }, |
|
183 { "epsilon", 0x03b5 }, |
|
184 { "equiv", 0x2261 }, |
|
185 { "eta", 0x03b7 }, |
|
186 { "eth", 0x00f0 }, |
|
187 { "euml", 0x00eb }, |
|
188 { "euro", 0x20ac }, |
|
189 { "exist", 0x2203 }, |
|
190 { "fnof", 0x0192 }, |
|
191 { "forall", 0x2200 }, |
|
192 { "frac12", 0x00bd }, |
|
193 { "frac14", 0x00bc }, |
|
194 { "frac34", 0x00be }, |
|
195 { "frasl", 0x2044 }, |
|
196 { "gamma", 0x03b3 }, |
|
197 { "ge", 0x2265 }, |
|
198 { "gt", 62 }, |
|
199 { "hArr", 0x21d4 }, |
|
200 { "harr", 0x2194 }, |
|
201 { "hearts", 0x2665 }, |
|
202 { "hellip", 0x2026 }, |
|
203 { "iacute", 0x00ed }, |
|
204 { "icirc", 0x00ee }, |
|
205 { "iexcl", 0x00a1 }, |
|
206 { "igrave", 0x00ec }, |
|
207 { "image", 0x2111 }, |
|
208 { "infin", 0x221e }, |
|
209 { "int", 0x222b }, |
|
210 { "iota", 0x03b9 }, |
|
211 { "iquest", 0x00bf }, |
|
212 { "isin", 0x2208 }, |
|
213 { "iuml", 0x00ef }, |
|
214 { "kappa", 0x03ba }, |
|
215 { "lArr", 0x21d0 }, |
|
216 { "lambda", 0x03bb }, |
|
217 { "lang", 0x2329 }, |
|
218 { "laquo", 0x00ab }, |
|
219 { "larr", 0x2190 }, |
|
220 { "lceil", 0x2308 }, |
|
221 { "ldquo", 0x201c }, |
|
222 { "le", 0x2264 }, |
|
223 { "lfloor", 0x230a }, |
|
224 { "lowast", 0x2217 }, |
|
225 { "loz", 0x25ca }, |
|
226 { "lrm", 0x200e }, |
|
227 { "lsaquo", 0x2039 }, |
|
228 { "lsquo", 0x2018 }, |
|
229 { "lt", 60 }, |
|
230 { "macr", 0x00af }, |
|
231 { "mdash", 0x2014 }, |
|
232 { "micro", 0x00b5 }, |
|
233 { "middot", 0x00b7 }, |
|
234 { "minus", 0x2212 }, |
|
235 { "mu", 0x03bc }, |
|
236 { "nabla", 0x2207 }, |
|
237 { "nbsp", 0x00a0 }, |
|
238 { "ndash", 0x2013 }, |
|
239 { "ne", 0x2260 }, |
|
240 { "ni", 0x220b }, |
|
241 { "not", 0x00ac }, |
|
242 { "notin", 0x2209 }, |
|
243 { "nsub", 0x2284 }, |
|
244 { "ntilde", 0x00f1 }, |
|
245 { "nu", 0x03bd }, |
|
246 { "oacute", 0x00f3 }, |
|
247 { "ocirc", 0x00f4 }, |
|
248 { "oelig", 0x0153 }, |
|
249 { "ograve", 0x00f2 }, |
|
250 { "oline", 0x203e }, |
|
251 { "omega", 0x03c9 }, |
|
252 { "omicron", 0x03bf }, |
|
253 { "oplus", 0x2295 }, |
|
254 { "or", 0x22a6 }, |
|
255 { "ordf", 0x00aa }, |
|
256 { "ordm", 0x00ba }, |
|
257 { "oslash", 0x00f8 }, |
|
258 { "otilde", 0x00f5 }, |
|
259 { "otimes", 0x2297 }, |
|
260 { "ouml", 0x00f6 }, |
|
261 { "para", 0x00b6 }, |
|
262 { "part", 0x2202 }, |
|
263 { "percnt", 0x0025 }, |
|
264 { "permil", 0x2030 }, |
|
265 { "perp", 0x22a5 }, |
|
266 { "phi", 0x03c6 }, |
|
267 { "pi", 0x03c0 }, |
|
268 { "piv", 0x03d6 }, |
|
269 { "plusmn", 0x00b1 }, |
|
270 { "pound", 0x00a3 }, |
|
271 { "prime", 0x2032 }, |
|
272 { "prod", 0x220f }, |
|
273 { "prop", 0x221d }, |
|
274 { "psi", 0x03c8 }, |
|
275 { "quot", 34 }, |
|
276 { "rArr", 0x21d2 }, |
|
277 { "radic", 0x221a }, |
|
278 { "rang", 0x232a }, |
|
279 { "raquo", 0x00bb }, |
|
280 { "rarr", 0x2192 }, |
|
281 { "rceil", 0x2309 }, |
|
282 { "rdquo", 0x201d }, |
|
283 { "real", 0x211c }, |
|
284 { "reg", 0x00ae }, |
|
285 { "rfloor", 0x230b }, |
|
286 { "rho", 0x03c1 }, |
|
287 { "rlm", 0x200f }, |
|
288 { "rsaquo", 0x203a }, |
|
289 { "rsquo", 0x2019 }, |
|
290 { "sbquo", 0x201a }, |
|
291 { "scaron", 0x0161 }, |
|
292 { "sdot", 0x22c5 }, |
|
293 { "sect", 0x00a7 }, |
|
294 { "shy", 0x00ad }, |
|
295 { "sigma", 0x03c3 }, |
|
296 { "sigmaf", 0x03c2 }, |
|
297 { "sim", 0x223c }, |
|
298 { "spades", 0x2660 }, |
|
299 { "sub", 0x2282 }, |
|
300 { "sube", 0x2286 }, |
|
301 { "sum", 0x2211 }, |
|
302 { "sup", 0x2283 }, |
|
303 { "sup1", 0x00b9 }, |
|
304 { "sup2", 0x00b2 }, |
|
305 { "sup3", 0x00b3 }, |
|
306 { "supe", 0x2287 }, |
|
307 { "szlig", 0x00df }, |
|
308 { "tau", 0x03c4 }, |
|
309 { "there4", 0x2234 }, |
|
310 { "theta", 0x03b8 }, |
|
311 { "thetasym", 0x03d1 }, |
|
312 { "thinsp", 0x2009 }, |
|
313 { "thorn", 0x00fe }, |
|
314 { "tilde", 0x02dc }, |
|
315 { "times", 0x00d7 }, |
|
316 { "trade", 0x2122 }, |
|
317 { "uArr", 0x21d1 }, |
|
318 { "uacute", 0x00fa }, |
|
319 { "uarr", 0x2191 }, |
|
320 { "ucirc", 0x00fb }, |
|
321 { "ugrave", 0x00f9 }, |
|
322 { "uml", 0x00a8 }, |
|
323 { "upsih", 0x03d2 }, |
|
324 { "upsilon", 0x03c5 }, |
|
325 { "uuml", 0x00fc }, |
|
326 { "weierp", 0x2118 }, |
|
327 { "xi", 0x03be }, |
|
328 { "yacute", 0x00fd }, |
|
329 { "yen", 0x00a5 }, |
|
330 { "yuml", 0x00ff }, |
|
331 { "zeta", 0x03b6 }, |
|
332 { "zwj", 0x200d }, |
|
333 { "zwnj", 0x200c } |
|
334 }; |
|
335 |
|
336 Q_STATIC_GLOBAL_OPERATOR bool operator<(const QString &entityStr, const QTextHtmlEntity &entity) |
|
337 { |
|
338 return entityStr < QLatin1String(entity.name); |
|
339 } |
|
340 |
|
341 Q_STATIC_GLOBAL_OPERATOR bool operator<(const QTextHtmlEntity &entity, const QString &entityStr) |
|
342 { |
|
343 return QLatin1String(entity.name) < entityStr; |
|
344 } |
|
345 |
|
346 static QChar resolveEntity(const QString &entity) |
|
347 { |
|
348 const QTextHtmlEntity *start = &entities[0]; |
|
349 const QTextHtmlEntity *end = &entities[(sizeof(entities) / sizeof(entities[0]))]; |
|
350 const QTextHtmlEntity *e = qBinaryFind(start, end, entity); |
|
351 if (e == end) |
|
352 return QChar(); |
|
353 return e->code; |
|
354 } |
|
355 |
|
// Replacement code points for numeric character references in the range
// 0x80..0x9F. Entry i holds the value used for reference 0x80 + i; slots
// marked "direct mapping" simply map to themselves.
static const uint latin1Extended[0xA0 - 0x80] = {
    0x20ac, // 0x80
    0x0081, // 0x81 direct mapping
    0x201a, // 0x82
    0x0192, // 0x83
    0x201e, // 0x84
    0x2026, // 0x85
    0x2020, // 0x86
    0x2021, // 0x87
    0x02c6, // 0x88
    0x2030, // 0x89
    0x0160, // 0x8A
    0x2039, // 0x8B
    0x0152, // 0x8C
    0x008d, // 0x8D direct mapping
    0x017d, // 0x8E
    0x008f, // 0x8F direct mapping
    0x0090, // 0x90 direct mapping
    0x2018, // 0x91
    0x2019, // 0x92
    0x201c, // 0x93
    0x201d, // 0x94
    0x2022, // 0x95
    0x2013, // 0x96
    0x2014, // 0x97
    0x02dc, // 0x98
    0x2122, // 0x99
    0x0161, // 0x9A
    0x203a, // 0x9B
    0x0153, // 0x9C
    0x009d, // 0x9D direct mapping
    0x017e, // 0x9E
    0x0178  // 0x9F
};
|
390 // end taken from qtexthtmlparser |
|
391 |
|
392 class DocumentHelper |
|
393 { |
|
394 public: |
|
395 DocumentHelper(const QString &fileName, const QByteArray &data) |
|
396 : fileName(fileName) , data(readData(data)) {} |
|
397 ~DocumentHelper() {} |
|
398 |
|
399 bool addFieldsToDocument(QCLuceneDocument *document, |
|
400 const QString &namespaceName, const QString &attributes = QString()) |
|
401 { |
|
402 if (!document) |
|
403 return false; |
|
404 |
|
405 if(!data.isEmpty()) { |
|
406 QString parsedData = parseData(); |
|
407 QString parsedTitle = QHelpGlobal::documentTitle(data); |
|
408 |
|
409 if(!parsedData.isEmpty()) { |
|
410 document->add(new QCLuceneField(QLatin1String("content"), |
|
411 parsedData,QCLuceneField::INDEX_TOKENIZED)); |
|
412 document->add(new QCLuceneField(QLatin1String("path"), fileName, |
|
413 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
414 document->add(new QCLuceneField(QLatin1String("title"), parsedTitle, |
|
415 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
416 document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle, |
|
417 QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); |
|
418 document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName, |
|
419 QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
420 document->add(new QCLuceneField(QLatin1String("attribute"), attributes, |
|
421 QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); |
|
422 return true; |
|
423 } |
|
424 } |
|
425 |
|
426 return false; |
|
427 } |
|
428 |
|
429 private: |
|
430 QString readData(const QByteArray &data) |
|
431 { |
|
432 QTextStream textStream(data); |
|
433 QByteArray charSet = QHelpGlobal::charsetFromData(data).toLatin1(); |
|
434 textStream.setCodec(QTextCodec::codecForName(charSet.constData())); |
|
435 |
|
436 QString stream = textStream.readAll(); |
|
437 if (stream.isNull() || stream.isEmpty()) |
|
438 return QString(); |
|
439 |
|
440 return stream; |
|
441 } |
|
442 |
|
443 QString parseData() const |
|
444 { |
|
445 const int length = data.length(); |
|
446 const QChar *buf = data.unicode(); |
|
447 |
|
448 QString parsedContent; |
|
449 parsedContent.reserve(length); |
|
450 |
|
451 bool valid = true; |
|
452 int j = 0, count = 0; |
|
453 |
|
454 QChar c; |
|
455 while (j < length) { |
|
456 c = buf[j++]; |
|
457 if (c == QLatin1Char('<') || c == QLatin1Char('&')) { |
|
458 if (count > 1 && c != QLatin1Char('&')) |
|
459 parsedContent.append(QLatin1Char(' ')); |
|
460 else if (c == QLatin1Char('&')) { |
|
461 // Note: this will modify the counter j, in case we sucessful parsed the entity |
|
462 // we will have modified the counter to stay 1 before the closing ';', so |
|
463 // the following if condition will be met with if (c == QLatin1Char(';')) |
|
464 parsedContent.append(parseEntity(length, buf, j)); |
|
465 } |
|
466 |
|
467 count = 0; |
|
468 valid = false; |
|
469 continue; |
|
470 } |
|
471 if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) { |
|
472 valid = true; |
|
473 continue; |
|
474 } |
|
475 if (!valid) |
|
476 continue; |
|
477 |
|
478 if (c.isLetterOrNumber() || c.isPrint()) { |
|
479 ++count; |
|
480 parsedContent.append(c.toLower()); |
|
481 } else { |
|
482 if (count > 1) |
|
483 parsedContent.append(QLatin1Char(' ')); |
|
484 count = 0; |
|
485 } |
|
486 } |
|
487 |
|
488 return parsedContent; |
|
489 } |
|
490 |
|
491 // taken from qtexthtmlparser |
|
492 // parses an entity after "&", and returns it |
|
493 QString parseEntity(int len, const QChar *buf, int &pos) const |
|
494 { |
|
495 int recover = pos; |
|
496 QString entity; |
|
497 while (pos < len) { |
|
498 QChar c = buf[pos++]; |
|
499 if (c.isSpace() || pos - recover > 9) { |
|
500 goto error; |
|
501 } |
|
502 if (c == QLatin1Char(';')) { |
|
503 pos--; |
|
504 break; |
|
505 } |
|
506 entity += c; |
|
507 } |
|
508 { |
|
509 QChar resolved = resolveEntity(entity); |
|
510 if (!resolved.isNull()) |
|
511 return QString(resolved); |
|
512 } |
|
513 if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) { |
|
514 entity.remove(0, 1); // removing leading # |
|
515 |
|
516 int base = 10; |
|
517 bool ok = false; |
|
518 |
|
519 if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity? |
|
520 entity.remove(0, 1); |
|
521 base = 16; |
|
522 } |
|
523 |
|
524 uint uc = entity.toUInt(&ok, base); |
|
525 if (ok) { |
|
526 if (uc >= 0x80 && uc < 0x80 + (sizeof(latin1Extended) / sizeof(latin1Extended[0]))) |
|
527 uc = latin1Extended[uc - 0x80]; // windows latin 1 extended |
|
528 QString str; |
|
529 if (uc > 0xffff) { |
|
530 // surrogate pair |
|
531 uc -= 0x10000; |
|
532 ushort high = uc/0x400 + 0xd800; |
|
533 ushort low = uc%0x400 + 0xdc00; |
|
534 str.append(QChar(high)); |
|
535 str.append(QChar(low)); |
|
536 } else { |
|
537 str.append(QChar(uc)); |
|
538 } |
|
539 return str; |
|
540 } |
|
541 } |
|
542 error: |
|
543 pos = recover; |
|
544 return QLatin1String(" "); |
|
545 } |
|
546 // end taken from qtexthtmlparser |
|
547 |
|
548 private: |
|
549 QString fileName; |
|
550 QString data; |
|
551 }; |
|
552 |
|
553 |
|
554 QHelpSearchIndexWriter::QHelpSearchIndexWriter() |
|
555 : QThread(0) |
|
556 , m_cancel(false) |
|
557 { |
|
558 // nothing todo |
|
559 } |
|
560 |
|
561 QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
|
562 { |
|
563 mutex.lock(); |
|
564 this->m_cancel = true; |
|
565 waitCondition.wakeOne(); |
|
566 mutex.unlock(); |
|
567 |
|
568 wait(); |
|
569 } |
|
570 |
|
571 void QHelpSearchIndexWriter::cancelIndexing() |
|
572 { |
|
573 mutex.lock(); |
|
574 this->m_cancel = true; |
|
575 mutex.unlock(); |
|
576 } |
|
577 |
|
578 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
|
579 const QString &indexFilesFolder, bool reindex) |
|
580 { |
|
581 mutex.lock(); |
|
582 this->m_cancel = false; |
|
583 this->m_reindex = reindex; |
|
584 this->m_collectionFile = collectionFile; |
|
585 this->m_indexFilesFolder = indexFilesFolder; |
|
586 mutex.unlock(); |
|
587 |
|
588 start(QThread::NormalPriority); |
|
589 } |
|
590 |
|
591 void QHelpSearchIndexWriter::optimizeIndex() |
|
592 { |
|
593 #if !defined(QT_NO_EXCEPTIONS) |
|
594 try { |
|
595 #endif |
|
596 if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) { |
|
597 if (QCLuceneIndexReader::isLocked(m_indexFilesFolder)) |
|
598 return; |
|
599 |
|
600 QCLuceneStandardAnalyzer analyzer; |
|
601 QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false); |
|
602 writer.optimize(); |
|
603 writer.close(); |
|
604 } |
|
605 #if !defined(QT_NO_EXCEPTIONS) |
|
606 } catch (...) { |
|
607 qWarning("Full Text Search, could not optimize index."); |
|
608 return; |
|
609 } |
|
610 #endif |
|
611 } |
|
612 |
|
613 void QHelpSearchIndexWriter::run() |
|
614 { |
|
615 QMutexLocker mutexLocker(&mutex); |
|
616 |
|
617 if (m_cancel) |
|
618 return; |
|
619 |
|
620 const bool reindex = this->m_reindex; |
|
621 const QString collectionFile(this->m_collectionFile); |
|
622 |
|
623 mutexLocker.unlock(); |
|
624 |
|
625 QHelpEngineCore engine(collectionFile, 0); |
|
626 if (!engine.setupData()) |
|
627 return; |
|
628 |
|
629 const QLatin1String key("CluceneIndexedNamespaces"); |
|
630 if (reindex) |
|
631 engine.setCustomValue(key, QLatin1String("")); |
|
632 |
|
633 QMap<QString, QDateTime> indexMap; |
|
634 const QLatin1String oldKey("CluceneSearchNamespaces"); |
|
635 if (!engine.customValue(oldKey, QString()).isNull()) { |
|
636 // old style qhc file < 4.4.2, need to convert... |
|
637 const QStringList indexedNamespaces = engine.customValue(oldKey). |
|
638 toString().split(QLatin1String("|"), QString::SkipEmptyParts); |
|
639 foreach (const QString &nameSpace, indexedNamespaces) |
|
640 indexMap.insert(nameSpace, QDateTime()); |
|
641 engine.removeCustomValue(oldKey); |
|
642 } else { |
|
643 QDataStream dataStream(engine.customValue(key).toByteArray()); |
|
644 dataStream >> indexMap; |
|
645 } |
|
646 |
|
647 QString indexPath = m_indexFilesFolder; |
|
648 |
|
649 QFileInfo fInfo(indexPath); |
|
650 if (fInfo.exists() && !fInfo.isWritable()) { |
|
651 qWarning("Full Text Search, could not create index (missing permissions for '%s').", qPrintable(indexPath)); |
|
652 return; |
|
653 } |
|
654 |
|
655 emit indexingStarted(); |
|
656 |
|
657 QCLuceneIndexWriter *writer = 0; |
|
658 QCLuceneStandardAnalyzer analyzer; |
|
659 const QStringList registeredDocs = engine.registeredDocumentations(); |
|
660 |
|
661 QLocalSocket localSocket; |
|
662 localSocket.connectToServer(QString(QLatin1String("QtAssistant%1")) |
|
663 .arg(QLatin1String(QT_VERSION_STR))); |
|
664 |
|
665 QLocalServer localServer; |
|
666 bool otherInstancesRunning = true; |
|
667 if (!localSocket.waitForConnected()) { |
|
668 otherInstancesRunning = false; |
|
669 localServer.listen(QString(QLatin1String("QtAssistant%1")) |
|
670 .arg(QLatin1String(QT_VERSION_STR))); |
|
671 } |
|
672 |
|
673 #if !defined(QT_NO_EXCEPTIONS) |
|
674 try { |
|
675 #endif |
|
676 // check if it's locked, and if the other instance is running |
|
677 if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath)) |
|
678 QCLuceneIndexReader::unlock(indexPath); |
|
679 |
|
680 if (QCLuceneIndexReader::isLocked(indexPath)) { |
|
681 // poll unless indexing finished to fake progress |
|
682 while (QCLuceneIndexReader::isLocked(indexPath)) { |
|
683 mutexLocker.relock(); |
|
684 if (m_cancel) |
|
685 break; |
|
686 mutexLocker.unlock(); |
|
687 this->sleep(1); |
|
688 } |
|
689 emit indexingFinished(); |
|
690 return; |
|
691 } |
|
692 |
|
693 if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) { |
|
694 foreach(const QString &namespaceName, registeredDocs) { |
|
695 mutexLocker.relock(); |
|
696 if (m_cancel) { |
|
697 emit indexingFinished(); |
|
698 return; |
|
699 } |
|
700 mutexLocker.unlock(); |
|
701 |
|
702 if (!indexMap.contains(namespaceName)) { |
|
703 // make sure we remove some partly indexed stuff |
|
704 removeDocuments(indexPath, namespaceName); |
|
705 } else { |
|
706 QString path = engine.documentationFileName(namespaceName); |
|
707 if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) { |
|
708 // make sure we remove some outdated indexed stuff |
|
709 indexMap.remove(namespaceName); |
|
710 removeDocuments(indexPath, namespaceName); |
|
711 } |
|
712 |
|
713 if (indexMap.contains(namespaceName)) { |
|
714 // make sure we really have content indexed for namespace |
|
715 // NOTE: Extra variable just for GCC 3.3.5 |
|
716 QLatin1String key("namespace"); |
|
717 QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName)); |
|
718 QCLuceneIndexSearcher indexSearcher(indexPath); |
|
719 QCLuceneHits hits = indexSearcher.search(query); |
|
720 if (hits.length() <= 0) |
|
721 indexMap.remove(namespaceName); |
|
722 } |
|
723 } |
|
724 } |
|
725 writer = new QCLuceneIndexWriter(indexPath, analyzer, false); |
|
726 } else { |
|
727 indexMap.clear(); |
|
728 writer = new QCLuceneIndexWriter(indexPath, analyzer, true); |
|
729 } |
|
730 #if !defined(QT_NO_EXCEPTIONS) |
|
731 } catch (...) { |
|
732 qWarning("Full Text Search, could not create index writer in '%s'.", |
|
733 qPrintable(indexPath)); |
|
734 return; |
|
735 } |
|
736 #endif |
|
737 |
|
738 #if !defined(QT_NO_EXCEPTIONS) |
|
739 try { |
|
740 #endif |
|
741 writer->setMergeFactor(100); |
|
742 writer->setMinMergeDocs(1000); |
|
743 writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH); |
|
744 #if !defined(QT_NO_EXCEPTIONS) |
|
745 } catch (...) { |
|
746 qWarning("Full Text Search, could not set writer properties."); |
|
747 return; |
|
748 } |
|
749 #endif |
|
750 |
|
751 QStringList namespaces; |
|
752 foreach(const QString &namespaceName, registeredDocs) { |
|
753 mutexLocker.relock(); |
|
754 if (m_cancel) { |
|
755 closeIndexWriter(writer); |
|
756 emit indexingFinished(); |
|
757 return; |
|
758 } |
|
759 mutexLocker.unlock(); |
|
760 |
|
761 namespaces.append(namespaceName); |
|
762 if (indexMap.contains(namespaceName)) |
|
763 continue; |
|
764 |
|
765 const QList<QStringList> attributeSets = |
|
766 engine.filterAttributeSets(namespaceName); |
|
767 |
|
768 if (attributeSets.isEmpty()) { |
|
769 const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName, |
|
770 QStringList()); |
|
771 if (!addDocuments(docFiles, engine, QStringList(), namespaceName, |
|
772 writer, analyzer)) |
|
773 break; |
|
774 } else { |
|
775 bool bail = false; |
|
776 foreach (const QStringList &attributes, attributeSets) { |
|
777 const QList<QUrl> docFiles = indexableFiles(&engine, |
|
778 namespaceName, attributes); |
|
779 if (!addDocuments(docFiles, engine, attributes, namespaceName, |
|
780 writer, analyzer)) { |
|
781 bail = true; |
|
782 break; |
|
783 } |
|
784 } |
|
785 if (bail) |
|
786 break; |
|
787 } |
|
788 |
|
789 mutexLocker.relock(); |
|
790 if (!m_cancel) { |
|
791 QString path(engine.documentationFileName(namespaceName)); |
|
792 indexMap.insert(namespaceName, QFileInfo(path).lastModified()); |
|
793 writeIndexMap(engine, indexMap); |
|
794 } |
|
795 mutexLocker.unlock(); |
|
796 } |
|
797 |
|
798 closeIndexWriter(writer); |
|
799 |
|
800 mutexLocker.relock(); |
|
801 if (!m_cancel) { |
|
802 mutexLocker.unlock(); |
|
803 |
|
804 QStringList indexedNamespaces = indexMap.keys(); |
|
805 foreach(const QString &namespaceName, indexedNamespaces) { |
|
806 mutexLocker.relock(); |
|
807 if (m_cancel) |
|
808 break; |
|
809 mutexLocker.unlock(); |
|
810 |
|
811 if (!namespaces.contains(namespaceName)) { |
|
812 indexMap.remove(namespaceName); |
|
813 writeIndexMap(engine, indexMap); |
|
814 removeDocuments(indexPath, namespaceName); |
|
815 } |
|
816 } |
|
817 } |
|
818 emit indexingFinished(); |
|
819 } |
|
820 |
|
821 bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles, |
|
822 const QHelpEngineCore &engine, const QStringList &attributes, |
|
823 const QString &namespaceName, QCLuceneIndexWriter *writer, |
|
824 QCLuceneAnalyzer &analyzer) |
|
825 { |
|
826 QMutexLocker locker(&mutex); |
|
827 const QString attrList = attributes.join(QLatin1String(" ")); |
|
828 |
|
829 locker.unlock(); |
|
830 foreach(const QUrl &url, docFiles) { |
|
831 QCLuceneDocument document; |
|
832 DocumentHelper helper(url.toString(), engine.fileData(url)); |
|
833 if (helper.addFieldsToDocument(&document, namespaceName, attrList)) { |
|
834 #if !defined(QT_NO_EXCEPTIONS) |
|
835 try { |
|
836 #endif |
|
837 writer->addDocument(document, analyzer); |
|
838 #if !defined(QT_NO_EXCEPTIONS) |
|
839 } catch (...) { |
|
840 qWarning("Full Text Search, could not properly add documents."); |
|
841 return false; |
|
842 } |
|
843 #endif |
|
844 } |
|
845 locker.relock(); |
|
846 if (m_cancel) |
|
847 return false; |
|
848 locker.unlock(); |
|
849 } |
|
850 return true; |
|
851 } |
|
852 |
|
853 void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath, |
|
854 const QString &namespaceName) |
|
855 { |
|
856 if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath)) |
|
857 return; |
|
858 |
|
859 QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath); |
|
860 reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"), |
|
861 namespaceName)); |
|
862 |
|
863 reader.close(); |
|
864 } |
|
865 |
|
866 bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore &engine, |
|
867 const QMap<QString, QDateTime> &indexMap) |
|
868 { |
|
869 QByteArray bArray; |
|
870 |
|
871 QDataStream data(&bArray, QIODevice::ReadWrite); |
|
872 data << indexMap; |
|
873 |
|
874 return engine.setCustomValue(QLatin1String("CluceneIndexedNamespaces"), |
|
875 bArray); |
|
876 } |
|
877 |
|
878 QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine, |
|
879 const QString &namespaceName, const QStringList &attributes) const |
|
880 { |
|
881 QList<QUrl> docFiles = helpEngine->files(namespaceName, attributes, |
|
882 QLatin1String("html")); |
|
883 docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("htm")); |
|
884 docFiles += helpEngine->files(namespaceName, attributes, QLatin1String("txt")); |
|
885 |
|
886 return docFiles; |
|
887 } |
|
888 |
|
889 void QHelpSearchIndexWriter::closeIndexWriter(QCLuceneIndexWriter *writer) |
|
890 { |
|
891 #if !defined(QT_NO_EXCEPTIONS) |
|
892 try { |
|
893 #endif |
|
894 writer->close(); |
|
895 delete writer; |
|
896 #if !defined(QT_NO_EXCEPTIONS) |
|
897 } catch (...) { |
|
898 qWarning("Full Text Search, could not properly close index writer."); |
|
899 } |
|
900 #endif |
|
901 } |
|
902 |
|
903 } // namespace clucene |
|
904 } // namespace fulltextsearch |
|
905 } // namespace qt |
|
906 |
|
907 QT_END_NAMESPACE |