author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Tue, 02 Feb 2010 00:43:10 +0200 | |
changeset 3 | 41300fa6a67c |
parent 0 | 1918ee327afb |
child 4 | 3b1da2848fc7 |
permissions | -rw-r--r-- |
0 | 1 |
/**************************************************************************** |
2 |
** |
|
3 |
** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 |
** All rights reserved. |
|
5 |
** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 |
** |
|
7 |
** This file is part of the Qt Assistant of the Qt Toolkit. |
|
8 |
** |
|
9 |
** $QT_BEGIN_LICENSE:LGPL$ |
|
10 |
** No Commercial Usage |
|
11 |
** This file contains pre-release code and may not be distributed. |
|
12 |
** You may use this file in accordance with the terms and conditions |
|
13 |
** contained in the Technology Preview License Agreement accompanying |
|
14 |
** this package. |
|
15 |
** |
|
16 |
** GNU Lesser General Public License Usage |
|
17 |
** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 |
** General Public License version 2.1 as published by the Free Software |
|
19 |
** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 |
** packaging of this file. Please review the following information to |
|
21 |
** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 |
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 |
** |
|
24 |
** In addition, as a special exception, Nokia gives you certain additional |
|
25 |
** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 |
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 |
** |
|
28 |
** If you have questions regarding the use of this file, please contact |
|
29 |
** Nokia at qt-info@nokia.com. |
|
30 |
** |
|
31 |
** |
|
32 |
** |
|
33 |
** |
|
34 |
** |
|
35 |
** |
|
36 |
** |
|
37 |
** |
|
38 |
** $QT_END_LICENSE$ |
|
39 |
** |
|
40 |
****************************************************************************/ |
|
41 |
||
42 |
#include "qhelpenginecore.h" |
|
43 |
#include "qhelp_global.h" |
|
44 |
#include "fulltextsearch/qhits_p.h" |
|
45 |
#include "fulltextsearch/qquery_p.h" |
|
46 |
#include "fulltextsearch/qanalyzer_p.h" |
|
47 |
#include "fulltextsearch/qdocument_p.h" |
|
48 |
#include "fulltextsearch/qsearchable_p.h" |
|
49 |
#include "fulltextsearch/qindexreader_p.h" |
|
50 |
#include "fulltextsearch/qindexwriter_p.h" |
|
51 |
#include "qhelpsearchindexwriter_clucene_p.h" |
|
52 |
||
53 |
#include <QtCore/QDir> |
|
54 |
#include <QtCore/QString> |
|
55 |
#include <QtCore/QFileInfo> |
|
56 |
#include <QtCore/QTextCodec> |
|
57 |
#include <QtCore/QTextStream> |
|
58 |
||
59 |
#include <QtNetwork/QLocalSocket> |
|
60 |
#include <QtNetwork/QLocalServer> |
|
61 |
||
62 |
#include "private/qfunctions_p.h" |
|
63 |
||
64 |
QT_BEGIN_NAMESPACE |
|
65 |
||
66 |
namespace qt { |
|
67 |
namespace fulltextsearch { |
|
68 |
namespace clucene { |
|
69 |
||
70 |
// taken from qtexthtmlparser |
|
71 |
static const struct QTextHtmlEntity |
|
72 |
{ |
|
73 |
const char *name; |
|
74 |
quint16 code; |
|
75 |
} entities[] = { |
|
76 |
{ "AElig", 0x00c6 }, |
|
77 |
{ "AMP", 38 }, |
|
78 |
{ "Aacute", 0x00c1 }, |
|
79 |
{ "Acirc", 0x00c2 }, |
|
80 |
{ "Agrave", 0x00c0 }, |
|
81 |
{ "Alpha", 0x0391 }, |
|
82 |
{ "Aring", 0x00c5 }, |
|
83 |
{ "Atilde", 0x00c3 }, |
|
84 |
{ "Auml", 0x00c4 }, |
|
85 |
{ "Beta", 0x0392 }, |
|
86 |
{ "Ccedil", 0x00c7 }, |
|
87 |
{ "Chi", 0x03a7 }, |
|
88 |
{ "Dagger", 0x2021 }, |
|
89 |
{ "Delta", 0x0394 }, |
|
90 |
{ "ETH", 0x00d0 }, |
|
91 |
{ "Eacute", 0x00c9 }, |
|
92 |
{ "Ecirc", 0x00ca }, |
|
93 |
{ "Egrave", 0x00c8 }, |
|
94 |
{ "Epsilon", 0x0395 }, |
|
95 |
{ "Eta", 0x0397 }, |
|
96 |
{ "Euml", 0x00cb }, |
|
97 |
{ "GT", 62 }, |
|
98 |
{ "Gamma", 0x0393 }, |
|
99 |
{ "Iacute", 0x00cd }, |
|
100 |
{ "Icirc", 0x00ce }, |
|
101 |
{ "Igrave", 0x00cc }, |
|
102 |
{ "Iota", 0x0399 }, |
|
103 |
{ "Iuml", 0x00cf }, |
|
104 |
{ "Kappa", 0x039a }, |
|
105 |
{ "LT", 60 }, |
|
106 |
{ "Lambda", 0x039b }, |
|
107 |
{ "Mu", 0x039c }, |
|
108 |
{ "Ntilde", 0x00d1 }, |
|
109 |
{ "Nu", 0x039d }, |
|
110 |
{ "OElig", 0x0152 }, |
|
111 |
{ "Oacute", 0x00d3 }, |
|
112 |
{ "Ocirc", 0x00d4 }, |
|
113 |
{ "Ograve", 0x00d2 }, |
|
114 |
{ "Omega", 0x03a9 }, |
|
115 |
{ "Omicron", 0x039f }, |
|
116 |
{ "Oslash", 0x00d8 }, |
|
117 |
{ "Otilde", 0x00d5 }, |
|
118 |
{ "Ouml", 0x00d6 }, |
|
119 |
{ "Phi", 0x03a6 }, |
|
120 |
{ "Pi", 0x03a0 }, |
|
121 |
{ "Prime", 0x2033 }, |
|
122 |
{ "Psi", 0x03a8 }, |
|
123 |
{ "QUOT", 34 }, |
|
124 |
{ "Rho", 0x03a1 }, |
|
125 |
{ "Scaron", 0x0160 }, |
|
126 |
{ "Sigma", 0x03a3 }, |
|
127 |
{ "THORN", 0x00de }, |
|
128 |
{ "Tau", 0x03a4 }, |
|
129 |
{ "Theta", 0x0398 }, |
|
130 |
{ "Uacute", 0x00da }, |
|
131 |
{ "Ucirc", 0x00db }, |
|
132 |
{ "Ugrave", 0x00d9 }, |
|
133 |
{ "Upsilon", 0x03a5 }, |
|
134 |
{ "Uuml", 0x00dc }, |
|
135 |
{ "Xi", 0x039e }, |
|
136 |
{ "Yacute", 0x00dd }, |
|
137 |
{ "Yuml", 0x0178 }, |
|
138 |
{ "Zeta", 0x0396 }, |
|
139 |
{ "aacute", 0x00e1 }, |
|
140 |
{ "acirc", 0x00e2 }, |
|
141 |
{ "acute", 0x00b4 }, |
|
142 |
{ "aelig", 0x00e6 }, |
|
143 |
{ "agrave", 0x00e0 }, |
|
144 |
{ "alefsym", 0x2135 }, |
|
145 |
{ "alpha", 0x03b1 }, |
|
146 |
{ "amp", 38 }, |
|
147 |
{ "and", 0x22a5 }, |
|
148 |
{ "ang", 0x2220 }, |
|
149 |
{ "apos", 0x0027 }, |
|
150 |
{ "aring", 0x00e5 }, |
|
151 |
{ "asymp", 0x2248 }, |
|
152 |
{ "atilde", 0x00e3 }, |
|
153 |
{ "auml", 0x00e4 }, |
|
154 |
{ "bdquo", 0x201e }, |
|
155 |
{ "beta", 0x03b2 }, |
|
156 |
{ "brvbar", 0x00a6 }, |
|
157 |
{ "bull", 0x2022 }, |
|
158 |
{ "cap", 0x2229 }, |
|
159 |
{ "ccedil", 0x00e7 }, |
|
160 |
{ "cedil", 0x00b8 }, |
|
161 |
{ "cent", 0x00a2 }, |
|
162 |
{ "chi", 0x03c7 }, |
|
163 |
{ "circ", 0x02c6 }, |
|
164 |
{ "clubs", 0x2663 }, |
|
165 |
{ "cong", 0x2245 }, |
|
166 |
{ "copy", 0x00a9 }, |
|
167 |
{ "crarr", 0x21b5 }, |
|
168 |
{ "cup", 0x222a }, |
|
169 |
{ "curren", 0x00a4 }, |
|
170 |
{ "dArr", 0x21d3 }, |
|
171 |
{ "dagger", 0x2020 }, |
|
172 |
{ "darr", 0x2193 }, |
|
173 |
{ "deg", 0x00b0 }, |
|
174 |
{ "delta", 0x03b4 }, |
|
175 |
{ "diams", 0x2666 }, |
|
176 |
{ "divide", 0x00f7 }, |
|
177 |
{ "eacute", 0x00e9 }, |
|
178 |
{ "ecirc", 0x00ea }, |
|
179 |
{ "egrave", 0x00e8 }, |
|
180 |
{ "empty", 0x2205 }, |
|
181 |
{ "emsp", 0x2003 }, |
|
182 |
{ "ensp", 0x2002 }, |
|
183 |
{ "epsilon", 0x03b5 }, |
|
184 |
{ "equiv", 0x2261 }, |
|
185 |
{ "eta", 0x03b7 }, |
|
186 |
{ "eth", 0x00f0 }, |
|
187 |
{ "euml", 0x00eb }, |
|
188 |
{ "euro", 0x20ac }, |
|
189 |
{ "exist", 0x2203 }, |
|
190 |
{ "fnof", 0x0192 }, |
|
191 |
{ "forall", 0x2200 }, |
|
192 |
{ "frac12", 0x00bd }, |
|
193 |
{ "frac14", 0x00bc }, |
|
194 |
{ "frac34", 0x00be }, |
|
195 |
{ "frasl", 0x2044 }, |
|
196 |
{ "gamma", 0x03b3 }, |
|
197 |
{ "ge", 0x2265 }, |
|
198 |
{ "gt", 62 }, |
|
199 |
{ "hArr", 0x21d4 }, |
|
200 |
{ "harr", 0x2194 }, |
|
201 |
{ "hearts", 0x2665 }, |
|
202 |
{ "hellip", 0x2026 }, |
|
203 |
{ "iacute", 0x00ed }, |
|
204 |
{ "icirc", 0x00ee }, |
|
205 |
{ "iexcl", 0x00a1 }, |
|
206 |
{ "igrave", 0x00ec }, |
|
207 |
{ "image", 0x2111 }, |
|
208 |
{ "infin", 0x221e }, |
|
209 |
{ "int", 0x222b }, |
|
210 |
{ "iota", 0x03b9 }, |
|
211 |
{ "iquest", 0x00bf }, |
|
212 |
{ "isin", 0x2208 }, |
|
213 |
{ "iuml", 0x00ef }, |
|
214 |
{ "kappa", 0x03ba }, |
|
215 |
{ "lArr", 0x21d0 }, |
|
216 |
{ "lambda", 0x03bb }, |
|
217 |
{ "lang", 0x2329 }, |
|
218 |
{ "laquo", 0x00ab }, |
|
219 |
{ "larr", 0x2190 }, |
|
220 |
{ "lceil", 0x2308 }, |
|
221 |
{ "ldquo", 0x201c }, |
|
222 |
{ "le", 0x2264 }, |
|
223 |
{ "lfloor", 0x230a }, |
|
224 |
{ "lowast", 0x2217 }, |
|
225 |
{ "loz", 0x25ca }, |
|
226 |
{ "lrm", 0x200e }, |
|
227 |
{ "lsaquo", 0x2039 }, |
|
228 |
{ "lsquo", 0x2018 }, |
|
229 |
{ "lt", 60 }, |
|
230 |
{ "macr", 0x00af }, |
|
231 |
{ "mdash", 0x2014 }, |
|
232 |
{ "micro", 0x00b5 }, |
|
233 |
{ "middot", 0x00b7 }, |
|
234 |
{ "minus", 0x2212 }, |
|
235 |
{ "mu", 0x03bc }, |
|
236 |
{ "nabla", 0x2207 }, |
|
237 |
{ "nbsp", 0x00a0 }, |
|
238 |
{ "ndash", 0x2013 }, |
|
239 |
{ "ne", 0x2260 }, |
|
240 |
{ "ni", 0x220b }, |
|
241 |
{ "not", 0x00ac }, |
|
242 |
{ "notin", 0x2209 }, |
|
243 |
{ "nsub", 0x2284 }, |
|
244 |
{ "ntilde", 0x00f1 }, |
|
245 |
{ "nu", 0x03bd }, |
|
246 |
{ "oacute", 0x00f3 }, |
|
247 |
{ "ocirc", 0x00f4 }, |
|
248 |
{ "oelig", 0x0153 }, |
|
249 |
{ "ograve", 0x00f2 }, |
|
250 |
{ "oline", 0x203e }, |
|
251 |
{ "omega", 0x03c9 }, |
|
252 |
{ "omicron", 0x03bf }, |
|
253 |
{ "oplus", 0x2295 }, |
|
254 |
{ "or", 0x22a6 }, |
|
255 |
{ "ordf", 0x00aa }, |
|
256 |
{ "ordm", 0x00ba }, |
|
257 |
{ "oslash", 0x00f8 }, |
|
258 |
{ "otilde", 0x00f5 }, |
|
259 |
{ "otimes", 0x2297 }, |
|
260 |
{ "ouml", 0x00f6 }, |
|
261 |
{ "para", 0x00b6 }, |
|
262 |
{ "part", 0x2202 }, |
|
263 |
{ "percnt", 0x0025 }, |
|
264 |
{ "permil", 0x2030 }, |
|
265 |
{ "perp", 0x22a5 }, |
|
266 |
{ "phi", 0x03c6 }, |
|
267 |
{ "pi", 0x03c0 }, |
|
268 |
{ "piv", 0x03d6 }, |
|
269 |
{ "plusmn", 0x00b1 }, |
|
270 |
{ "pound", 0x00a3 }, |
|
271 |
{ "prime", 0x2032 }, |
|
272 |
{ "prod", 0x220f }, |
|
273 |
{ "prop", 0x221d }, |
|
274 |
{ "psi", 0x03c8 }, |
|
275 |
{ "quot", 34 }, |
|
276 |
{ "rArr", 0x21d2 }, |
|
277 |
{ "radic", 0x221a }, |
|
278 |
{ "rang", 0x232a }, |
|
279 |
{ "raquo", 0x00bb }, |
|
280 |
{ "rarr", 0x2192 }, |
|
281 |
{ "rceil", 0x2309 }, |
|
282 |
{ "rdquo", 0x201d }, |
|
283 |
{ "real", 0x211c }, |
|
284 |
{ "reg", 0x00ae }, |
|
285 |
{ "rfloor", 0x230b }, |
|
286 |
{ "rho", 0x03c1 }, |
|
287 |
{ "rlm", 0x200f }, |
|
288 |
{ "rsaquo", 0x203a }, |
|
289 |
{ "rsquo", 0x2019 }, |
|
290 |
{ "sbquo", 0x201a }, |
|
291 |
{ "scaron", 0x0161 }, |
|
292 |
{ "sdot", 0x22c5 }, |
|
293 |
{ "sect", 0x00a7 }, |
|
294 |
{ "shy", 0x00ad }, |
|
295 |
{ "sigma", 0x03c3 }, |
|
296 |
{ "sigmaf", 0x03c2 }, |
|
297 |
{ "sim", 0x223c }, |
|
298 |
{ "spades", 0x2660 }, |
|
299 |
{ "sub", 0x2282 }, |
|
300 |
{ "sube", 0x2286 }, |
|
301 |
{ "sum", 0x2211 }, |
|
302 |
{ "sup", 0x2283 }, |
|
303 |
{ "sup1", 0x00b9 }, |
|
304 |
{ "sup2", 0x00b2 }, |
|
305 |
{ "sup3", 0x00b3 }, |
|
306 |
{ "supe", 0x2287 }, |
|
307 |
{ "szlig", 0x00df }, |
|
308 |
{ "tau", 0x03c4 }, |
|
309 |
{ "there4", 0x2234 }, |
|
310 |
{ "theta", 0x03b8 }, |
|
311 |
{ "thetasym", 0x03d1 }, |
|
312 |
{ "thinsp", 0x2009 }, |
|
313 |
{ "thorn", 0x00fe }, |
|
314 |
{ "tilde", 0x02dc }, |
|
315 |
{ "times", 0x00d7 }, |
|
316 |
{ "trade", 0x2122 }, |
|
317 |
{ "uArr", 0x21d1 }, |
|
318 |
{ "uacute", 0x00fa }, |
|
319 |
{ "uarr", 0x2191 }, |
|
320 |
{ "ucirc", 0x00fb }, |
|
321 |
{ "ugrave", 0x00f9 }, |
|
322 |
{ "uml", 0x00a8 }, |
|
323 |
{ "upsih", 0x03d2 }, |
|
324 |
{ "upsilon", 0x03c5 }, |
|
325 |
{ "uuml", 0x00fc }, |
|
326 |
{ "weierp", 0x2118 }, |
|
327 |
{ "xi", 0x03be }, |
|
328 |
{ "yacute", 0x00fd }, |
|
329 |
{ "yen", 0x00a5 }, |
|
330 |
{ "yuml", 0x00ff }, |
|
331 |
{ "zeta", 0x03b6 }, |
|
332 |
{ "zwj", 0x200d }, |
|
333 |
{ "zwnj", 0x200c } |
|
334 |
}; |
|
335 |
||
336 |
// Orders a looked-up entity name before a table row; needed by the binary
// search over the entity table.
Q_STATIC_GLOBAL_OPERATOR bool operator<(const QString &entityStr, const QTextHtmlEntity &entity)
{
    const QLatin1String rowName(entity.name);
    return entityStr < rowName;
}
|
340 |
||
341 |
// Orders a table row before a looked-up entity name; mirror image of the
// (QString, QTextHtmlEntity) overload.
Q_STATIC_GLOBAL_OPERATOR bool operator<(const QTextHtmlEntity &entity, const QString &entityStr)
{
    const QLatin1String rowName(entity.name);
    return rowName < entityStr;
}
|
345 |
||
346 |
// Looks up a named entity (without '&' and ';') in the entity table.
// Returns the matching character, or a null QChar when the name is unknown.
static QChar resolveEntity(const QString &entity)
{
    const int entityCount = int(sizeof(entities) / sizeof(entities[0]));
    const QTextHtmlEntity *const first = entities;
    const QTextHtmlEntity *const last = entities + entityCount;

    // qBinaryFind() hands back the end iterator when nothing matches.
    const QTextHtmlEntity *match = qBinaryFind(first, last, entity);
    return match != last ? QChar(match->code) : QChar();
}
|
355 |
||
356 |
// Replacement code points for numeric character references in the range
// 0x80..0x9F ("windows latin 1 extended"); indexed by (value - 0x80).
// 0x81, 0x8D, 0x8F, 0x90 and 0x9D have no replacement and map to themselves.
static const uint latin1Extended[0xA0 - 0x80] = {
    // 0x80 .. 0x87
    0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
    // 0x88 .. 0x8F
    0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
    // 0x90 .. 0x97
    0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
    // 0x98 .. 0x9F
    0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178
};
|
390 |
// end taken from qtexthtmlparser |
|
391 |
||
392 |
class DocumentHelper |
|
393 |
{ |
|
394 |
public: |
|
395 |
DocumentHelper(const QString &fileName, const QByteArray &data) |
|
396 |
: fileName(fileName) , data(readData(data)) {} |
|
397 |
~DocumentHelper() {} |
|
398 |
||
399 |
bool addFieldsToDocument(QCLuceneDocument *document, |
|
400 |
const QString &namespaceName, const QString &attributes = QString()) |
|
401 |
{ |
|
402 |
if (!document) |
|
403 |
return false; |
|
404 |
||
405 |
if(!data.isEmpty()) { |
|
406 |
QString parsedData = parseData(); |
|
407 |
QString parsedTitle = QHelpGlobal::documentTitle(data); |
|
408 |
||
409 |
if(!parsedData.isEmpty()) { |
|
410 |
document->add(new QCLuceneField(QLatin1String("content"), |
|
411 |
parsedData,QCLuceneField::INDEX_TOKENIZED)); |
|
412 |
document->add(new QCLuceneField(QLatin1String("path"), fileName, |
|
413 |
QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
414 |
document->add(new QCLuceneField(QLatin1String("title"), parsedTitle, |
|
415 |
QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
416 |
document->add(new QCLuceneField(QLatin1String("titleTokenized"), parsedTitle, |
|
417 |
QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); |
|
418 |
document->add(new QCLuceneField(QLatin1String("namespace"), namespaceName, |
|
419 |
QCLuceneField::STORE_YES | QCLuceneField::INDEX_UNTOKENIZED)); |
|
420 |
document->add(new QCLuceneField(QLatin1String("attribute"), attributes, |
|
421 |
QCLuceneField::STORE_YES | QCLuceneField::INDEX_TOKENIZED)); |
|
422 |
return true; |
|
423 |
} |
|
424 |
} |
|
425 |
||
426 |
return false; |
|
427 |
} |
|
428 |
||
429 |
private: |
|
430 |
QString readData(const QByteArray &data) |
|
431 |
{ |
|
432 |
QTextStream textStream(data); |
|
3
41300fa6a67c
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
433 |
const QByteArray &codec = QHelpGlobal::codecFromData(data).toLatin1(); |
41300fa6a67c
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
434 |
textStream.setCodec(QTextCodec::codecForName(codec.constData())); |
0 | 435 |
|
436 |
QString stream = textStream.readAll(); |
|
437 |
if (stream.isNull() || stream.isEmpty()) |
|
438 |
return QString(); |
|
439 |
||
440 |
return stream; |
|
441 |
} |
|
442 |
||
443 |
QString parseData() const |
|
444 |
{ |
|
445 |
const int length = data.length(); |
|
446 |
const QChar *buf = data.unicode(); |
|
447 |
||
448 |
QString parsedContent; |
|
449 |
parsedContent.reserve(length); |
|
450 |
||
451 |
bool valid = true; |
|
452 |
int j = 0, count = 0; |
|
453 |
||
454 |
QChar c; |
|
455 |
while (j < length) { |
|
456 |
c = buf[j++]; |
|
457 |
if (c == QLatin1Char('<') || c == QLatin1Char('&')) { |
|
458 |
if (count > 1 && c != QLatin1Char('&')) |
|
459 |
parsedContent.append(QLatin1Char(' ')); |
|
460 |
else if (c == QLatin1Char('&')) { |
|
461 |
// Note: this will modify the counter j, in case we sucessful parsed the entity |
|
462 |
// we will have modified the counter to stay 1 before the closing ';', so |
|
463 |
// the following if condition will be met with if (c == QLatin1Char(';')) |
|
464 |
parsedContent.append(parseEntity(length, buf, j)); |
|
465 |
} |
|
466 |
||
467 |
count = 0; |
|
468 |
valid = false; |
|
469 |
continue; |
|
470 |
} |
|
471 |
if ((c == QLatin1Char('>') || c == QLatin1Char(';')) && !valid) { |
|
472 |
valid = true; |
|
473 |
continue; |
|
474 |
} |
|
475 |
if (!valid) |
|
476 |
continue; |
|
477 |
||
478 |
if (c.isLetterOrNumber() || c.isPrint()) { |
|
479 |
++count; |
|
480 |
parsedContent.append(c.toLower()); |
|
481 |
} else { |
|
482 |
if (count > 1) |
|
483 |
parsedContent.append(QLatin1Char(' ')); |
|
484 |
count = 0; |
|
485 |
} |
|
486 |
} |
|
487 |
||
488 |
return parsedContent; |
|
489 |
} |
|
490 |
||
491 |
// taken from qtexthtmlparser |
|
492 |
// parses an entity after "&", and returns it |
|
493 |
QString parseEntity(int len, const QChar *buf, int &pos) const |
|
494 |
{ |
|
495 |
int recover = pos; |
|
496 |
QString entity; |
|
497 |
while (pos < len) { |
|
498 |
QChar c = buf[pos++]; |
|
499 |
if (c.isSpace() || pos - recover > 9) { |
|
500 |
goto error; |
|
501 |
} |
|
502 |
if (c == QLatin1Char(';')) { |
|
503 |
pos--; |
|
504 |
break; |
|
505 |
} |
|
506 |
entity += c; |
|
507 |
} |
|
508 |
{ |
|
509 |
QChar resolved = resolveEntity(entity); |
|
510 |
if (!resolved.isNull()) |
|
511 |
return QString(resolved); |
|
512 |
} |
|
513 |
if (entity.length() > 1 && entity.at(0) == QLatin1Char('#')) { |
|
514 |
entity.remove(0, 1); // removing leading # |
|
515 |
||
516 |
int base = 10; |
|
517 |
bool ok = false; |
|
518 |
||
519 |
if (entity.at(0).toLower() == QLatin1Char('x')) { // hex entity? |
|
520 |
entity.remove(0, 1); |
|
521 |
base = 16; |
|
522 |
} |
|
523 |
||
524 |
uint uc = entity.toUInt(&ok, base); |
|
525 |
if (ok) { |
|
526 |
if (uc >= 0x80 && uc < 0x80 + (sizeof(latin1Extended) / sizeof(latin1Extended[0]))) |
|
527 |
uc = latin1Extended[uc - 0x80]; // windows latin 1 extended |
|
528 |
QString str; |
|
529 |
if (uc > 0xffff) { |
|
530 |
// surrogate pair |
|
531 |
uc -= 0x10000; |
|
532 |
ushort high = uc/0x400 + 0xd800; |
|
533 |
ushort low = uc%0x400 + 0xdc00; |
|
534 |
str.append(QChar(high)); |
|
535 |
str.append(QChar(low)); |
|
536 |
} else { |
|
537 |
str.append(QChar(uc)); |
|
538 |
} |
|
539 |
return str; |
|
540 |
} |
|
541 |
} |
|
542 |
error: |
|
543 |
pos = recover; |
|
544 |
return QLatin1String(" "); |
|
545 |
} |
|
546 |
// end taken from qtexthtmlparser |
|
547 |
||
548 |
private: |
|
549 |
QString fileName; |
|
550 |
QString data; |
|
551 |
}; |
|
552 |
||
553 |
||
554 |
// Creates an idle writer thread without a parent object.
QHelpSearchIndexWriter::QHelpSearchIndexWriter()
    : QThread(0)
    , m_cancel(false)
{
    // updateIndex() overwrites this before the thread starts; give it a
    // defined value so the flag is never read uninitialized.
    m_reindex = false;
}
|
560 |
||
561 |
// Requests cancellation, wakes one waiter on the condition and blocks until
// the thread has finished.
QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
{
    {
        QMutexLocker guard(&mutex);
        m_cancel = true;
        waitCondition.wakeOne();
    }

    // Must happen without holding the mutex: run() locks it to poll m_cancel.
    wait();
}
|
570 |
||
571 |
void QHelpSearchIndexWriter::cancelIndexing() |
|
572 |
{ |
|
573 |
mutex.lock(); |
|
574 |
this->m_cancel = true; |
|
575 |
mutex.unlock(); |
|
576 |
} |
|
577 |
||
578 |
void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
|
579 |
const QString &indexFilesFolder, bool reindex) |
|
580 |
{ |
|
3
41300fa6a67c
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
581 |
wait(); |
0 | 582 |
mutex.lock(); |
583 |
this->m_cancel = false; |
|
584 |
this->m_reindex = reindex; |
|
585 |
this->m_collectionFile = collectionFile; |
|
586 |
this->m_indexFilesFolder = indexFilesFolder; |
|
587 |
mutex.unlock(); |
|
588 |
||
589 |
start(QThread::NormalPriority); |
|
590 |
} |
|
591 |
||
592 |
void QHelpSearchIndexWriter::optimizeIndex() |
|
593 |
{ |
|
594 |
#if !defined(QT_NO_EXCEPTIONS) |
|
595 |
try { |
|
596 |
#endif |
|
597 |
if (QCLuceneIndexReader::indexExists(m_indexFilesFolder)) { |
|
598 |
if (QCLuceneIndexReader::isLocked(m_indexFilesFolder)) |
|
599 |
return; |
|
600 |
||
601 |
QCLuceneStandardAnalyzer analyzer; |
|
602 |
QCLuceneIndexWriter writer(m_indexFilesFolder, analyzer, false); |
|
603 |
writer.optimize(); |
|
604 |
writer.close(); |
|
605 |
} |
|
606 |
#if !defined(QT_NO_EXCEPTIONS) |
|
607 |
} catch (...) { |
|
608 |
qWarning("Full Text Search, could not optimize index."); |
|
609 |
return; |
|
610 |
} |
|
611 |
#endif |
|
612 |
} |
|
613 |
||
614 |
void QHelpSearchIndexWriter::run() |
|
615 |
{ |
|
616 |
QMutexLocker mutexLocker(&mutex); |
|
617 |
||
618 |
if (m_cancel) |
|
619 |
return; |
|
620 |
||
621 |
const bool reindex = this->m_reindex; |
|
622 |
const QString collectionFile(this->m_collectionFile); |
|
623 |
||
624 |
mutexLocker.unlock(); |
|
625 |
||
626 |
QHelpEngineCore engine(collectionFile, 0); |
|
627 |
if (!engine.setupData()) |
|
628 |
return; |
|
629 |
||
630 |
const QLatin1String key("CluceneIndexedNamespaces"); |
|
631 |
if (reindex) |
|
632 |
engine.setCustomValue(key, QLatin1String("")); |
|
633 |
||
634 |
QMap<QString, QDateTime> indexMap; |
|
635 |
const QLatin1String oldKey("CluceneSearchNamespaces"); |
|
636 |
if (!engine.customValue(oldKey, QString()).isNull()) { |
|
637 |
// old style qhc file < 4.4.2, need to convert... |
|
638 |
const QStringList indexedNamespaces = engine.customValue(oldKey). |
|
639 |
toString().split(QLatin1String("|"), QString::SkipEmptyParts); |
|
640 |
foreach (const QString &nameSpace, indexedNamespaces) |
|
641 |
indexMap.insert(nameSpace, QDateTime()); |
|
642 |
engine.removeCustomValue(oldKey); |
|
643 |
} else { |
|
644 |
QDataStream dataStream(engine.customValue(key).toByteArray()); |
|
645 |
dataStream >> indexMap; |
|
646 |
} |
|
647 |
||
648 |
QString indexPath = m_indexFilesFolder; |
|
649 |
||
650 |
QFileInfo fInfo(indexPath); |
|
651 |
if (fInfo.exists() && !fInfo.isWritable()) { |
|
652 |
qWarning("Full Text Search, could not create index (missing permissions for '%s').", qPrintable(indexPath)); |
|
653 |
return; |
|
654 |
} |
|
655 |
||
656 |
emit indexingStarted(); |
|
657 |
||
658 |
QCLuceneIndexWriter *writer = 0; |
|
659 |
QCLuceneStandardAnalyzer analyzer; |
|
660 |
const QStringList registeredDocs = engine.registeredDocumentations(); |
|
661 |
||
662 |
QLocalSocket localSocket; |
|
663 |
localSocket.connectToServer(QString(QLatin1String("QtAssistant%1")) |
|
664 |
.arg(QLatin1String(QT_VERSION_STR))); |
|
665 |
||
666 |
QLocalServer localServer; |
|
667 |
bool otherInstancesRunning = true; |
|
668 |
if (!localSocket.waitForConnected()) { |
|
669 |
otherInstancesRunning = false; |
|
670 |
localServer.listen(QString(QLatin1String("QtAssistant%1")) |
|
671 |
.arg(QLatin1String(QT_VERSION_STR))); |
|
672 |
} |
|
673 |
||
674 |
#if !defined(QT_NO_EXCEPTIONS) |
|
675 |
try { |
|
676 |
#endif |
|
677 |
// check if it's locked, and if the other instance is running |
|
678 |
if (!otherInstancesRunning && QCLuceneIndexReader::isLocked(indexPath)) |
|
679 |
QCLuceneIndexReader::unlock(indexPath); |
|
680 |
||
681 |
if (QCLuceneIndexReader::isLocked(indexPath)) { |
|
682 |
// poll unless indexing finished to fake progress |
|
683 |
while (QCLuceneIndexReader::isLocked(indexPath)) { |
|
684 |
mutexLocker.relock(); |
|
685 |
if (m_cancel) |
|
686 |
break; |
|
687 |
mutexLocker.unlock(); |
|
688 |
this->sleep(1); |
|
689 |
} |
|
690 |
emit indexingFinished(); |
|
691 |
return; |
|
692 |
} |
|
693 |
||
694 |
if (QCLuceneIndexReader::indexExists(indexPath) && !reindex) { |
|
695 |
foreach(const QString &namespaceName, registeredDocs) { |
|
696 |
mutexLocker.relock(); |
|
697 |
if (m_cancel) { |
|
698 |
emit indexingFinished(); |
|
699 |
return; |
|
700 |
} |
|
701 |
mutexLocker.unlock(); |
|
702 |
||
703 |
if (!indexMap.contains(namespaceName)) { |
|
704 |
// make sure we remove some partly indexed stuff |
|
705 |
removeDocuments(indexPath, namespaceName); |
|
706 |
} else { |
|
707 |
QString path = engine.documentationFileName(namespaceName); |
|
708 |
if (indexMap.value(namespaceName) < QFileInfo(path).lastModified()) { |
|
709 |
// make sure we remove some outdated indexed stuff |
|
710 |
indexMap.remove(namespaceName); |
|
711 |
removeDocuments(indexPath, namespaceName); |
|
712 |
} |
|
713 |
||
714 |
if (indexMap.contains(namespaceName)) { |
|
715 |
// make sure we really have content indexed for namespace |
|
716 |
// NOTE: Extra variable just for GCC 3.3.5 |
|
717 |
QLatin1String key("namespace"); |
|
718 |
QCLuceneTermQuery query(QCLuceneTerm(key, namespaceName)); |
|
719 |
QCLuceneIndexSearcher indexSearcher(indexPath); |
|
720 |
QCLuceneHits hits = indexSearcher.search(query); |
|
721 |
if (hits.length() <= 0) |
|
722 |
indexMap.remove(namespaceName); |
|
723 |
} |
|
724 |
} |
|
725 |
} |
|
726 |
writer = new QCLuceneIndexWriter(indexPath, analyzer, false); |
|
727 |
} else { |
|
728 |
indexMap.clear(); |
|
729 |
writer = new QCLuceneIndexWriter(indexPath, analyzer, true); |
|
730 |
} |
|
731 |
#if !defined(QT_NO_EXCEPTIONS) |
|
732 |
} catch (...) { |
|
733 |
qWarning("Full Text Search, could not create index writer in '%s'.", |
|
734 |
qPrintable(indexPath)); |
|
735 |
return; |
|
736 |
} |
|
737 |
#endif |
|
738 |
||
739 |
#if !defined(QT_NO_EXCEPTIONS) |
|
740 |
try { |
|
741 |
#endif |
|
742 |
writer->setMergeFactor(100); |
|
743 |
writer->setMinMergeDocs(1000); |
|
744 |
writer->setMaxFieldLength(QCLuceneIndexWriter::DEFAULT_MAX_FIELD_LENGTH); |
|
745 |
#if !defined(QT_NO_EXCEPTIONS) |
|
746 |
} catch (...) { |
|
747 |
qWarning("Full Text Search, could not set writer properties."); |
|
748 |
return; |
|
749 |
} |
|
750 |
#endif |
|
751 |
||
752 |
QStringList namespaces; |
|
753 |
foreach(const QString &namespaceName, registeredDocs) { |
|
754 |
mutexLocker.relock(); |
|
755 |
if (m_cancel) { |
|
756 |
closeIndexWriter(writer); |
|
757 |
emit indexingFinished(); |
|
758 |
return; |
|
759 |
} |
|
760 |
mutexLocker.unlock(); |
|
761 |
||
762 |
namespaces.append(namespaceName); |
|
763 |
if (indexMap.contains(namespaceName)) |
|
764 |
continue; |
|
765 |
||
766 |
const QList<QStringList> attributeSets = |
|
767 |
engine.filterAttributeSets(namespaceName); |
|
768 |
||
769 |
if (attributeSets.isEmpty()) { |
|
770 |
const QList<QUrl> docFiles = indexableFiles(&engine, namespaceName, |
|
771 |
QStringList()); |
|
772 |
if (!addDocuments(docFiles, engine, QStringList(), namespaceName, |
|
773 |
writer, analyzer)) |
|
774 |
break; |
|
775 |
} else { |
|
776 |
bool bail = false; |
|
777 |
foreach (const QStringList &attributes, attributeSets) { |
|
778 |
const QList<QUrl> docFiles = indexableFiles(&engine, |
|
779 |
namespaceName, attributes); |
|
780 |
if (!addDocuments(docFiles, engine, attributes, namespaceName, |
|
781 |
writer, analyzer)) { |
|
782 |
bail = true; |
|
783 |
break; |
|
784 |
} |
|
785 |
} |
|
786 |
if (bail) |
|
787 |
break; |
|
788 |
} |
|
789 |
||
790 |
mutexLocker.relock(); |
|
791 |
if (!m_cancel) { |
|
792 |
QString path(engine.documentationFileName(namespaceName)); |
|
793 |
indexMap.insert(namespaceName, QFileInfo(path).lastModified()); |
|
794 |
writeIndexMap(engine, indexMap); |
|
795 |
} |
|
796 |
mutexLocker.unlock(); |
|
797 |
} |
|
798 |
||
799 |
closeIndexWriter(writer); |
|
800 |
||
801 |
mutexLocker.relock(); |
|
802 |
if (!m_cancel) { |
|
803 |
mutexLocker.unlock(); |
|
804 |
||
805 |
QStringList indexedNamespaces = indexMap.keys(); |
|
806 |
foreach(const QString &namespaceName, indexedNamespaces) { |
|
807 |
mutexLocker.relock(); |
|
808 |
if (m_cancel) |
|
809 |
break; |
|
810 |
mutexLocker.unlock(); |
|
811 |
||
812 |
if (!namespaces.contains(namespaceName)) { |
|
813 |
indexMap.remove(namespaceName); |
|
814 |
writeIndexMap(engine, indexMap); |
|
815 |
removeDocuments(indexPath, namespaceName); |
|
816 |
} |
|
817 |
} |
|
818 |
} |
|
819 |
emit indexingFinished(); |
|
820 |
} |
|
821 |
||
822 |
bool QHelpSearchIndexWriter::addDocuments(const QList<QUrl> docFiles, |
|
823 |
const QHelpEngineCore &engine, const QStringList &attributes, |
|
824 |
const QString &namespaceName, QCLuceneIndexWriter *writer, |
|
825 |
QCLuceneAnalyzer &analyzer) |
|
826 |
{ |
|
827 |
QMutexLocker locker(&mutex); |
|
828 |
const QString attrList = attributes.join(QLatin1String(" ")); |
|
829 |
||
830 |
locker.unlock(); |
|
831 |
foreach(const QUrl &url, docFiles) { |
|
832 |
QCLuceneDocument document; |
|
833 |
DocumentHelper helper(url.toString(), engine.fileData(url)); |
|
834 |
if (helper.addFieldsToDocument(&document, namespaceName, attrList)) { |
|
835 |
#if !defined(QT_NO_EXCEPTIONS) |
|
836 |
try { |
|
837 |
#endif |
|
838 |
writer->addDocument(document, analyzer); |
|
839 |
#if !defined(QT_NO_EXCEPTIONS) |
|
840 |
} catch (...) { |
|
841 |
qWarning("Full Text Search, could not properly add documents."); |
|
842 |
return false; |
|
843 |
} |
|
844 |
#endif |
|
845 |
} |
|
846 |
locker.relock(); |
|
847 |
if (m_cancel) |
|
848 |
return false; |
|
849 |
locker.unlock(); |
|
850 |
} |
|
851 |
return true; |
|
852 |
} |
|
853 |
||
854 |
void QHelpSearchIndexWriter::removeDocuments(const QString &indexPath, |
|
855 |
const QString &namespaceName) |
|
856 |
{ |
|
857 |
if (namespaceName.isEmpty() || QCLuceneIndexReader::isLocked(indexPath)) |
|
858 |
return; |
|
859 |
||
860 |
QCLuceneIndexReader reader = QCLuceneIndexReader::open(indexPath); |
|
861 |
reader.deleteDocuments(QCLuceneTerm(QLatin1String("namespace"), |
|
862 |
namespaceName)); |
|
863 |
||
864 |
reader.close(); |
|
865 |
} |
|
866 |
||
867 |
// Serializes indexMap with QDataStream and stores it in the collection under
// "CluceneIndexedNamespaces". Returns the result of setCustomValue().
bool QHelpSearchIndexWriter::writeIndexMap(QHelpEngineCore &engine,
    const QMap<QString, QDateTime> &indexMap)
{
    QByteArray serialized;
    {
        QDataStream out(&serialized, QIODevice::ReadWrite);
        out << indexMap;
    }

    const QLatin1String storageKey("CluceneIndexedNamespaces");
    return engine.setCustomValue(storageKey, serialized);
}
|
878 |
||
879 |
// Collects the files of namespaceName that match attributes and have one of
// the indexable extensions, in the order html, htm, txt.
QList<QUrl> QHelpSearchIndexWriter::indexableFiles(QHelpEngineCore *helpEngine,
    const QString &namespaceName, const QStringList &attributes) const
{
    static const char * const extensions[] = { "html", "htm", "txt" };
    const int extensionCount = int(sizeof(extensions) / sizeof(extensions[0]));

    QList<QUrl> docFiles;
    for (int i = 0; i < extensionCount; ++i) {
        docFiles += helpEngine->files(namespaceName, attributes,
            QLatin1String(extensions[i]));
    }

    return docFiles;
}
|
889 |
||
890 |
// Closes and destroys writer. Passing a null pointer is a no-op.
//
// The writer is deleted even when close() throws, so a failed close no
// longer leaks it; any exception is reported with a warning and swallowed.
void QHelpSearchIndexWriter::closeIndexWriter(QCLuceneIndexWriter *writer)
{
    if (!writer)
        return;

#if !defined(QT_NO_EXCEPTIONS)
    try {
#endif
        writer->close();
#if !defined(QT_NO_EXCEPTIONS)
    } catch (...) {
        qWarning("Full Text Search, could not properly close index writer.");
    }
#endif

    // Separate guard: destruction happens regardless of how close() ended.
#if !defined(QT_NO_EXCEPTIONS)
    try {
#endif
        delete writer;
#if !defined(QT_NO_EXCEPTIONS)
    } catch (...) {
        qWarning("Full Text Search, could not properly close index writer.");
    }
#endif
}
|
903 |
||
904 |
} // namespace clucene |
|
905 |
} // namespace fulltextsearch |
|
906 |
} // namespace qt |
|
907 |
||
908 |
QT_END_NAMESPACE |