author | Eckhart Koeppen <eckhart.koppen@nokia.com> |
Thu, 22 Apr 2010 16:15:11 +0300 | |
branch | RCL_3 |
changeset 14 | 8c4229025c0b |
parent 7 | 3f74d0d4af4c |
permissions | -rw-r--r-- |
0 | 1 |
/**************************************************************************** |
2 |
** |
|
4
3b1da2848fc7
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
3
diff
changeset
|
3 |
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies). |
0 | 4 |
** All rights reserved. |
5 |
** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 |
** |
|
7 |
** This file is part of the Qt Assistant of the Qt Toolkit. |
|
8 |
** |
|
9 |
** $QT_BEGIN_LICENSE:LGPL$ |
|
10 |
** No Commercial Usage |
|
11 |
** This file contains pre-release code and may not be distributed. |
|
12 |
** You may use this file in accordance with the terms and conditions |
|
13 |
** contained in the Technology Preview License Agreement accompanying |
|
14 |
** this package. |
|
15 |
** |
|
16 |
** GNU Lesser General Public License Usage |
|
17 |
** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 |
** General Public License version 2.1 as published by the Free Software |
|
19 |
** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 |
** packaging of this file. Please review the following information to |
|
21 |
** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 |
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 |
** |
|
24 |
** In addition, as a special exception, Nokia gives you certain additional |
|
25 |
** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 |
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 |
** |
|
28 |
** If you have questions regarding the use of this file, please contact |
|
29 |
** Nokia at qt-info@nokia.com. |
|
30 |
** |
|
31 |
** |
|
32 |
** |
|
33 |
** |
|
34 |
** |
|
35 |
** |
|
36 |
** |
|
37 |
** |
|
38 |
** $QT_END_LICENSE$ |
|
39 |
** |
|
40 |
****************************************************************************/ |
|
41 |
||
42 |
#include "qhelpsearchindexwriter_default_p.h" |
|
43 |
#include "qhelp_global.h" |
|
44 |
#include "qhelpenginecore.h" |
|
45 |
||
46 |
#include <QtCore/QDir> |
|
47 |
#include <QtCore/QSet> |
|
48 |
#include <QtCore/QUrl> |
|
49 |
#include <QtCore/QFile> |
|
50 |
#include <QtCore/QRegExp> |
|
51 |
#include <QtCore/QVariant> |
|
52 |
#include <QtCore/QFileInfo> |
|
53 |
#include <QtCore/QTextCodec> |
|
54 |
#include <QtCore/QTextStream> |
|
55 |
||
56 |
QT_BEGIN_NAMESPACE |
|
57 |
||
7
3f74d0d4af4c
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Eckhart Koeppen <eckhart.koppen@nokia.com>
parents:
5
diff
changeset
|
58 |
namespace fulltextsearch { |
3f74d0d4af4c
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Eckhart Koeppen <eckhart.koppen@nokia.com>
parents:
5
diff
changeset
|
59 |
namespace std { |
0 | 60 |
|
61 |
// Creates a writer whose index files will live below 'path'. The concrete
// file names stay empty until setIndexFile() is called.
Writer::Writer(const QString &path)
    : indexPath(path)
    , indexFile()
    , documentFile()
{
    // intentionally empty
}
|
68 |
||
69 |
// Frees whatever is still held in memory by delegating to reset().
Writer::~Writer()
{
    this->reset();
}
|
73 |
||
74 |
void Writer::reset() |
|
75 |
{ |
|
76 |
for(QHash<QString, Entry*>::ConstIterator it = |
|
77 |
index.begin(); it != index.end(); ++it) { |
|
78 |
delete it.value(); |
|
79 |
} |
|
80 |
||
81 |
index.clear(); |
|
82 |
documentList.clear(); |
|
83 |
} |
|
84 |
||
85 |
bool Writer::writeIndex() const |
|
86 |
{ |
|
87 |
bool status; |
|
88 |
QFile idxFile(indexFile); |
|
89 |
if (!(status = idxFile.open(QFile::WriteOnly))) |
|
90 |
return status; |
|
91 |
||
92 |
QDataStream indexStream(&idxFile); |
|
93 |
for(QHash<QString, Entry*>::ConstIterator it = |
|
94 |
index.begin(); it != index.end(); ++it) { |
|
95 |
indexStream << it.key(); |
|
96 |
indexStream << it.value()->documents.count(); |
|
97 |
indexStream << it.value()->documents; |
|
98 |
} |
|
99 |
idxFile.close(); |
|
100 |
||
101 |
QFile docFile(documentFile); |
|
102 |
if (!(status = docFile.open(QFile::WriteOnly))) |
|
103 |
return status; |
|
104 |
||
105 |
QDataStream docStream(&docFile); |
|
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
106 |
foreach(const QStringList &list, documentList) { |
0 | 107 |
docStream << list.at(0); |
108 |
docStream << list.at(1); |
|
109 |
} |
|
110 |
docFile.close(); |
|
111 |
||
112 |
return status; |
|
113 |
} |
|
114 |
||
115 |
void Writer::removeIndex() const |
|
116 |
{ |
|
117 |
QFile idxFile(indexFile); |
|
118 |
if (idxFile.exists()) |
|
119 |
idxFile.remove(); |
|
120 |
||
121 |
QFile docFile(documentFile); |
|
122 |
if (docFile.exists()) |
|
123 |
docFile.remove(); |
|
124 |
} |
|
125 |
||
126 |
void Writer::setIndexFile(const QString &namespaceName, const QString &attributes) |
|
127 |
{ |
|
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
128 |
QString extension = namespaceName + QLatin1String("@") + attributes; |
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
129 |
indexFile = indexPath + QLatin1String("/indexdb40.") + extension; |
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
130 |
documentFile = indexPath + QLatin1String("/indexdoc40.") + extension; |
0 | 131 |
} |
132 |
||
133 |
void Writer::insertInIndex(const QString &string, int docNum) |
|
134 |
{ |
|
135 |
if (string == QLatin1String("amp") || string == QLatin1String("nbsp")) |
|
136 |
return; |
|
137 |
||
138 |
Entry *entry = 0; |
|
139 |
if (index.count()) |
|
140 |
entry = index[string]; |
|
141 |
||
142 |
if (entry) { |
|
143 |
if (entry->documents.last().docNumber != docNum) |
|
144 |
entry->documents.append(Document(docNum, 1)); |
|
145 |
else |
|
146 |
entry->documents.last().frequency++; |
|
147 |
} else { |
|
148 |
index.insert(string, new Entry(docNum)); |
|
149 |
} |
|
150 |
} |
|
151 |
||
152 |
void Writer::insertInDocumentList(const QString &title, const QString &url) |
|
153 |
{ |
|
154 |
documentList.append(QStringList(title) << url); |
|
155 |
} |
|
156 |
||
157 |
||
158 |
// Constructs an idle indexer thread; nothing runs until updateIndex() is
// called. m_reindex is not initialised here; updateIndex() assigns it
// before the thread is started.
QHelpSearchIndexWriter::QHelpSearchIndexWriter()
    : QThread(),
      m_cancel(false)
{
    // intentionally empty
}
|
164 |
||
165 |
// Requests cancellation of a running indexing pass and blocks until the
// worker thread has finished.
QHelpSearchIndexWriter::~QHelpSearchIndexWriter()
{
    {
        // Set the flag and signal the condition under the mutex; the mutex
        // is released again before waiting for the thread.
        QMutexLocker locker(&mutex);
        m_cancel = true;
        waitCondition.wakeOne();
    }

    wait();
}
|
174 |
||
175 |
void QHelpSearchIndexWriter::cancelIndexing() |
|
176 |
{ |
|
177 |
mutex.lock(); |
|
178 |
this->m_cancel = true; |
|
179 |
mutex.unlock(); |
|
180 |
} |
|
181 |
||
182 |
void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
|
183 |
const QString &indexFilesFolder, |
|
184 |
bool reindex) |
|
185 |
{ |
|
3
41300fa6a67c
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
186 |
wait(); |
0 | 187 |
QMutexLocker lock(&mutex); |
188 |
||
189 |
this->m_cancel = false; |
|
190 |
this->m_reindex = reindex; |
|
191 |
this->m_collectionFile = collectionFile; |
|
192 |
this->m_indexFilesFolder = indexFilesFolder; |
|
193 |
||
7
3f74d0d4af4c
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Eckhart Koeppen <eckhart.koppen@nokia.com>
parents:
5
diff
changeset
|
194 |
start(QThread::LowestPriority); |
0 | 195 |
} |
196 |
||
197 |
void QHelpSearchIndexWriter::run() |
|
198 |
{ |
|
199 |
mutex.lock(); |
|
200 |
||
201 |
if (m_cancel) { |
|
202 |
mutex.unlock(); |
|
203 |
return; |
|
204 |
} |
|
205 |
||
206 |
const bool reindex(this->m_reindex); |
|
207 |
const QLatin1String key("DefaultSearchNamespaces"); |
|
208 |
const QString collectionFile(this->m_collectionFile); |
|
209 |
const QString indexPath = m_indexFilesFolder; |
|
210 |
||
211 |
mutex.unlock(); |
|
212 |
||
213 |
QHelpEngineCore engine(collectionFile, 0); |
|
214 |
if (!engine.setupData()) |
|
215 |
return; |
|
216 |
||
217 |
if (reindex) |
|
218 |
engine.setCustomValue(key, QLatin1String("")); |
|
219 |
||
220 |
const QStringList registeredDocs = engine.registeredDocumentations(); |
|
221 |
const QStringList indexedNamespaces = engine.customValue(key).toString(). |
|
222 |
split(QLatin1String("|"), QString::SkipEmptyParts); |
|
223 |
||
224 |
emit indexingStarted(); |
|
225 |
||
226 |
QStringList namespaces; |
|
227 |
Writer writer(indexPath); |
|
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
228 |
foreach(const QString &namespaceName, registeredDocs) { |
0 | 229 |
mutex.lock(); |
230 |
if (m_cancel) { |
|
231 |
mutex.unlock(); |
|
232 |
return; |
|
233 |
} |
|
234 |
mutex.unlock(); |
|
235 |
||
236 |
// if indexed, continue |
|
237 |
namespaces.append(namespaceName); |
|
238 |
if (indexedNamespaces.contains(namespaceName)) |
|
239 |
continue; |
|
240 |
||
241 |
const QList<QStringList> attributeSets = |
|
242 |
engine.filterAttributeSets(namespaceName); |
|
243 |
||
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
244 |
foreach (const QStringList &attributes, attributeSets) { |
0 | 245 |
// cleanup maybe old or unfinished files |
246 |
writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
|
247 |
writer.removeIndex(); |
|
248 |
||
249 |
QSet<QString> documentsSet; |
|
250 |
const QList<QUrl> docFiles = engine.files(namespaceName, attributes); |
|
251 |
foreach(QUrl url, docFiles) { |
|
252 |
if (m_cancel) |
|
253 |
return; |
|
254 |
||
255 |
// get rid of duplicated files |
|
256 |
if (url.hasFragment()) |
|
257 |
url.setFragment(QString()); |
|
258 |
||
259 |
QString s = url.toString(); |
|
260 |
if (s.endsWith(QLatin1String(".html")) |
|
261 |
|| s.endsWith(QLatin1String(".htm")) |
|
262 |
|| s.endsWith(QLatin1String(".txt"))) |
|
263 |
documentsSet.insert(s); |
|
264 |
} |
|
265 |
||
266 |
int docNum = 0; |
|
267 |
const QStringList documentsList(documentsSet.toList()); |
|
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
268 |
foreach(const QString &url, documentsList) { |
0 | 269 |
if (m_cancel) |
270 |
return; |
|
271 |
||
272 |
QByteArray data(engine.fileData(url)); |
|
273 |
if (data.isEmpty()) |
|
274 |
continue; |
|
275 |
||
276 |
QTextStream s(data); |
|
3
41300fa6a67c
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
0
diff
changeset
|
277 |
QString en = QHelpGlobal::codecFromData(data); |
0 | 278 |
s.setCodec(QTextCodec::codecForName(en.toLatin1().constData())); |
279 |
||
280 |
QString text = s.readAll(); |
|
281 |
if (text.isNull()) |
|
282 |
continue; |
|
283 |
||
284 |
QString title = QHelpGlobal::documentTitle(text); |
|
285 |
||
286 |
int j = 0; |
|
287 |
int i = 0; |
|
288 |
bool valid = true; |
|
289 |
const QChar *buf = text.unicode(); |
|
290 |
QChar str[64]; |
|
291 |
QChar c = buf[0]; |
|
292 |
||
293 |
while ( j < text.length() ) { |
|
294 |
if (m_cancel) |
|
295 |
return; |
|
296 |
||
297 |
if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { |
|
298 |
valid = false; |
|
299 |
if ( i > 1 ) |
|
300 |
writer.insertInIndex(QString(str,i), docNum); |
|
301 |
i = 0; |
|
302 |
c = buf[++j]; |
|
303 |
continue; |
|
304 |
} |
|
305 |
if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { |
|
306 |
valid = true; |
|
307 |
c = buf[++j]; |
|
308 |
continue; |
|
309 |
} |
|
310 |
if ( !valid ) { |
|
311 |
c = buf[++j]; |
|
312 |
continue; |
|
313 |
} |
|
314 |
if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { |
|
315 |
str[i] = c.toLower(); |
|
316 |
++i; |
|
317 |
} else { |
|
318 |
if ( i > 1 ) |
|
319 |
writer.insertInIndex(QString(str,i), docNum); |
|
320 |
i = 0; |
|
321 |
} |
|
322 |
c = buf[++j]; |
|
323 |
} |
|
324 |
if ( i > 1 ) |
|
325 |
writer.insertInIndex(QString(str,i), docNum); |
|
326 |
||
327 |
docNum++; |
|
328 |
writer.insertInDocumentList(title, url); |
|
329 |
} |
|
330 |
||
331 |
if (writer.writeIndex()) { |
|
332 |
engine.setCustomValue(key, addNamespace( |
|
333 |
engine.customValue(key).toString(), namespaceName)); |
|
334 |
} |
|
335 |
||
336 |
writer.reset(); |
|
337 |
} |
|
338 |
} |
|
339 |
||
340 |
QStringListIterator qsli(indexedNamespaces); |
|
341 |
while (qsli.hasNext()) { |
|
342 |
const QString namespaceName = qsli.next(); |
|
343 |
if (namespaces.contains(namespaceName)) |
|
344 |
continue; |
|
345 |
||
346 |
const QList<QStringList> attributeSets = |
|
347 |
engine.filterAttributeSets(namespaceName); |
|
348 |
||
5
d3bac044e0f0
Revision: 201007
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
4
diff
changeset
|
349 |
foreach (const QStringList &attributes, attributeSets) { |
0 | 350 |
writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
351 |
writer.removeIndex(); |
|
352 |
} |
|
353 |
||
354 |
engine.setCustomValue(key, removeNamespace( |
|
355 |
engine.customValue(key).toString(), namespaceName)); |
|
356 |
} |
|
357 |
||
358 |
emit indexingFinished(); |
|
359 |
} |
|
360 |
||
361 |
// Returns 'namespaces' (a list of names, each followed by '|') with
// 'namespaceName' appended unless it is already one of the entries.
//
// Entries are compared as whole names. A plain substring test would treat
// "com.foo" as present when only "com.foo.bar" is listed and would never
// add it. An empty 'namespaceName' leaves the list unchanged.
QString QHelpSearchIndexWriter::addNamespace(const QString namespaces,
                                             const QString &namespaceName)
{
    QString value = namespaces;
    if (namespaceName.isEmpty())
        return value;

    const QStringList entries =
        value.split(QLatin1String("|"), QString::SkipEmptyParts);
    if (!entries.contains(namespaceName))
        value.append(namespaceName).append(QLatin1String("|"));

    return value;
}
|
370 |
||
371 |
// Returns 'namespaces' (a list of names, each followed by '|') without the
// entries equal to 'namespaceName'. The input is returned unchanged when no
// entry matches.
//
// Entries are compared as whole names. Removing the text
// namespaceName + "|" wherever it occurs would also cut the tail out of a
// longer entry that merely ends with the same characters.
QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces,
                                                const QString &namespaceName)
{
    const QStringList entries =
        namespaces.split(QLatin1String("|"), QString::SkipEmptyParts);
    if (!entries.contains(namespaceName))
        return namespaces;

    // Start from an empty (not null) string so that removing the last entry
    // yields "", the same value run() stores for a forced reindex.
    QString value = QLatin1String("");
    foreach (const QString &entry, entries) {
        if (entry != namespaceName)
            value.append(entry).append(QLatin1String("|"));
    }

    return value;
}
|
380 |
||
7
3f74d0d4af4c
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Eckhart Koeppen <eckhart.koppen@nokia.com>
parents:
5
diff
changeset
|
381 |
} // namespace std |
3f74d0d4af4c
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Eckhart Koeppen <eckhart.koppen@nokia.com>
parents:
5
diff
changeset
|
382 |
} // namespace fulltextsearch |
0 | 383 |
|
384 |
QT_END_NAMESPACE |