|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the Qt Assistant of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #include "qhelpsearchindexwriter_default_p.h" |
|
43 #include "qhelp_global.h" |
|
44 #include "qhelpenginecore.h" |
|
45 |
|
46 #include <QtCore/QDir> |
|
47 #include <QtCore/QSet> |
|
48 #include <QtCore/QUrl> |
|
49 #include <QtCore/QFile> |
|
50 #include <QtCore/QRegExp> |
|
51 #include <QtCore/QVariant> |
|
52 #include <QtCore/QFileInfo> |
|
53 #include <QtCore/QTextCodec> |
|
54 #include <QtCore/QTextStream> |
|
55 |
|
56 QT_BEGIN_NAMESPACE |
|
57 |
|
58 namespace qt { |
|
59 namespace fulltextsearch { |
|
60 namespace std { |
|
61 |
|
62 Writer::Writer(const QString &path) |
|
63 : indexPath(path) |
|
64 , indexFile(QString()) |
|
65 , documentFile(QString()) |
|
66 { |
|
67 // nothing todo |
|
68 } |
|
69 |
|
70 Writer::~Writer() |
|
71 { |
|
72 reset(); |
|
73 } |
|
74 |
|
75 void Writer::reset() |
|
76 { |
|
77 for(QHash<QString, Entry*>::ConstIterator it = |
|
78 index.begin(); it != index.end(); ++it) { |
|
79 delete it.value(); |
|
80 } |
|
81 |
|
82 index.clear(); |
|
83 documentList.clear(); |
|
84 } |
|
85 |
|
86 bool Writer::writeIndex() const |
|
87 { |
|
88 bool status; |
|
89 QFile idxFile(indexFile); |
|
90 if (!(status = idxFile.open(QFile::WriteOnly))) |
|
91 return status; |
|
92 |
|
93 QDataStream indexStream(&idxFile); |
|
94 for(QHash<QString, Entry*>::ConstIterator it = |
|
95 index.begin(); it != index.end(); ++it) { |
|
96 indexStream << it.key(); |
|
97 indexStream << it.value()->documents.count(); |
|
98 indexStream << it.value()->documents; |
|
99 } |
|
100 idxFile.close(); |
|
101 |
|
102 QFile docFile(documentFile); |
|
103 if (!(status = docFile.open(QFile::WriteOnly))) |
|
104 return status; |
|
105 |
|
106 QDataStream docStream(&docFile); |
|
107 foreach(const QStringList list, documentList) { |
|
108 docStream << list.at(0); |
|
109 docStream << list.at(1); |
|
110 } |
|
111 docFile.close(); |
|
112 |
|
113 return status; |
|
114 } |
|
115 |
|
116 void Writer::removeIndex() const |
|
117 { |
|
118 QFile idxFile(indexFile); |
|
119 if (idxFile.exists()) |
|
120 idxFile.remove(); |
|
121 |
|
122 QFile docFile(documentFile); |
|
123 if (docFile.exists()) |
|
124 docFile.remove(); |
|
125 } |
|
126 |
|
127 void Writer::setIndexFile(const QString &namespaceName, const QString &attributes) |
|
128 { |
|
129 QString extention = namespaceName + QLatin1String("@") + attributes; |
|
130 indexFile = indexPath + QLatin1String("/indexdb40.") + extention; |
|
131 documentFile = indexPath + QLatin1String("/indexdoc40.") + extention; |
|
132 } |
|
133 |
|
134 void Writer::insertInIndex(const QString &string, int docNum) |
|
135 { |
|
136 if (string == QLatin1String("amp") || string == QLatin1String("nbsp")) |
|
137 return; |
|
138 |
|
139 Entry *entry = 0; |
|
140 if (index.count()) |
|
141 entry = index[string]; |
|
142 |
|
143 if (entry) { |
|
144 if (entry->documents.last().docNumber != docNum) |
|
145 entry->documents.append(Document(docNum, 1)); |
|
146 else |
|
147 entry->documents.last().frequency++; |
|
148 } else { |
|
149 index.insert(string, new Entry(docNum)); |
|
150 } |
|
151 } |
|
152 |
|
153 void Writer::insertInDocumentList(const QString &title, const QString &url) |
|
154 { |
|
155 documentList.append(QStringList(title) << url); |
|
156 } |
|
157 |
|
158 |
|
159 QHelpSearchIndexWriter::QHelpSearchIndexWriter() |
|
160 : QThread() |
|
161 , m_cancel(false) |
|
162 { |
|
163 // nothing todo |
|
164 } |
|
165 |
|
166 QHelpSearchIndexWriter::~QHelpSearchIndexWriter() |
|
167 { |
|
168 mutex.lock(); |
|
169 this->m_cancel = true; |
|
170 waitCondition.wakeOne(); |
|
171 mutex.unlock(); |
|
172 |
|
173 wait(); |
|
174 } |
|
175 |
|
176 void QHelpSearchIndexWriter::cancelIndexing() |
|
177 { |
|
178 mutex.lock(); |
|
179 this->m_cancel = true; |
|
180 mutex.unlock(); |
|
181 } |
|
182 |
|
183 void QHelpSearchIndexWriter::updateIndex(const QString &collectionFile, |
|
184 const QString &indexFilesFolder, |
|
185 bool reindex) |
|
186 { |
|
187 QMutexLocker lock(&mutex); |
|
188 |
|
189 this->m_cancel = false; |
|
190 this->m_reindex = reindex; |
|
191 this->m_collectionFile = collectionFile; |
|
192 this->m_indexFilesFolder = indexFilesFolder; |
|
193 |
|
194 start(QThread::NormalPriority); |
|
195 } |
|
196 |
|
197 void QHelpSearchIndexWriter::run() |
|
198 { |
|
199 mutex.lock(); |
|
200 |
|
201 if (m_cancel) { |
|
202 mutex.unlock(); |
|
203 return; |
|
204 } |
|
205 |
|
206 const bool reindex(this->m_reindex); |
|
207 const QLatin1String key("DefaultSearchNamespaces"); |
|
208 const QString collectionFile(this->m_collectionFile); |
|
209 const QString indexPath = m_indexFilesFolder; |
|
210 |
|
211 mutex.unlock(); |
|
212 |
|
213 QHelpEngineCore engine(collectionFile, 0); |
|
214 if (!engine.setupData()) |
|
215 return; |
|
216 |
|
217 if (reindex) |
|
218 engine.setCustomValue(key, QLatin1String("")); |
|
219 |
|
220 const QStringList registeredDocs = engine.registeredDocumentations(); |
|
221 const QStringList indexedNamespaces = engine.customValue(key).toString(). |
|
222 split(QLatin1String("|"), QString::SkipEmptyParts); |
|
223 |
|
224 emit indexingStarted(); |
|
225 |
|
226 QStringList namespaces; |
|
227 Writer writer(indexPath); |
|
228 foreach(const QString namespaceName, registeredDocs) { |
|
229 mutex.lock(); |
|
230 if (m_cancel) { |
|
231 mutex.unlock(); |
|
232 return; |
|
233 } |
|
234 mutex.unlock(); |
|
235 |
|
236 // if indexed, continue |
|
237 namespaces.append(namespaceName); |
|
238 if (indexedNamespaces.contains(namespaceName)) |
|
239 continue; |
|
240 |
|
241 const QList<QStringList> attributeSets = |
|
242 engine.filterAttributeSets(namespaceName); |
|
243 |
|
244 foreach (QStringList attributes, attributeSets) { |
|
245 // cleanup maybe old or unfinished files |
|
246 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
|
247 writer.removeIndex(); |
|
248 |
|
249 QSet<QString> documentsSet; |
|
250 const QList<QUrl> docFiles = engine.files(namespaceName, attributes); |
|
251 foreach(QUrl url, docFiles) { |
|
252 if (m_cancel) |
|
253 return; |
|
254 |
|
255 // get rid of duplicated files |
|
256 if (url.hasFragment()) |
|
257 url.setFragment(QString()); |
|
258 |
|
259 QString s = url.toString(); |
|
260 if (s.endsWith(QLatin1String(".html")) |
|
261 || s.endsWith(QLatin1String(".htm")) |
|
262 || s.endsWith(QLatin1String(".txt"))) |
|
263 documentsSet.insert(s); |
|
264 } |
|
265 |
|
266 int docNum = 0; |
|
267 const QStringList documentsList(documentsSet.toList()); |
|
268 foreach(const QString url, documentsList) { |
|
269 if (m_cancel) |
|
270 return; |
|
271 |
|
272 QByteArray data(engine.fileData(url)); |
|
273 if (data.isEmpty()) |
|
274 continue; |
|
275 |
|
276 QTextStream s(data); |
|
277 QString en = QHelpGlobal::charsetFromData(data); |
|
278 s.setCodec(QTextCodec::codecForName(en.toLatin1().constData())); |
|
279 |
|
280 QString text = s.readAll(); |
|
281 if (text.isNull()) |
|
282 continue; |
|
283 |
|
284 QString title = QHelpGlobal::documentTitle(text); |
|
285 |
|
286 int j = 0; |
|
287 int i = 0; |
|
288 bool valid = true; |
|
289 const QChar *buf = text.unicode(); |
|
290 QChar str[64]; |
|
291 QChar c = buf[0]; |
|
292 |
|
293 while ( j < text.length() ) { |
|
294 if (m_cancel) |
|
295 return; |
|
296 |
|
297 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { |
|
298 valid = false; |
|
299 if ( i > 1 ) |
|
300 writer.insertInIndex(QString(str,i), docNum); |
|
301 i = 0; |
|
302 c = buf[++j]; |
|
303 continue; |
|
304 } |
|
305 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { |
|
306 valid = true; |
|
307 c = buf[++j]; |
|
308 continue; |
|
309 } |
|
310 if ( !valid ) { |
|
311 c = buf[++j]; |
|
312 continue; |
|
313 } |
|
314 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { |
|
315 str[i] = c.toLower(); |
|
316 ++i; |
|
317 } else { |
|
318 if ( i > 1 ) |
|
319 writer.insertInIndex(QString(str,i), docNum); |
|
320 i = 0; |
|
321 } |
|
322 c = buf[++j]; |
|
323 } |
|
324 if ( i > 1 ) |
|
325 writer.insertInIndex(QString(str,i), docNum); |
|
326 |
|
327 docNum++; |
|
328 writer.insertInDocumentList(title, url); |
|
329 } |
|
330 |
|
331 if (writer.writeIndex()) { |
|
332 engine.setCustomValue(key, addNamespace( |
|
333 engine.customValue(key).toString(), namespaceName)); |
|
334 } |
|
335 |
|
336 writer.reset(); |
|
337 } |
|
338 } |
|
339 |
|
340 QStringListIterator qsli(indexedNamespaces); |
|
341 while (qsli.hasNext()) { |
|
342 const QString namespaceName = qsli.next(); |
|
343 if (namespaces.contains(namespaceName)) |
|
344 continue; |
|
345 |
|
346 const QList<QStringList> attributeSets = |
|
347 engine.filterAttributeSets(namespaceName); |
|
348 |
|
349 foreach (QStringList attributes, attributeSets) { |
|
350 writer.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
|
351 writer.removeIndex(); |
|
352 } |
|
353 |
|
354 engine.setCustomValue(key, removeNamespace( |
|
355 engine.customValue(key).toString(), namespaceName)); |
|
356 } |
|
357 |
|
358 emit indexingFinished(); |
|
359 } |
|
360 |
|
361 QString QHelpSearchIndexWriter::addNamespace(const QString namespaces, |
|
362 const QString &namespaceName) |
|
363 { |
|
364 QString value = namespaces; |
|
365 if (!value.contains(namespaceName)) |
|
366 value.append(namespaceName).append(QLatin1String("|")); |
|
367 |
|
368 return value; |
|
369 } |
|
370 |
|
371 QString QHelpSearchIndexWriter::removeNamespace(const QString namespaces, |
|
372 const QString &namespaceName) |
|
373 { |
|
374 QString value = namespaces; |
|
375 if (value.contains(namespaceName)) |
|
376 value.remove(namespaceName + QLatin1String("|")); |
|
377 |
|
378 return value; |
|
379 } |
|
380 |
|
381 } // namespace std |
|
382 } // namespace fulltextsearch |
|
383 } // namespace qt |
|
384 |
|
385 QT_END_NAMESPACE |