|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the Qt Assistant of the Qt Toolkit. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #include "qhelpenginecore.h" |
|
43 #include "qhelpsearchindexreader_default_p.h" |
|
44 |
|
45 #include <QtCore/QDir> |
|
46 #include <QtCore/QUrl> |
|
47 #include <QtCore/QFile> |
|
48 #include <QtCore/QVariant> |
|
49 #include <QtCore/QFileInfo> |
|
50 #include <QtCore/QDataStream> |
|
51 #include <QtCore/QTextStream> |
|
52 |
|
53 QT_BEGIN_NAMESPACE |
|
54 |
|
55 namespace qt { |
|
56 namespace fulltextsearch { |
|
57 namespace std { |
|
58 |
|
59 namespace { |
|
60 QStringList split( const QString &str ) |
|
61 { |
|
62 QStringList lst; |
|
63 int j = 0; |
|
64 int i = str.indexOf(QLatin1Char('*'), j ); |
|
65 |
|
66 if (str.startsWith(QLatin1String("*"))) |
|
67 lst << QLatin1String("*"); |
|
68 |
|
69 while ( i != -1 ) { |
|
70 if ( i > j && i <= (int)str.length() ) { |
|
71 lst << str.mid( j, i - j ); |
|
72 lst << QLatin1String("*"); |
|
73 } |
|
74 j = i + 1; |
|
75 i = str.indexOf(QLatin1Char('*'), j ); |
|
76 } |
|
77 |
|
78 int l = str.length() - 1; |
|
79 if ( str.mid( j, l - j + 1 ).length() > 0 ) |
|
80 lst << str.mid( j, l - j + 1 ); |
|
81 |
|
82 return lst; |
|
83 } |
|
84 } |
|
85 |
|
86 |
|
87 Reader::Reader() |
|
88 : indexPath(QString()) |
|
89 , indexFile(QString()) |
|
90 , documentFile(QString()) |
|
91 { |
|
92 termList.clear(); |
|
93 indexTable.clear(); |
|
94 searchIndexTable.clear(); |
|
95 } |
|
96 |
|
97 Reader::~Reader() |
|
98 { |
|
99 reset(); |
|
100 searchIndexTable.clear(); |
|
101 } |
|
102 |
|
103 bool Reader::readIndex() |
|
104 { |
|
105 if (indexTable.contains(indexFile)) |
|
106 return true; |
|
107 |
|
108 QFile idxFile(indexFile); |
|
109 if (!idxFile.open(QFile::ReadOnly)) |
|
110 return false; |
|
111 |
|
112 QString key; |
|
113 int numOfDocs; |
|
114 EntryTable entryTable; |
|
115 QVector<Document> docs; |
|
116 QDataStream dictStream(&idxFile); |
|
117 while (!dictStream.atEnd()) { |
|
118 dictStream >> key; |
|
119 dictStream >> numOfDocs; |
|
120 docs.resize(numOfDocs); |
|
121 dictStream >> docs; |
|
122 entryTable.insert(key, new Entry(docs)); |
|
123 } |
|
124 idxFile.close(); |
|
125 |
|
126 if (entryTable.isEmpty()) |
|
127 return false; |
|
128 |
|
129 QFile docFile(documentFile); |
|
130 if (!docFile.open(QFile::ReadOnly)) |
|
131 return false; |
|
132 |
|
133 QString title, url; |
|
134 DocumentList documentList; |
|
135 QDataStream docStream(&docFile); |
|
136 while (!docStream.atEnd()) { |
|
137 docStream >> title; |
|
138 docStream >> url; |
|
139 documentList.append(QStringList(title) << url); |
|
140 } |
|
141 docFile.close(); |
|
142 |
|
143 if (documentList.isEmpty()) { |
|
144 cleanupIndex(entryTable); |
|
145 return false; |
|
146 } |
|
147 |
|
148 indexTable.insert(indexFile, Index(entryTable, documentList)); |
|
149 return true; |
|
150 } |
|
151 |
|
152 bool Reader::initCheck() const |
|
153 { |
|
154 return !searchIndexTable.isEmpty(); |
|
155 } |
|
156 |
|
157 void Reader::setIndexPath(const QString &path) |
|
158 { |
|
159 indexPath = path; |
|
160 } |
|
161 |
|
162 void Reader::filterFilesForAttributes(const QStringList &attributes) |
|
163 { |
|
164 searchIndexTable.clear(); |
|
165 for(IndexTable::ConstIterator it = indexTable.begin(); it != indexTable.end(); ++it) { |
|
166 const QString fileName = it.key(); |
|
167 bool containsAll = true; |
|
168 QStringList split = fileName.split(QLatin1String("@")); |
|
169 foreach (const QString attribute, attributes) { |
|
170 if (!split.contains(attribute, Qt::CaseInsensitive)) { |
|
171 containsAll = false; |
|
172 break; |
|
173 } |
|
174 } |
|
175 |
|
176 if (containsAll) |
|
177 searchIndexTable.insert(fileName, it.value()); |
|
178 } |
|
179 } |
|
180 |
|
181 void Reader::setIndexFile(const QString &namespaceName, const QString &attributes) |
|
182 { |
|
183 QString extention = namespaceName + QLatin1String("@") + attributes; |
|
184 indexFile = indexPath + QLatin1String("/indexdb40.") + extention; |
|
185 documentFile = indexPath + QLatin1String("/indexdoc40.") + extention; |
|
186 } |
|
187 |
|
188 bool Reader::splitSearchTerm(const QString &searchTerm, QStringList *terms, |
|
189 QStringList *termSeq, QStringList *seqWords) |
|
190 { |
|
191 QString term = searchTerm; |
|
192 |
|
193 term = term.simplified(); |
|
194 term = term.replace(QLatin1String("\'"), QLatin1String("\"")); |
|
195 term = term.replace(QLatin1String("`"), QLatin1String("\"")); |
|
196 term = term.replace(QLatin1String("-"), QLatin1String(" ")); |
|
197 term = term.replace(QRegExp(QLatin1String("\\s[\\S]?\\s")), QLatin1String(" ")); |
|
198 |
|
199 *terms = term.split(QLatin1Char(' ')); |
|
200 QStringList::iterator it = terms->begin(); |
|
201 for (; it != terms->end(); ++it) { |
|
202 (*it) = (*it).simplified(); |
|
203 (*it) = (*it).toLower(); |
|
204 (*it) = (*it).replace(QLatin1String("\""), QLatin1String("")); |
|
205 } |
|
206 |
|
207 if (term.contains(QLatin1Char('\"'))) { |
|
208 if ((term.count(QLatin1Char('\"')))%2 == 0) { |
|
209 int beg = 0; |
|
210 int end = 0; |
|
211 QString s; |
|
212 beg = term.indexOf(QLatin1Char('\"'), beg); |
|
213 while (beg != -1) { |
|
214 beg++; |
|
215 end = term.indexOf(QLatin1Char('\"'), beg); |
|
216 s = term.mid(beg, end - beg); |
|
217 s = s.toLower(); |
|
218 s = s.simplified(); |
|
219 if (s.contains(QLatin1Char('*'))) { |
|
220 qWarning("Full Text Search, using a wildcard within phrases is not allowed."); |
|
221 return false; |
|
222 } |
|
223 *seqWords += s.split(QLatin1Char(' ')); |
|
224 *termSeq << s; |
|
225 beg = term.indexOf(QLatin1Char('\"'), end + 1); |
|
226 } |
|
227 } else { |
|
228 qWarning("Full Text Search, the closing quotation mark is missing."); |
|
229 return false; |
|
230 } |
|
231 } |
|
232 |
|
233 return true; |
|
234 } |
|
235 |
|
236 void Reader::searchInIndex(const QStringList &terms) |
|
237 { |
|
238 foreach (const QString term, terms) { |
|
239 QVector<Document> documents; |
|
240 |
|
241 for(IndexTable::ConstIterator it = searchIndexTable.begin(); |
|
242 it != searchIndexTable.end(); ++it) { |
|
243 EntryTable entryTable = it.value().first; |
|
244 DocumentList documentList = it.value().second; |
|
245 |
|
246 if (term.contains(QLatin1Char('*'))) |
|
247 documents = setupDummyTerm(getWildcardTerms(term, entryTable), entryTable); |
|
248 else if (entryTable.value(term)) |
|
249 documents = entryTable.value(term)->documents; |
|
250 else |
|
251 continue; |
|
252 |
|
253 if (!documents.isEmpty()) { |
|
254 DocumentInfo info; |
|
255 QString title, url; |
|
256 QVector<DocumentInfo> documentsInfo; |
|
257 foreach(const Document doc, documents) { |
|
258 info.docNumber = doc.docNumber; |
|
259 info.frequency = doc.frequency; |
|
260 info.documentUrl = documentList.at(doc.docNumber).at(1); |
|
261 info.documentTitle = documentList.at(doc.docNumber).at(0); |
|
262 documentsInfo.append(info); |
|
263 } |
|
264 |
|
265 bool found = false; |
|
266 for(QList<TermInfo>::Iterator tit = termList.begin(); |
|
267 tit != termList.end(); ++tit) { |
|
268 TermInfo *t = &(*tit); |
|
269 if(t->term == term) { |
|
270 t->documents += documentsInfo; |
|
271 t->frequency += documentsInfo.count(); |
|
272 found = true; break; |
|
273 } |
|
274 } |
|
275 if (!found) |
|
276 termList.append(TermInfo(term, documentsInfo.count(), documentsInfo)); |
|
277 } |
|
278 } |
|
279 } |
|
280 qSort(termList); |
|
281 } |
|
282 |
|
283 QVector<DocumentInfo> Reader::hits() |
|
284 { |
|
285 QVector<DocumentInfo> documents; |
|
286 if (!termList.count()) |
|
287 return documents; |
|
288 |
|
289 documents = termList.takeFirst().documents; |
|
290 for(QList<TermInfo>::Iterator it = termList.begin(); it != termList.end(); ++it) { |
|
291 TermInfo *t = &(*it); |
|
292 QVector<DocumentInfo> docs = t->documents; |
|
293 for(QVector<DocumentInfo>::Iterator minDoc_it = documents.begin(); |
|
294 minDoc_it != documents.end(); ) { |
|
295 bool found = false; |
|
296 for (QVector<DocumentInfo>::ConstIterator doc_it = docs.constBegin(); |
|
297 doc_it != docs.constEnd(); ++doc_it ) { |
|
298 if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) { |
|
299 (*minDoc_it).frequency += (*doc_it).frequency; |
|
300 found = true; |
|
301 break; |
|
302 } |
|
303 } |
|
304 if (!found) |
|
305 minDoc_it = documents.erase(minDoc_it); |
|
306 else |
|
307 ++minDoc_it; |
|
308 } |
|
309 } |
|
310 |
|
311 qSort(documents); |
|
312 return documents; |
|
313 } |
|
314 |
|
315 bool Reader::searchForPattern(const QStringList &patterns, const QStringList &words, |
|
316 const QByteArray &data) |
|
317 { |
|
318 if (data.isEmpty()) |
|
319 return false; |
|
320 |
|
321 for(QHash<QString, PosEntry*>::ConstIterator mit = |
|
322 miniIndex.begin(); mit != miniIndex.end(); ++mit) { |
|
323 delete mit.value(); |
|
324 } |
|
325 miniIndex.clear(); |
|
326 |
|
327 wordNum = 3; |
|
328 QStringList::ConstIterator cIt = words.begin(); |
|
329 for ( ; cIt != words.end(); ++cIt ) |
|
330 miniIndex.insert(*cIt, new PosEntry(0)); |
|
331 |
|
332 QTextStream s(data); |
|
333 QString text = s.readAll(); |
|
334 bool valid = true; |
|
335 const QChar *buf = text.unicode(); |
|
336 QChar str[64]; |
|
337 QChar c = buf[0]; |
|
338 int j = 0; |
|
339 int i = 0; |
|
340 while ( j < text.length() ) { |
|
341 if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) { |
|
342 valid = false; |
|
343 if ( i > 1 ) |
|
344 buildMiniIndex( QString(str,i) ); |
|
345 i = 0; |
|
346 c = buf[++j]; |
|
347 continue; |
|
348 } |
|
349 if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) { |
|
350 valid = true; |
|
351 c = buf[++j]; |
|
352 continue; |
|
353 } |
|
354 if ( !valid ) { |
|
355 c = buf[++j]; |
|
356 continue; |
|
357 } |
|
358 if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) { |
|
359 str[i] = c.toLower(); |
|
360 ++i; |
|
361 } else { |
|
362 if ( i > 1 ) |
|
363 buildMiniIndex( QString(str,i) ); |
|
364 i = 0; |
|
365 } |
|
366 c = buf[++j]; |
|
367 } |
|
368 if ( i > 1 ) |
|
369 buildMiniIndex( QString(str,i) ); |
|
370 |
|
371 QStringList::ConstIterator patIt = patterns.begin(); |
|
372 QStringList wordLst; |
|
373 QList<uint> a, b; |
|
374 QList<uint>::iterator aIt; |
|
375 for ( ; patIt != patterns.end(); ++patIt ) { |
|
376 wordLst = (*patIt).split(QLatin1Char(' ')); |
|
377 a = miniIndex[ wordLst[0] ]->positions; |
|
378 for ( int j = 1; j < (int)wordLst.count(); ++j ) { |
|
379 b = miniIndex[ wordLst[j] ]->positions; |
|
380 aIt = a.begin(); |
|
381 while ( aIt != a.end() ) { |
|
382 if ( b.contains( *aIt + 1 )) { |
|
383 (*aIt)++; |
|
384 ++aIt; |
|
385 } else { |
|
386 aIt = a.erase( aIt ); |
|
387 } |
|
388 } |
|
389 } |
|
390 } |
|
391 if ( a.count() ) |
|
392 return true; |
|
393 return false; |
|
394 } |
|
395 |
|
396 QVector<Document> Reader::setupDummyTerm(const QStringList &terms, |
|
397 const EntryTable &entryTable) |
|
398 { |
|
399 QList<Term> termList; |
|
400 for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) { |
|
401 if (entryTable.value(*it)) { |
|
402 Entry *e = entryTable.value(*it); |
|
403 termList.append(Term(*it, e->documents.count(), e->documents ) ); |
|
404 } |
|
405 } |
|
406 QVector<Document> maxList(0); |
|
407 if ( !termList.count() ) |
|
408 return maxList; |
|
409 qSort(termList); |
|
410 |
|
411 maxList = termList.takeLast().documents; |
|
412 for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) { |
|
413 Term *t = &(*it); |
|
414 QVector<Document> docs = t->documents; |
|
415 for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) { |
|
416 if ( maxList.indexOf( *docIt ) == -1 ) |
|
417 maxList.append( *docIt ); |
|
418 } |
|
419 } |
|
420 return maxList; |
|
421 } |
|
422 |
|
423 QStringList Reader::getWildcardTerms(const QString &term, |
|
424 const EntryTable &entryTable) |
|
425 { |
|
426 QStringList lst; |
|
427 QStringList terms = split(term); |
|
428 QStringList::Iterator iter; |
|
429 |
|
430 for(EntryTable::ConstIterator it = entryTable.begin(); |
|
431 it != entryTable.end(); ++it) { |
|
432 int index = 0; |
|
433 bool found = false; |
|
434 QString text( it.key() ); |
|
435 for ( iter = terms.begin(); iter != terms.end(); ++iter ) { |
|
436 if ( *iter == QLatin1String("*") ) { |
|
437 found = true; |
|
438 continue; |
|
439 } |
|
440 if ( iter == terms.begin() && (*iter)[0] != text[0] ) { |
|
441 found = false; |
|
442 break; |
|
443 } |
|
444 index = text.indexOf( *iter, index ); |
|
445 if ( *iter == terms.last() && index != (int)text.length()-1 ) { |
|
446 index = text.lastIndexOf( *iter ); |
|
447 if ( index != (int)text.length() - (int)(*iter).length() ) { |
|
448 found = false; |
|
449 break; |
|
450 } |
|
451 } |
|
452 if ( index != -1 ) { |
|
453 found = true; |
|
454 index += (*iter).length(); |
|
455 continue; |
|
456 } else { |
|
457 found = false; |
|
458 break; |
|
459 } |
|
460 } |
|
461 if (found) |
|
462 lst << text; |
|
463 } |
|
464 |
|
465 return lst; |
|
466 } |
|
467 |
|
468 void Reader::buildMiniIndex(const QString &string) |
|
469 { |
|
470 if (miniIndex[string]) |
|
471 miniIndex[string]->positions.append(wordNum); |
|
472 ++wordNum; |
|
473 } |
|
474 |
|
475 void Reader::reset() |
|
476 { |
|
477 for(IndexTable::Iterator it = indexTable.begin(); |
|
478 it != indexTable.end(); ++it) { |
|
479 cleanupIndex(it.value().first); |
|
480 it.value().second.clear(); |
|
481 } |
|
482 } |
|
483 |
|
484 void Reader::cleanupIndex(EntryTable &entryTable) |
|
485 { |
|
486 for(EntryTable::ConstIterator it = |
|
487 entryTable.begin(); it != entryTable.end(); ++it) { |
|
488 delete it.value(); |
|
489 } |
|
490 |
|
491 entryTable.clear(); |
|
492 } |
|
493 |
|
494 |
|
495 QHelpSearchIndexReaderDefault::QHelpSearchIndexReaderDefault() |
|
496 : QHelpSearchIndexReader() |
|
497 { |
|
498 // nothing todo |
|
499 } |
|
500 |
|
501 QHelpSearchIndexReaderDefault::~QHelpSearchIndexReaderDefault() |
|
502 { |
|
503 } |
|
504 |
|
505 void QHelpSearchIndexReaderDefault::run() |
|
506 { |
|
507 mutex.lock(); |
|
508 |
|
509 if (m_cancel) { |
|
510 mutex.unlock(); |
|
511 return; |
|
512 } |
|
513 |
|
514 const QList<QHelpSearchQuery> &queryList = this->m_query; |
|
515 const QLatin1String key("DefaultSearchNamespaces"); |
|
516 const QString collectionFile(this->m_collectionFile); |
|
517 const QString indexPath = m_indexFilesFolder; |
|
518 |
|
519 mutex.unlock(); |
|
520 |
|
521 QString queryTerm; |
|
522 foreach (const QHelpSearchQuery query, queryList) { |
|
523 if (query.fieldName == QHelpSearchQuery::DEFAULT) { |
|
524 queryTerm = query.wordList.at(0); |
|
525 break; |
|
526 } |
|
527 } |
|
528 |
|
529 if (queryTerm.isEmpty()) |
|
530 return; |
|
531 |
|
532 QHelpEngineCore engine(collectionFile, 0); |
|
533 if (!engine.setupData()) |
|
534 return; |
|
535 |
|
536 const QStringList registeredDocs = engine.registeredDocumentations(); |
|
537 const QStringList indexedNamespaces = engine.customValue(key).toString(). |
|
538 split(QLatin1String("|"), QString::SkipEmptyParts); |
|
539 |
|
540 emit searchingStarted(); |
|
541 |
|
542 // setup the reader |
|
543 m_reader.setIndexPath(indexPath); |
|
544 foreach(const QString namespaceName, registeredDocs) { |
|
545 mutex.lock(); |
|
546 if (m_cancel) { |
|
547 mutex.unlock(); |
|
548 searchingFinished(0); // TODO: check this ??? |
|
549 return; |
|
550 } |
|
551 mutex.unlock(); |
|
552 |
|
553 const QList<QStringList> attributeSets = |
|
554 engine.filterAttributeSets(namespaceName); |
|
555 |
|
556 foreach (QStringList attributes, attributeSets) { |
|
557 // read all index files |
|
558 m_reader.setIndexFile(namespaceName, attributes.join(QLatin1String("@"))); |
|
559 if (!m_reader.readIndex()) { |
|
560 qWarning("Full Text Search, could not read file for namespace: %s.", |
|
561 namespaceName.toUtf8().constData()); |
|
562 } |
|
563 } |
|
564 } |
|
565 |
|
566 // get the current filter attributes and minimize the index files table |
|
567 m_reader.filterFilesForAttributes(engine.filterAttributes(engine.currentFilter())); |
|
568 |
|
569 hitList.clear(); |
|
570 QStringList terms, termSeq, seqWords; |
|
571 if (m_reader.initCheck() && // check if we could read anything |
|
572 m_reader.splitSearchTerm(queryTerm, &terms, &termSeq, &seqWords) ) { |
|
573 |
|
574 // search for term(s) |
|
575 m_reader.searchInIndex(terms); // TODO: should this be interruptible as well ??? |
|
576 |
|
577 QVector<DocumentInfo> hits = m_reader.hits(); |
|
578 if (!hits.isEmpty()) { |
|
579 if (termSeq.isEmpty()) { |
|
580 foreach (const DocumentInfo docInfo, hits) { |
|
581 mutex.lock(); |
|
582 if (m_cancel) { |
|
583 mutex.unlock(); |
|
584 searchingFinished(0); // TODO: check this, speed issue while locking??? |
|
585 return; |
|
586 } |
|
587 mutex.unlock(); |
|
588 hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl)); |
|
589 } |
|
590 } else { |
|
591 foreach (const DocumentInfo docInfo, hits) { |
|
592 mutex.lock(); |
|
593 if (m_cancel) { |
|
594 mutex.unlock(); |
|
595 searchingFinished(0); // TODO: check this, speed issue while locking??? |
|
596 return; |
|
597 } |
|
598 mutex.unlock(); |
|
599 |
|
600 if (m_reader.searchForPattern(termSeq, seqWords, engine.fileData(docInfo.documentUrl))) // TODO: should this be interruptible as well ??? |
|
601 hitList.append(qMakePair(docInfo.documentTitle, docInfo.documentUrl)); |
|
602 } |
|
603 } |
|
604 } |
|
605 } |
|
606 |
|
607 emit searchingFinished(hitList.count()); |
|
608 } |
|
609 |
|
610 } // namespace std |
|
611 } // namespace fulltextsearch |
|
612 } // namespace qt |
|
613 |
|
614 QT_END_NAMESPACE |