tools/assistant/compat/index.cpp
author Eckhart Koeppen <eckhart.koppen@nokia.com>
Thu, 08 Apr 2010 14:19:33 +0300
branchRCL_3
changeset 7 3f74d0d4af4c
parent 4 3b1da2848fc7
permissions -rw-r--r--
qt:70947f0f93d948bc89b3b43d00da758a51f1ef84
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     1
/****************************************************************************
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     2
**
4
3b1da2848fc7 Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     3
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
0
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     4
** All rights reserved.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     5
** Contact: Nokia Corporation (qt-info@nokia.com)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     6
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     7
** This file is part of the Qt Assistant of the Qt Toolkit.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     8
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     9
** $QT_BEGIN_LICENSE:LGPL$
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    10
** No Commercial Usage
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    11
** This file contains pre-release code and may not be distributed.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    12
** You may use this file in accordance with the terms and conditions
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    13
** contained in the Technology Preview License Agreement accompanying
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    14
** this package.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    15
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    16
** GNU Lesser General Public License Usage
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    17
** Alternatively, this file may be used under the terms of the GNU Lesser
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    18
** General Public License version 2.1 as published by the Free Software
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    19
** Foundation and appearing in the file LICENSE.LGPL included in the
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    20
** packaging of this file.  Please review the following information to
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    21
** ensure the GNU Lesser General Public License version 2.1 requirements
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    22
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    23
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    24
** In addition, as a special exception, Nokia gives you certain additional
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    25
** rights.  These rights are described in the Nokia Qt LGPL Exception
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    26
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    27
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    28
** If you have questions regarding the use of this file, please contact
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    29
** Nokia at qt-info@nokia.com.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    30
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    31
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    32
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    33
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    34
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    35
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    36
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    37
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    38
** $QT_END_LICENSE$
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    39
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    40
****************************************************************************/
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    41
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    42
#include "index.h"
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    43
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    44
#include <QFile>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    45
#include <QDir>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    46
#include <QStringList>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    47
#include <QApplication>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    48
#include <QByteArray>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    49
#include <QTextStream>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    50
#include <QtAlgorithms>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    51
#include <QUrl>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    52
#include <QTextCodec>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    53
#include <ctype.h>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    54
#include <QTextDocument>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    55
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    56
QT_BEGIN_NAMESPACE
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    57
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    58
struct Term {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    59
    Term() : frequency(-1) {}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    60
    Term( const QString &t, int f, QVector<Document> l ) : term( t ), frequency( f ), documents( l ) {}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    61
    QString term;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    62
    int frequency;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    63
    QVector<Document>documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    64
    bool operator<( const Term &i2 ) const { return frequency < i2.frequency; }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    65
};
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    66
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    67
QDataStream &operator>>( QDataStream &s, Document &l )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    68
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    69
    s >> l.docNumber;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    70
    s >> l.frequency;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    71
    return s;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    72
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    73
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    74
QDataStream &operator<<( QDataStream &s, const Document &l )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    75
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    76
    s << (qint16)l.docNumber;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    77
    s << (qint16)l.frequency;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    78
    return s;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    79
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    80
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    81
Index::Index( const QString &dp, const QString &hp )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    82
    : QObject( 0 ), docPath( dp )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    83
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    84
    Q_UNUSED(hp);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    85
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    86
    alreadyHaveDocList = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    87
    lastWindowClosed = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    88
    connect( qApp, SIGNAL(lastWindowClosed()),
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    89
             this, SLOT(setLastWinClosed()) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    90
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    91
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    92
Index::Index( const QStringList &dl, const QString &hp )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    93
    : QObject( 0 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    94
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    95
    Q_UNUSED(hp);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    96
    docList = dl;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    97
    alreadyHaveDocList = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    98
    lastWindowClosed = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    99
    connect( qApp, SIGNAL(lastWindowClosed()),
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   100
             this, SLOT(setLastWinClosed()) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   101
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   102
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   103
void Index::setLastWinClosed()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   104
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   105
    lastWindowClosed = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   106
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   107
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   108
void Index::setDictionaryFile( const QString &f )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   109
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   110
    dictFile = f;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   111
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   112
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   113
void Index::setDocListFile( const QString &f )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   114
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   115
    docListFile = f;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   116
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   117
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   118
void Index::setDocList( const QStringList &lst )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   119
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   120
    docList = lst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   121
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   122
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   123
int Index::makeIndex()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   124
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   125
    if ( !alreadyHaveDocList )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   126
        setupDocumentList();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   127
    if ( docList.isEmpty() )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   128
        return 1;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   129
    QStringList::Iterator it = docList.begin();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   130
    int steps = docList.count() / 100;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   131
    if ( !steps )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   132
        steps++;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   133
    int prog = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   134
    for ( int i = 0; it != docList.end(); ++it, ++i ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   135
        if ( lastWindowClosed ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   136
            return -1;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   137
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   138
        QUrl url(*it);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   139
        parseDocument( url.toLocalFile(), i );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   140
        if ( i%steps == 0 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   141
            prog++;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   142
            emit indexingProgress( prog );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   143
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   144
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   145
    return 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   146
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   147
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   148
void Index::setupDocumentList()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   149
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   150
    QDir d( docPath );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   151
    QStringList filters;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   152
    filters.append(QLatin1String("*.html"));
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   153
    QStringList lst = d.entryList(filters);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   154
    QStringList::ConstIterator it = lst.constBegin();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   155
    for ( ; it != lst.constEnd(); ++it )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   156
        docList.append( QLatin1String("file:") + docPath + QLatin1String("/") + *it );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   157
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   158
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   159
void Index::insertInDict( const QString &str, int docNum )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   160
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   161
    if ( str == QLatin1String("amp") || str == QLatin1String("nbsp"))
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   162
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   163
    Entry *e = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   164
    if ( dict.count() )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   165
        e = dict[ str ];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   166
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   167
    if ( e ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   168
        if ( e->documents.last().docNumber != docNum )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   169
            e->documents.append( Document(docNum, 1 ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   170
        else
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   171
            e->documents.last().frequency++;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   172
    } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   173
        dict.insert( str, new Entry( docNum ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   174
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   175
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   176
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   177
QString Index::getCharsetForDocument(QFile *file)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   178
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   179
    QTextStream s(file);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   180
    QString contents = s.readAll();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   181
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   182
    QString encoding;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   183
    int start = contents.indexOf(QLatin1String("<meta"), 0, Qt::CaseInsensitive);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   184
    if (start > 0) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   185
        int end = contents.indexOf(QLatin1String(">"), start);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   186
        QString meta = contents.mid(start+5, end-start);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   187
        meta = meta.toLower();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   188
        QRegExp r(QLatin1String("charset=([^\"\\s]+)"));
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   189
        if (r.indexIn(meta) != -1) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   190
            encoding = r.cap(1);        
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   191
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   192
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   193
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   194
    file->seek(0);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   195
    if (encoding.isEmpty())
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   196
        return QLatin1String("utf-8");
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   197
    return encoding;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   198
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   199
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   200
void Index::parseDocument( const QString &filename, int docNum )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   201
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   202
    QFile file( filename );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   203
    if ( !file.open(QFile::ReadOnly) ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   204
        qWarning( "can not open file %s", qPrintable(filename) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   205
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   206
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   207
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   208
    QTextStream s(&file);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   209
    QString en = getCharsetForDocument(&file);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   210
    s.setCodec(QTextCodec::codecForName(en.toLatin1().constData()));
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   211
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   212
    QString text = s.readAll();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   213
    if (text.isNull())
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   214
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   215
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   216
    bool valid = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   217
    const QChar *buf = text.unicode();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   218
    QChar str[64];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   219
    QChar c = buf[0];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   220
    int j = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   221
    int i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   222
    while ( j < text.length() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   223
        if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   224
            valid = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   225
            if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   226
                insertInDict( QString(str,i), docNum );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   227
            i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   228
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   229
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   230
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   231
        if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   232
            valid = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   233
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   234
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   235
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   236
        if ( !valid ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   237
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   238
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   239
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   240
        if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   241
            str[i] = c.toLower();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   242
            ++i;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   243
        } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   244
            if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   245
                insertInDict( QString(str,i), docNum );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   246
            i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   247
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   248
        c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   249
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   250
    if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   251
        insertInDict( QString(str,i), docNum );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   252
    file.close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   253
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   254
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   255
void Index::writeDict()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   256
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   257
    QFile f( dictFile );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   258
    if ( !f.open(QFile::WriteOnly ) )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   259
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   260
    QDataStream s( &f );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   261
    for(QHash<QString, Entry *>::Iterator it = dict.begin(); it != dict.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   262
        s << it.key();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   263
        s << it.value()->documents.count();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   264
        s << it.value()->documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   265
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   266
    f.close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   267
    writeDocumentList();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   268
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   269
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   270
void Index::writeDocumentList()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   271
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   272
    QFile f( docListFile );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   273
    if ( !f.open(QFile::WriteOnly ) )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   274
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   275
    QDataStream s( &f );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   276
    s << docList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   277
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   278
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   279
void Index::readDict()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   280
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   281
    QFile f( dictFile );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   282
    if ( !f.open(QFile::ReadOnly ) )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   283
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   284
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   285
    dict.clear();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   286
    QDataStream s( &f );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   287
    QString key;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   288
    int numOfDocs;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   289
    QVector<Document> docs;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   290
    while ( !s.atEnd() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   291
        s >> key;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   292
        s >> numOfDocs;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   293
        docs.resize(numOfDocs);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   294
        s >> docs;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   295
        dict.insert( key, new Entry( docs ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   296
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   297
    f.close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   298
    readDocumentList();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   299
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   300
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   301
void Index::readDocumentList()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   302
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   303
    QFile f( docListFile );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   304
    if ( !f.open(QFile::ReadOnly ) )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   305
        return;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   306
    QDataStream s( &f );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   307
    s >> docList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   308
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   309
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   310
QStringList Index::query( const QStringList &terms, const QStringList &termSeq, const QStringList &seqWords )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   311
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   312
    QList<Term> termList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   313
    for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   314
        Entry *e = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   315
        if ( (*it).contains(QLatin1Char('*')) ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   316
            QVector<Document> wcts = setupDummyTerm( getWildcardTerms( *it ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   317
            termList.append( Term(QLatin1String("dummy"), wcts.count(), wcts ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   318
        } else if ( dict[ *it ] ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   319
            e = dict[ *it ];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   320
            termList.append( Term( *it, e->documents.count(), e->documents ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   321
        } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   322
            return QStringList();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   323
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   324
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   325
    if ( !termList.count() )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   326
        return QStringList();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   327
    qSort(termList);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   328
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   329
    QVector<Document> minDocs = termList.takeFirst().documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   330
    for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   331
        Term *t = &(*it);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   332
        QVector<Document> docs = t->documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   333
        for(QVector<Document>::Iterator minDoc_it = minDocs.begin(); minDoc_it != minDocs.end(); ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   334
            bool found = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   335
            for (QVector<Document>::ConstIterator doc_it = docs.constBegin(); doc_it != docs.constEnd(); ++doc_it ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   336
                if ( (*minDoc_it).docNumber == (*doc_it).docNumber ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   337
                    (*minDoc_it).frequency += (*doc_it).frequency;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   338
                    found = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   339
                    break;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   340
                }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   341
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   342
            if ( !found )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   343
                minDoc_it = minDocs.erase( minDoc_it );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   344
            else
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   345
                ++minDoc_it;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   346
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   347
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   348
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   349
    QStringList results;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   350
    qSort( minDocs );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   351
    if ( termSeq.isEmpty() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   352
        for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   353
            results << docList.at((int)(*it).docNumber);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   354
        return results;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   355
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   356
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   357
    QString fileName;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   358
    for(QVector<Document>::Iterator it = minDocs.begin(); it != minDocs.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   359
        fileName =  docList[ (int)(*it).docNumber ];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   360
        if ( searchForPattern( termSeq, seqWords, fileName ) )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   361
            results << fileName;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   362
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   363
    return results;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   364
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   365
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   366
QString Index::getDocumentTitle( const QString &fullFileName )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   367
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   368
    QUrl url(fullFileName);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   369
    QString fileName = url.toLocalFile();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   370
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   371
    if (documentTitleCache.contains(fileName))
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   372
        return documentTitleCache.value(fileName);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   373
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   374
    QFile file( fileName );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   375
    if ( !file.open( QFile::ReadOnly ) ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   376
        qWarning( "cannot open file %s", qPrintable(fileName) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   377
        return fileName;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   378
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   379
    QTextStream s( &file );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   380
    QString text = s.readAll();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   381
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   382
    int start = text.indexOf(QLatin1String("<title>"), 0, Qt::CaseInsensitive) + 7;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   383
    int end = text.indexOf(QLatin1String("</title>"), 0, Qt::CaseInsensitive);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   384
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   385
    QString title = tr("Untitled");
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   386
    if (end - start > 0) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   387
        title = text.mid(start, end - start);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   388
        if (Qt::mightBeRichText(title)) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   389
            QTextDocument doc;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   390
            doc.setHtml(title);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   391
            title = doc.toPlainText();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   392
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   393
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   394
    documentTitleCache.insert(fileName, title);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   395
    return title;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   396
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   397
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   398
QStringList Index::getWildcardTerms( const QString &term )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   399
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   400
    QStringList lst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   401
    QStringList terms = split( term );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   402
    QStringList::Iterator iter;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   403
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   404
    for(QHash<QString, Entry*>::Iterator it = dict.begin(); it != dict.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   405
        int index = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   406
        bool found = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   407
        QString text( it.key() );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   408
        for ( iter = terms.begin(); iter != terms.end(); ++iter ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   409
            if ( *iter == QLatin1String("*") ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   410
                found = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   411
                continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   412
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   413
            if ( iter == terms.begin() && (*iter)[0] != text[0] ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   414
                found = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   415
                break;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   416
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   417
            index = text.indexOf( *iter, index );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   418
            if ( *iter == terms.last() && index != (int)text.length()-1 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   419
                index = text.lastIndexOf( *iter );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   420
                if ( index != (int)text.length() - (int)(*iter).length() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   421
                    found = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   422
                    break;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   423
                }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   424
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   425
            if ( index != -1 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   426
                found = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   427
                index += (*iter).length();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   428
                continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   429
            } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   430
                found = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   431
                break;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   432
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   433
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   434
        if ( found )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   435
            lst << text;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   436
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   437
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   438
    return lst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   439
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   440
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   441
QStringList Index::split( const QString &str )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   442
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   443
    QStringList lst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   444
    int j = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   445
    int i = str.indexOf(QLatin1Char('*'), j );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   446
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   447
    if (str.startsWith(QLatin1String("*")))
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   448
        lst << QLatin1String("*");
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   449
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   450
    while ( i != -1 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   451
        if ( i > j && i <= (int)str.length() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   452
            lst << str.mid( j, i - j );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   453
            lst << QLatin1String("*");
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   454
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   455
        j = i + 1;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   456
        i = str.indexOf(QLatin1Char('*'), j );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   457
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   458
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   459
    int l = str.length() - 1;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   460
    if ( str.mid( j, l - j + 1 ).length() > 0 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   461
        lst << str.mid( j, l - j + 1 );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   462
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   463
    return lst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   464
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   465
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   466
QVector<Document> Index::setupDummyTerm( const QStringList &terms )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   467
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   468
    QList<Term> termList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   469
    for (QStringList::ConstIterator it = terms.begin(); it != terms.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   470
        Entry *e = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   471
        if ( dict[ *it ] ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   472
            e = dict[ *it ];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   473
            termList.append( Term( *it, e->documents.count(), e->documents ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   474
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   475
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   476
    QVector<Document> maxList(0);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   477
    if ( !termList.count() )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   478
        return maxList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   479
    qSort(termList);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   480
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   481
    maxList = termList.takeLast().documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   482
    for(QList<Term>::Iterator it = termList.begin(); it != termList.end(); ++it) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   483
        Term *t = &(*it);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   484
        QVector<Document> docs = t->documents;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   485
        for (QVector<Document>::iterator docIt = docs.begin(); docIt != docs.end(); ++docIt ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   486
            if ( maxList.indexOf( *docIt ) == -1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   487
                maxList.append( *docIt );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   488
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   489
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   490
    return maxList;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   491
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   492
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   493
void Index::buildMiniDict( const QString &str )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   494
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   495
    if ( miniDict[ str ] )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   496
        miniDict[ str ]->positions.append( wordNum );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   497
    ++wordNum;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   498
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   499
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   500
bool Index::searchForPattern( const QStringList &patterns, const QStringList &words, const QString &fileName )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   501
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   502
    QUrl url(fileName);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   503
    QString fName = url.toLocalFile();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   504
    QFile file( fName );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   505
    if ( !file.open( QFile::ReadOnly ) ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   506
        qWarning( "cannot open file %s", qPrintable(fName) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   507
        return false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   508
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   509
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   510
    wordNum = 3;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   511
    miniDict.clear();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   512
    QStringList::ConstIterator cIt = words.begin();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   513
    for ( ; cIt != words.end(); ++cIt )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   514
        miniDict.insert( *cIt, new PosEntry( 0 ) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   515
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   516
    QTextStream s( &file );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   517
    QString text = s.readAll();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   518
    bool valid = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   519
    const QChar *buf = text.unicode();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   520
    QChar str[64];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   521
    QChar c = buf[0];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   522
    int j = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   523
    int i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   524
    while ( j < text.length() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   525
        if ( c == QLatin1Char('<') || c == QLatin1Char('&') ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   526
            valid = false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   527
            if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   528
                buildMiniDict( QString(str,i) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   529
            i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   530
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   531
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   532
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   533
        if ( ( c == QLatin1Char('>') || c == QLatin1Char(';') ) && !valid ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   534
            valid = true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   535
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   536
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   537
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   538
        if ( !valid ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   539
            c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   540
            continue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   541
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   542
        if ( ( c.isLetterOrNumber() || c == QLatin1Char('_') ) && i < 63 ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   543
            str[i] = c.toLower();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   544
            ++i;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   545
        } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   546
            if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   547
                buildMiniDict( QString(str,i) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   548
            i = 0;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   549
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   550
        c = buf[++j];
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   551
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   552
    if ( i > 1 )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   553
        buildMiniDict( QString(str,i) );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   554
    file.close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   555
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   556
    QStringList::ConstIterator patIt = patterns.begin();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   557
    QStringList wordLst;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   558
    QList<uint> a, b;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   559
    QList<uint>::iterator aIt;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   560
    for ( ; patIt != patterns.end(); ++patIt ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   561
        wordLst = (*patIt).split(QLatin1Char(' '));
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   562
        a = miniDict[ wordLst[0] ]->positions;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   563
        for ( int j = 1; j < (int)wordLst.count(); ++j ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   564
            b = miniDict[ wordLst[j] ]->positions;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   565
            aIt = a.begin();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   566
            while ( aIt != a.end() ) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   567
                if ( b.contains( *aIt + 1 )) {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   568
                    (*aIt)++;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   569
                    ++aIt;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   570
                } else {
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   571
                    aIt = a.erase( aIt );
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   572
                }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   573
            }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   574
        }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   575
    }
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   576
    if ( a.count() )
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   577
        return true;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   578
    return false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   579
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   580
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   581
QT_END_NAMESPACE