tools/assistant/lib/fulltextsearch/qtokenizer.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 22 Jan 2010 10:32:13 +0200
changeset 1 ae9c8dab0e3e
parent 0 1918ee327afb
child 4 3b1da2848fc7
permissions -rw-r--r--
Revision: 201001 Kit: 201003
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     1
/****************************************************************************
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     2
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     3
** Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     4
** All rights reserved.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     5
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     6
** Portion Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     7
** All rights reserved.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     8
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
     9
** This file may be used under the terms of the GNU Lesser General Public
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    10
** License version 2.1 as published by the Free Software Foundation and
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    11
** appearing in the file LICENSE.LGPL included in the packaging of this file.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    12
** Please review the following information to ensure the GNU Lesser General
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    13
** Public License version 2.1 requirements will be met:
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    14
** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    15
**
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    16
****************************************************************************/
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    17
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    18
#include "qtokenizer_p.h"
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    19
#include "qclucene_global_p.h"
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    20
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    21
#include <CLucene.h>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    22
#include <CLucene/analysis/AnalysisHeader.h>
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    23
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    24
QT_BEGIN_NAMESPACE
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    25
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    26
QCLuceneTokenizer::QCLuceneTokenizer()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    27
    : QCLuceneTokenStream()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    28
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    29
    // nothing todo
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    30
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    31
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    32
QCLuceneTokenizer::QCLuceneTokenizer(const QCLuceneReader &reader)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    33
    : QCLuceneTokenStream()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    34
    , reader(reader)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    35
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    36
    // nothing todo
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    37
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    38
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    39
QCLuceneTokenizer::~QCLuceneTokenizer()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    40
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    41
    close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    42
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    43
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    44
void QCLuceneTokenizer::close()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    45
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    46
    d->tokenStream->close();
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    47
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    48
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    49
bool QCLuceneTokenizer::next(QCLuceneToken &token)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    50
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    51
    return d->tokenStream->next(token.d->token);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    52
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    53
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    54
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    55
QCLuceneStandardTokenizer::QCLuceneStandardTokenizer(const QCLuceneReader &reader)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    56
    : QCLuceneTokenizer(reader)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    57
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    58
    d->tokenStream = 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    59
        new lucene::analysis::standard::StandardTokenizer(reader.d->reader);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    60
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    61
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    62
QCLuceneStandardTokenizer::~QCLuceneStandardTokenizer()
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    63
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    64
    // nothing todo
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    65
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    66
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    67
bool QCLuceneStandardTokenizer::readApostrophe(const QString &string, 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    68
                                               QCLuceneToken &token)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    69
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    70
    lucene::analysis::standard::StandardTokenizer *stdTokenizer = 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    71
        static_cast<lucene::analysis::standard::StandardTokenizer*> (d->tokenStream);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    72
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    73
    if (stdTokenizer == 0)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    74
        return false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    75
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    76
    TCHAR* value = QStringToTChar(string);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    77
    lucene::util::StringBuffer buffer(value);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    78
    bool retValue = stdTokenizer->ReadApostrophe(&buffer, token.d->token);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    79
    delete [] value;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    80
    
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    81
    return retValue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    82
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    83
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    84
bool QCLuceneStandardTokenizer::readAt(const QString &string, QCLuceneToken &token)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    85
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    86
    lucene::analysis::standard::StandardTokenizer *stdTokenizer = 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    87
        static_cast<lucene::analysis::standard::StandardTokenizer*> (d->tokenStream);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    88
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    89
    if (stdTokenizer == 0)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    90
        return false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    91
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    92
    TCHAR* value = QStringToTChar(string);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    93
    lucene::util::StringBuffer buffer(value);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    94
    bool retValue = stdTokenizer->ReadAt(&buffer, token.d->token);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    95
    delete [] value;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    96
    
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    97
    return retValue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    98
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
    99
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   100
bool QCLuceneStandardTokenizer::readCompany(const QString &string, 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   101
                                            QCLuceneToken &token)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   102
{
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   103
    lucene::analysis::standard::StandardTokenizer *stdTokenizer = 
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   104
        static_cast<lucene::analysis::standard::StandardTokenizer*> (d->tokenStream);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   105
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   106
    if (stdTokenizer == 0)
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   107
        return false;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   108
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   109
    TCHAR* value = QStringToTChar(string);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   110
    lucene::util::StringBuffer buffer(value);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   111
    bool retValue = stdTokenizer->ReadCompany(&buffer, token.d->token);
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   112
    delete [] value;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   113
    
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   114
    return retValue;
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   115
}
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   116
1918ee327afb Revision: 200952
Alex Gilkes <alex.gilkes@nokia.com>
parents:
diff changeset
   117
QT_END_NAMESPACE