util/tools/linguist/shared/simtexth.h
author eckhart.koppen@nokia.com
Wed, 31 Mar 2010 11:06:36 +0300
changeset 7 f7bc934e204c
permissions -rw-r--r--
5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
7
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     1
/****************************************************************************
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     2
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     3
** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     4
** All rights reserved.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     5
** Contact: Nokia Corporation (qt-info@nokia.com)
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     6
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     7
** This file is part of the Qt Linguist of the Qt Toolkit.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     8
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
     9
** $QT_BEGIN_LICENSE:LGPL$
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    10
** No Commercial Usage
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    11
** This file contains pre-release code and may not be distributed.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    12
** You may use this file in accordance with the terms and conditions
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    13
** contained in the Technology Preview License Agreement accompanying
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    14
** this package.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    15
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    16
** GNU Lesser General Public License Usage
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    17
** Alternatively, this file may be used under the terms of the GNU Lesser
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    18
** General Public License version 2.1 as published by the Free Software
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    19
** Foundation and appearing in the file LICENSE.LGPL included in the
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    20
** packaging of this file.  Please review the following information to
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    21
** ensure the GNU Lesser General Public License version 2.1 requirements
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    22
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    23
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    24
** In addition, as a special exception, Nokia gives you certain additional
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    25
** rights.  These rights are described in the Nokia Qt LGPL Exception
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    26
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    27
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    28
** If you have questions regarding the use of this file, please contact
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    29
** Nokia at qt-info@nokia.com.
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    30
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    31
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    32
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    33
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    34
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    35
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    36
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    37
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    38
** $QT_END_LICENSE$
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    39
**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    40
****************************************************************************/
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    41
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    42
#ifndef SIMTEXTH_H
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    43
#define SIMTEXTH_H
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    44
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    45
const int textSimilarityThreshold = 190;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    46
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    47
#include <QString>
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    48
#include <QList>
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    49
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    50
QT_BEGIN_NAMESPACE
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    51
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    52
class Translator;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    53
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    54
struct Candidate
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    55
{
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    56
    Candidate() {}
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    57
    Candidate(const QString& source0, const QString &target0)
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    58
	: source(source0), target(target0)
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    59
    {}
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    60
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    61
    QString source;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    62
    QString target;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    63
};
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    64
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    65
inline bool operator==( const Candidate& c, const Candidate& d ) {
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    66
    return c.target == d.target && c.source == d.source;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    67
}
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    68
inline bool operator!=( const Candidate& c, const Candidate& d ) {
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    69
    return !operator==( c, d );
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    70
}
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    71
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    72
typedef QList<Candidate> CandidateList;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    73
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    74
struct CoMatrix;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    75
/**
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    76
 * This class is more efficient for searching through a large array of candidate strings, since we only
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    77
 * have to construct the CoMatrix for the \a stringToMatch once,
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    78
 * after that we just call getSimilarityScore(strCandidate).
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    79
 * \sa getSimilarityScore
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    80
 */
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    81
class StringSimilarityMatcher {
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    82
public:
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    83
    StringSimilarityMatcher(const QString &stringToMatch);
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    84
    ~StringSimilarityMatcher();
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    85
    int getSimilarityScore(const QString &strCandidate);
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    86
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    87
private:
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    88
    CoMatrix *m_cm;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    89
    int m_length;
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    90
};
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    91
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    92
int getSimilarityScore(const QString &str1, const QString &str2);
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    93
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    94
CandidateList similarTextHeuristicCandidates( const Translator *tor,
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    95
					      const QString &text,
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    96
					      int maxCandidates );
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    97
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    98
QT_END_NAMESPACE
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
    99
f7bc934e204c 5cabc75a39ca2f064f70b40f72ed93c74c4dc19b
eckhart.koppen@nokia.com
parents:
diff changeset
   100
#endif