searchengine/oss/loc/analysisunittest/src/tinyanalysis.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 06 Jul 2010 15:30:04 +0300
changeset 10 afe194b6b1cd
permissions -rw-r--r--
Revision: 201025 Kit: 2010127
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
* All rights reserved.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
* This component and the accompanying materials are made available
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
* which accompanies this distribution, and is available
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
* Initial Contributors:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
* Contributors:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
* Description: 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
*/
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
#include "tinyanalysis.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
#include "tinyanalysis.inl"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
#include "tinyunicode.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
#include "itk.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
#include <iostream>
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
#include "CLucene.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
#include "wchar.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
#include "analysisunittest.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
#include "evaluationtool.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
using namespace evaluationtool; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
template <class T>
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
void TestLetters(Itk::TestMgr* testMgr, T text) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
	using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
	CustomTokenizer<T> letters(iswalpha);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
	RelaxedTokenizer<T> tokens(letters);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
	Token<T> t; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
	while (t = tokens.consume(text)) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
        wchar_t buf[256];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
        t.utf16(buf);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
        wprintf(L"\"%S\" ", buf);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
	wprintf(L"\n");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
int isnotspace(int c) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
    return !iswspace(c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
template <class T>
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
void TestNGram(Itk::TestMgr* testMgr, T text) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
	using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
	NGramTokenizer<T> ngram(2, isnotspace);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
	RelaxedTokenizer<T> tokens(ngram);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
	Token<T> t; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
	while (t = tokens.consume(text)) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
        wchar_t buf[256];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
        t.utf16(buf);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
        wprintf(L"\"%S\" ", buf);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
    wprintf(L"\n");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
void TinyWcharTest(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
	TestLetters(testMgr, L"foo bar foobar foo*bar foo_bar");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
	TestNGram(testMgr, L"foo bar foobar foo*bar foo_bar");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
void TinyReaderTest(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
	{
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
		lucene::util::StringReader reader(L"foo bar foobar foo*bar foo_bar");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
		analysis::tiny::cl::ReaderBuffer<8> buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
		TestLetters(testMgr, buf.begin());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
	{
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
		lucene::util::StringReader reader(L"foo bar foobar foo*bar foo_bar");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
		analysis::tiny::cl::ReaderBuffer<8> buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
		TestNGram(testMgr, buf.begin());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
void TinyChinaTest(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
    using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
    Corpus corpus(CHINESE_PRC_TEXTCORPUS); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
    typedef cl::ReaderBuffer<64> buffer;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
    for (int i = 0; i < corpus.size(); i++) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
        lucene::util::StringReader reader(corpus[i]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
        buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
        TestNGram( testMgr, Utf16Iterator<buffer::iterator>( buf.begin() ) );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    98
    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    99
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   101
void TinyUtf16Test(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   102
    using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   103
    Corpus corpus(CHINESE_PRC_TEXTCORPUS); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   104
    typedef cl::ReaderBuffer<512> buffer;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   105
    typedef Utf16Iterator<buffer::iterator> u16iter;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   106
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   107
    for (int i = 0; i < corpus.size(); i++) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   108
        {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   109
            lucene::util::StringReader reader(corpus[i]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   110
            buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   111
            {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   112
                u16iter i( buf.begin() );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   113
                for (; *i; ++i) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   114
                    int c = *i;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   115
                    wcout<<(void*)c<<L" ";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   116
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   117
            }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   118
        }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   119
        wcout<<endl;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   120
        {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   121
            wchar_t c[512];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   122
            {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   123
                lucene::util::StringReader reader(corpus[i]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   124
                buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   125
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   126
                buffer::iterator j = buf.begin();
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   127
                {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   128
                    int i;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   129
                    for (i = 0; *j; i++, ++j) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   130
                        c[i] = *j;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   131
                    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   132
                    c[i] = '\0';
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   133
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   134
            }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   135
            lucene::util::StringReader reader(corpus[i]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   136
            buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   137
            u16iter i( buf.begin() );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   138
            wchar_t b[512];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   139
            wcout<<flush;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   140
            Utf16Writer<wchar_t*>(b)<<i<<L'\0';
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   141
            wprintf(L"%S\n", b);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   142
            fflush(stdout);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   143
            for (int k = 0; c[k] || b[k]; k++) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   144
                if (c[k] != b[k]) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   145
                    wcout<<"x";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   146
                } else {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   147
                    wcout<<".";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   148
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   149
            }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   150
        }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   151
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   152
        wcout<<endl;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   153
    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   154
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   155
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   156
void TinyJamuTest(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   157
    using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   158
    Corpus corpus(KOREAN_TEXTCORPUS);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   160
    typedef cl::ReaderBuffer<512> buffer;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   161
    typedef Utf16Iterator<buffer::iterator> u16iter;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   162
    typedef JamuIterator<u16iter> iter;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   163
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   164
    for (int line = 0; line < corpus.size(); line++) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   165
        lucene::util::StringReader reader(corpus[line]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   166
        buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   167
        iter i(u16iter(buf.begin()));
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   168
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   169
        printf("%S\n", utf16str(i).c_str());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   170
    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   171
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   172
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   173
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   174
void TinyHangulTest(Itk::TestMgr* testMgr) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   175
    using namespace analysis::tiny;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   176
    Corpus corpus(KOREAN_TEXTCORPUS);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   177
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   178
    typedef cl::ReaderBuffer<512> buffer;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   179
    typedef Utf16Iterator<buffer::iterator> u16iter;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   180
    typedef HangulIterator<u16iter> iter;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   181
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   182
    for (int line = 0; line < corpus.size(); line++) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   183
        lucene::util::StringReader reader(corpus[line]);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   184
        buffer buf(reader);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   185
        iter i(u16iter(buf.begin()));
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   186
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   187
        printf("%S\n", utf16str(i).c_str());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   188
    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   189
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   190
Itk::TesterBase * CreateTinyAnalysisUnitTest() 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   191
{
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   192
	using namespace Itk;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   193
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   194
	SuiteTester
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   195
		* testSuite = 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
			new SuiteTester( "tiny" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
	testSuite->add( "wchar", TinyWcharTest, "wchar" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
	testSuite->add( "reader", TinyReaderTest, "reader" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
    testSuite->add( "cn", TinyChinaTest, "cn" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
    testSuite->add( "utf16", TinyUtf16Test, "utf16" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   202
    testSuite->add( "jamu", TinyJamuTest, "jamu" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   203
    testSuite->add( "hangul", TinyHangulTest, "hangul" );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
	return testSuite;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
}