diff -r d4d56f5e7c55 -r 65456528cac2 searchengine/oss/loc/analysisunittest/src/tinyanalysis.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysisunittest/src/tinyanalysis.cpp Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,206 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: Console-output tests for the analysis::tiny tokenizers and
+* iterators, registered as the tiny test suite at the end of this file.
+*
+*/
+
+#include "tinyanalysis.h"
+#include "tinyanalysis.inl"
+#include "tinyunicode.h"
+
+#include "itk.h"
+
+#include /* NOTE(review): header name is missing in this copy of the patch; wcout is used below, so presumably the iostream header - confirm */
+
+#include "CLucene.h"
+
+#include "wchar.h"
+
+#include "analysisunittest.h"
+#include "evaluationtool.h"
+
+using namespace evaluationtool;
+/*
+* Feeds text through a CustomTokenizer constructed with iswalpha, wrapped in a
+* RelaxedTokenizer, and prints every token returned by consume() with wprintf,
+* each one in double quotes. A newline is printed once consume() returns a
+* token that evaluates to false. testMgr is accepted but not used in this body.
+*
+* NOTE(review): the parameter list after the template keyword, and any
+* template arguments on the tokenizer types, are missing in this copy of the
+* patch. T is presumably the type of the text source - confirm against the
+* repository version of the file.
+* NOTE(review): buf is a fixed array of 256 wchar_t; whether Token::utf16
+* bounds its output is not visible here.
+* NOTE(review): the meaning of the %S conversion in wprintf differs between
+* C runtimes; confirm it matches wchar_t data on the target toolchain.
+*/
+template
+void TestLetters(Itk::TestMgr* testMgr, T text) {
+    using namespace analysis::tiny;
+
+    CustomTokenizer letters(iswalpha);
+    RelaxedTokenizer tokens(letters);
+
+    Token t;
+    while (t = tokens.consume(text)) {
+        wchar_t buf[256];
+        t.utf16(buf);
+        wprintf(L"\"%S\" ", buf);
+    }
+    wprintf(L"\n");
+}
+/*
+* Character predicate handed to NGramTokenizer in TestNGram: returns nonzero
+* when iswspace(c) reports that c is not whitespace.
+*/
+int isnotspace(int c) {
+    return !iswspace(c);
+}
+/*
+* Same driver as TestLetters, except that the inner tokenizer is an
+* NGramTokenizer constructed with (2, isnotspace). The 2 is presumably the
+* n-gram length - confirm against the NGramTokenizer declaration.
+* The notes on TestLetters about the missing template parameter list, the
+* 256 element buffer and the %S conversion apply here as well.
+*/
+template
+void TestNGram(Itk::TestMgr* testMgr, T text) {
+    using namespace analysis::tiny;
+
+    NGramTokenizer ngram(2, isnotspace);
+    RelaxedTokenizer tokens(ngram);
+
+    Token t;
+    while (t = tokens.consume(text)) {
+        wchar_t buf[256];
+        t.utf16(buf);
+        wprintf(L"\"%S\" ", buf);
+    }
+    wprintf(L"\n");
+}
+/*
+* Runs both drivers directly on a wide string literal.
+*/
+void TinyWcharTest(Itk::TestMgr* testMgr) {
+    TestLetters(testMgr, L"foo bar foobar foo*bar foo_bar");
+    TestNGram(testMgr, L"foo bar foobar foo*bar foo_bar");
+}
+/*
+* Runs both drivers on the same text as TinyWcharTest, this time read through
+* a lucene::util::StringReader and a cl::ReaderBuffer<8>. Each driver gets its
+* own reader and buffer in a separate scope.
+* NOTE(review): 8 is presumably a deliberately small buffer size so that the
+* buffer has to be refilled while tokenizing - confirm.
+*/
+void TinyReaderTest(Itk::TestMgr* testMgr) {
+    {
+        lucene::util::StringReader reader(L"foo bar foobar foo*bar foo_bar");
+        analysis::tiny::cl::ReaderBuffer<8> buf(reader);
+        TestLetters(testMgr, buf.begin());
+    }
+    {
+        lucene::util::StringReader reader(L"foo bar foobar foo*bar foo_bar");
+        analysis::tiny::cl::ReaderBuffer<8> buf(reader);
+        TestNGram(testMgr, buf.begin());
+    }
+}
+/*
+* For every entry of the CHINESE_PRC_TEXTCORPUS corpus, runs TestNGram over a
+* Utf16Iterator layered on a ReaderBuffer<64> that reads the entry.
+*/
+void TinyChinaTest(Itk::TestMgr* testMgr) {
+    using namespace analysis::tiny;
+    Corpus corpus(CHINESE_PRC_TEXTCORPUS);
+    typedef cl::ReaderBuffer<64> buffer;
+
+    for (int i = 0; i < corpus.size(); i++) {
+        lucene::util::StringReader reader(corpus[i]);
+        buffer buf(reader);
+        TestNGram( testMgr, Utf16Iterator( buf.begin() ) );
+    }
+}
+/*
+* Walks every entry of the CHINESE_PRC_TEXTCORPUS corpus through a
+* Utf16Iterator over a ReaderBuffer<512> and writes each value to wcout.
+* NOTE(review): the remainder of this function and the beginning of the next
+* test (presumably TinyJamuTest, which is registered in the suite below) are
+* missing from this copy of the patch; see the marked line.
+*/
+void TinyUtf16Test(Itk::TestMgr* testMgr) {
+    using namespace analysis::tiny;
+    Corpus corpus(CHINESE_PRC_TEXTCORPUS);
+    typedef cl::ReaderBuffer<512> buffer;
+    typedef Utf16Iterator u16iter;
+
+    for (int i = 0; i < corpus.size(); i++) {
+        {
+            lucene::util::StringReader reader(corpus[i]);
+            buffer buf(reader);
+            {
+                u16iter i( buf.begin() ); /* shadows the outer loop index i inside this nested scope */
+                for (; *i; ++i) { /* stops at the first element that evaluates to zero */
+                    int c = *i;
+                    wcout<<(void*)c<(b)< buffer; /* NOTE(review): text is truncated on this line; the end of TinyUtf16Test and the start of the following test were lost */
+    typedef Utf16Iterator u16iter;
+    typedef JamuIterator iter;
+/*
+* Prints each corpus line as returned by utf16str() over a JamuIterator that
+* wraps a Utf16Iterator on the line buffer. The definition of corpus for this
+* function lies in the missing text above.
+*/
+    for (int line = 0; line < corpus.size(); line++) {
+        lucene::util::StringReader reader(corpus[line]);
+        buffer buf(reader);
+        iter i(u16iter(buf.begin()));
+
+        printf("%S\n", utf16str(i).c_str());
+    }
+}
+
+/*
+* For every line of the KOREAN_TEXTCORPUS corpus: reads it through a
+* ReaderBuffer<512>, wraps the buffer in a Utf16Iterator and a HangulIterator,
+* and prints the result of utf16str() followed by a newline.
+* NOTE(review): %S in printf is not an ISO C conversion; confirm it prints
+* wide strings on the target toolchain.
+*/
+void TinyHangulTest(Itk::TestMgr* testMgr) {
+    using namespace analysis::tiny;
+    Corpus corpus(KOREAN_TEXTCORPUS);
+
+    typedef cl::ReaderBuffer<512> buffer;
+    typedef Utf16Iterator u16iter;
+    typedef HangulIterator iter;
+
+    for (int line = 0; line < corpus.size(); line++) {
+        lucene::util::StringReader reader(corpus[line]);
+        buffer buf(reader);
+        iter i(u16iter(buf.begin()));
+
+        printf("%S\n", utf16str(i).c_str());
+    }
+}
+Itk::TesterBase * CreateTinyAnalysisUnitTest()
+{ /*
+* Allocates a SuiteTester named tiny with new, registers six test functions
+* under the names wchar, reader, cn, utf16, jamu and hangul, and returns it.
+* NOTE(review): ownership of the returned pointer presumably passes to the
+* caller - confirm against the Itk test runner.
+*/
+    using namespace Itk;
+
+    SuiteTester
+        * testSuite =
+            new SuiteTester( "tiny" );
+
+    testSuite->add( "wchar", TinyWcharTest, "wchar" );
+    testSuite->add( "reader", TinyReaderTest, "reader" );
+    testSuite->add( "cn", TinyChinaTest, "cn" );
+    testSuite->add( "utf16", TinyUtf16Test, "utf16" );
+    testSuite->add( "jamu", TinyJamuTest, "jamu" );
+    testSuite->add( "hangul", TinyHangulTest, "hangul" );
+
+    return testSuite;
+}