searchengine/oss/loc/analysisunittest/src/thaianalysistest.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
hgs
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
hgs
parents:
diff changeset
     3
* All rights reserved.
hgs
parents:
diff changeset
     4
* This component and the accompanying materials are made available
hgs
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
hgs
parents:
diff changeset
     6
* which accompanies this distribution, and is available
hgs
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
hgs
parents:
diff changeset
     8
*
hgs
parents:
diff changeset
     9
* Initial Contributors:
hgs
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
hgs
parents:
diff changeset
    11
*
hgs
parents:
diff changeset
    12
* Contributors:
hgs
parents:
diff changeset
    13
*
hgs
parents:
diff changeset
    14
* Description: 
hgs
parents:
diff changeset
    15
*
hgs
parents:
diff changeset
    16
*/
hgs
parents:
diff changeset
    17
hgs
parents:
diff changeset
    18
#include "itk.h"
hgs
parents:
diff changeset
    19
hgs
parents:
diff changeset
    20
#include "thaianalysis.h"
hgs
parents:
diff changeset
    21
hgs
parents:
diff changeset
    22
#include "CLucene.h"
hgs
parents:
diff changeset
    23
hgs
parents:
diff changeset
    24
#include <iostream>
hgs
parents:
diff changeset
    25
hgs
parents:
diff changeset
    26
#include "testutils.h"
hgs
parents:
diff changeset
    27
hgs
parents:
diff changeset
    28
#include "evaluationtool.h"
hgs
parents:
diff changeset
    29
#include "analysisunittest.h"
hgs
parents:
diff changeset
    30
#include "thaistatemachine.h"
hgs
parents:
diff changeset
    31
hgs
parents:
diff changeset
    32
hgs
parents:
diff changeset
    33
using namespace analysis; 
hgs
parents:
diff changeset
    34
using namespace evaluationtool; 
hgs
parents:
diff changeset
    35
using namespace lucene::util;
hgs
parents:
diff changeset
    36
hgs
parents:
diff changeset
    37
using namespace lucene::analysis; 
hgs
parents:
diff changeset
    38
hgs
parents:
diff changeset
    39
#define THAI_TEXTCORPUS "c:\\data\\analysisunittestcorpus\\thai\\corpus.txt"
hgs
parents:
diff changeset
    40
hgs
parents:
diff changeset
    41
#define BUFFER_SIZE 512
hgs
parents:
diff changeset
    42
hgs
parents:
diff changeset
    43
/**
 * Prints every segment that the given break iterator finds in the text.
 *
 * Each segment is written to stdout as " '<segment>'", and the whole
 * line is terminated with a newline.
 *
 * @param breaks break iterator used to segment the text; its text is
 *               replaced with the given text
 * @param text   NUL-terminated wide character string to segment
 */
void printBreaks(BreakIterator& breaks, const wchar_t* text)
{
	breaks.setText( text );

	while (breaks.hasNext()) 
	{
		const int begin = breaks.current(); 
		const int end = breaks.next();

		// An inverted range would turn into a huge memcpy size below.
		// Stop instead of reading out of bounds.
		if (end < begin) 
		{
			break;
		}

		// Segments may be longer than the buffer. Print them in pieces
		// of at most BUFFER_SIZE-1 characters so that the terminating
		// NUL always fits and the buffer is never overrun.
		wchar_t buf[BUFFER_SIZE];
		printf(" '");
		for (int pos = begin; pos < end; ) 
		{
			int chunk = end - pos;
			if (chunk > BUFFER_SIZE - 1) 
			{
				chunk = BUFFER_SIZE - 1;
			}
			memcpy(buf, text+pos, sizeof(wchar_t)*chunk);
			buf[chunk] = L'\0';
			printf("%S", buf);
			pos += chunk;
		}
		printf("'");
	}
	printf("\n");
}
hgs
parents:
diff changeset
    59
hgs
parents:
diff changeset
    60
/**
 * Runs the Thai break iterator over every entry of the Thai text corpus
 * and prints the segments found in each entry.
 *
 * @param testMgr test manager (not used by this test case)
 */
void thaiBreakIteratorTest(Itk::TestMgr* testMgr) 
{
	std::auto_ptr<BreakIterator> breaks( ThaiAnalysisInfra::theInstance()->createBreakIterator() );
	
	// The corpus object is constructed from the corpus path and is the
	// only input this test iterates over; no separate file reader is
	// needed here.
	Corpus corpus(THAI_TEXTCORPUS); 
	
	for (int i = 0; i < corpus.size(); i++) {
		printBreaks(*breaks, corpus[i]);
	}
}
hgs
parents:
diff changeset
    72
hgs
parents:
diff changeset
    73
/**
 * Feeds every entry of the Thai text corpus through a ThaiAnalyzer and
 * prints the resulting tokens with printTokens().
 *
 * @param testMgr test manager (not used by this test case)
 */
void thaiAnalyzerTest(Itk::TestMgr* testMgr)
{
	Corpus thaiCorpus(THAI_TEXTCORPUS); 
	ThaiAnalyzer thaiAnalyzer;

	int entry = 0;
	while (entry < thaiCorpus.size()) 
	{
		printTokens(thaiAnalyzer, thaiCorpus[entry]);
		++entry;
	}
}
hgs
parents:
diff changeset
    83
hgs
parents:
diff changeset
    84
/**
 * Measures how long the ThaiAnalyzer takes to tokenize the whole Thai
 * text corpus and reports the elapsed time and the throughput.
 *
 * @param testMgr test manager that receives the reported figures
 */
void thaiAnalyzerSpeed(Itk::TestMgr* testMgr) 
{
	ThaiAnalyzer analyzer;
	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
	
	Itk::Timestamp begin;
	Itk::getTimestamp(&begin);

	std::auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
	lucene::analysis::Token token; 
	while (stream->next(&token)) 
	{
		// go through all tokens; only the elapsed time matters
	}

	Itk::Timestamp end;
	Itk::getTimestamp(&end);
	
	long time = Itk::getElapsedMs(&end, &begin); 

	// The report formats use %d, so pass plain ints rather than longs.
	const int elapsedMs = static_cast<int>(time);
	// A small corpus may be processed in less than one millisecond;
	// use at least 1 ms as divisor to avoid a division by zero.
	const int divisorMs = elapsedMs > 0 ? elapsedMs : 1;

	ITK_REPORT( testMgr, "Thai analysis time", "%d ms / %d KB", elapsedMs, (filesize/1000));
	// bytes per millisecond equals kilobytes (1000 bytes) per second
	ITK_REPORT( testMgr, "Thai analysis speed", "%d KB/s", (filesize / divisorMs));
}
hgs
parents:
diff changeset
   104
hgs
parents:
diff changeset
   105
hgs
parents:
diff changeset
   106
/**
 * Control measurement for thaiAnalyzerSpeed: tokenizes the same Thai
 * text corpus with CLucene's StandardAnalyzer and reports the elapsed
 * time and the throughput.
 *
 * @param testMgr test manager that receives the reported figures
 */
void thaiControlSpeed(Itk::TestMgr* testMgr) 
{
	lucene::analysis::standard::StandardAnalyzer analyzer;
	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
	
	Itk::Timestamp begin;
	Itk::getTimestamp(&begin);

	std::auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
	lucene::analysis::Token token; 
	while (stream->next(&token)) 
	{
		// go through all tokens; only the elapsed time matters
	}

	Itk::Timestamp end;
	Itk::getTimestamp(&end);
	
	long time = Itk::getElapsedMs(&end, &begin); 

	// The report formats use %d, so pass plain ints rather than longs.
	const int elapsedMs = static_cast<int>(time);
	// A small corpus may be processed in less than one millisecond;
	// use at least 1 ms as divisor to avoid a division by zero.
	const int divisorMs = elapsedMs > 0 ? elapsedMs : 1;

	ITK_REPORT( testMgr, "Thai control time", "%d ms / %d KB", elapsedMs, (filesize/1000));
	// bytes per millisecond equals kilobytes (1000 bytes) per second
	ITK_REPORT( testMgr, "Thai control speed", "%d KB/s", (filesize / divisorMs));
}
hgs
parents:
diff changeset
   126
hgs
parents:
diff changeset
   127
hgs
parents:
diff changeset
   128
hgs
parents:
diff changeset
   129
/**
 * Builds the "thai" test suite containing the break iterator test, the
 * analyzer test and the two speed measurements.
 *
 * @return newly allocated suite holding the Thai analysis test cases
 */
Itk::TesterBase * CreateThaiAnalysisUnitTest() 
{
	Itk::SuiteTester* thaiSuite = new Itk::SuiteTester( "thai" );

	// Output-producing test cases
	thaiSuite->add( "breaks", thaiBreakIteratorTest, "breaks" );
	thaiSuite->add( "analyzer", thaiAnalyzerTest, "analyzer" );

	// Speed measurements
	thaiSuite->add( "analyzerSpeed", thaiAnalyzerSpeed );
	thaiSuite->add( "controlSpeed", thaiControlSpeed );

	return thaiSuite;
}
hgs
parents:
diff changeset
   144