searchengine/oss/loc/analysisunittest/src/thaianalysistest.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041

/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: Unit tests and speed measurements for the Thai break iterator and analyzer.
*
*/

#include "itk.h"

#include "thaianalysis.h"

#include "CLucene.h"

#include <iostream>

#include "testutils.h"

#include "evaluationtool.h"
#include "analysisunittest.h"
#include "thaistatemachine.h"


using namespace analysis; 
using namespace evaluationtool; 
using namespace lucene::util;

using namespace lucene::analysis; 

// Path of the Thai text corpus file used by the tests in this file.
#define THAI_TEXTCORPUS "c:\\data\\analysisunittestcorpus\\thai\\corpus.txt"

// Capacity, in wchar_t elements, of the scratch buffer declared in printBreaks().
#define BUFFER_SIZE 512

/**
 * Prints every segment that the break iterator finds in the given text.
 *
 * Each segment is written to stdout as " '<segment>'" and the whole line is
 * terminated with a newline.
 *
 * @param breaks break iterator to drive; its text is replaced with `text`
 * @param text   NUL-terminated wide string to be segmented
 */
void printBreaks(BreakIterator& breaks, const wchar_t* text)
{
	breaks.setText( text );

	while (breaks.hasNext()) 
	{
		int begin = breaks.current(); 
		int end = breaks.next();

		// A reversed range would turn into a huge size_t in memcpy; treat it
		// as an empty segment instead.
		int remaining = end - begin;
		if (remaining < 0) 
		{
			remaining = 0;
		}

		// The scratch buffer holds at most BUFFER_SIZE-1 characters plus the
		// terminator, so longer segments are printed in several chunks
		// instead of overflowing the buffer.
		const wchar_t* cursor = text + begin;
		wchar_t buf[BUFFER_SIZE];

		printf(" '");
		do 
		{
			int chunk = remaining < (BUFFER_SIZE - 1) ? remaining : (BUFFER_SIZE - 1);
			memcpy(buf, cursor, sizeof(wchar_t)*chunk);
			buf[chunk] = L'\0';
			printf("%S", buf);

			cursor += chunk;
			remaining -= chunk;
		} 
		while (remaining > 0);
		printf("'");
	}
	printf("\n");
}

/**
 * Prints the break-iterator segmentation of every entry in the Thai corpus.
 *
 * The break iterator is obtained from ThaiAnalysisInfra and owned by this
 * function for the duration of the test.
 *
 * @param testMgr test manager supplied by the test framework (not used here)
 */
void thaiBreakIteratorTest(Itk::TestMgr* testMgr) 
{
	std::auto_ptr<BreakIterator> breaks( ThaiAnalysisInfra::theInstance()->createBreakIterator() );
	
	// The corpus is read through Corpus only; the FileReader that used to be
	// opened on the same file here was never used.
	Corpus corpus(THAI_TEXTCORPUS); 
	
	for (int i = 0; i < corpus.size(); i++) {
		printBreaks(*breaks, corpus[i]);
	}
}

/**
 * Feeds every entry of the Thai corpus through ThaiAnalyzer and prints the
 * resulting tokens via printTokens().
 *
 * @param testMgr test manager supplied by the test framework (not used here)
 */
void thaiAnalyzerTest(Itk::TestMgr* testMgr)
{
	ThaiAnalyzer analyzer;

	Corpus corpus(THAI_TEXTCORPUS);

	int entry = 0;
	while (entry < corpus.size())
	{
		printTokens(analyzer, corpus[entry]);
		++entry;
	}
}

/**
 * Measures how long ThaiAnalyzer needs to tokenize the Thai corpus file and
 * reports the elapsed time and the resulting throughput.
 *
 * @param testMgr test manager that receives the two report lines
 */
void thaiAnalyzerSpeed(Itk::TestMgr* testMgr) 
{
	ThaiAnalyzer analyzer;
	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
	
	Itk::Timestamp begin;
	Itk::getTimestamp(&begin);

	auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
	lucene::analysis::Token token; 
	while (stream->next(&token)) 
	{
		// go through all tokens; only the elapsed time matters
	}

	Itk::Timestamp end;
	Itk::getTimestamp(&end);
	
	long time = Itk::getElapsedMs(&end, &begin); 

	// A run shorter than the timer resolution yields 0 ms; clamp the divisor
	// so that the speed computation cannot divide by zero.
	long divisor = time > 0 ? time : 1;

	// The values are cast to int so that they match the "%d" conversions.
	// filesize / ms is reported as KB/s, consistent with the filesize/1000
	// "KB" figure above (assumes Cpt::filesize returns bytes -- TODO confirm).
	ITK_REPORT( testMgr, "Thai analysis time", "%d ms / %d KB", static_cast<int>(time), (filesize/1000));
	ITK_REPORT( testMgr, "Thai analysis speed", "%d KB/s", static_cast<int>(filesize / divisor));
}


/**
 * Control measurement: tokenizes the Thai corpus file with CLucene's
 * StandardAnalyzer and reports the elapsed time and throughput, using the
 * same procedure as thaiAnalyzerSpeed().
 *
 * @param testMgr test manager that receives the two report lines
 */
void thaiControlSpeed(Itk::TestMgr* testMgr) 
{
	lucene::analysis::standard::StandardAnalyzer analyzer;
	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
	
	Itk::Timestamp begin;
	Itk::getTimestamp(&begin);

	auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
	lucene::analysis::Token token; 
	while (stream->next(&token)) 
	{
		// go through all tokens; only the elapsed time matters
	}

	Itk::Timestamp end;
	Itk::getTimestamp(&end);
	
	long time = Itk::getElapsedMs(&end, &begin); 

	// A run shorter than the timer resolution yields 0 ms; clamp the divisor
	// so that the speed computation cannot divide by zero.
	long divisor = time > 0 ? time : 1;

	// The values are cast to int so that they match the "%d" conversions.
	// filesize / ms is reported as KB/s, consistent with the filesize/1000
	// "KB" figure above (assumes Cpt::filesize returns bytes -- TODO confirm).
	ITK_REPORT( testMgr, "Thai control time", "%d ms / %d KB", static_cast<int>(time), (filesize/1000));
	ITK_REPORT( testMgr, "Thai control speed", "%d KB/s", static_cast<int>(filesize / divisor));
}



/**
 * Builds the "thai" test suite.
 *
 * Registers the break-iterator test, the analyzer test and the two speed
 * measurements, then returns the newly allocated suite to the caller.
 *
 * @return the populated suite, allocated with new
 */
Itk::TesterBase * CreateThaiAnalysisUnitTest() 
{
	using namespace Itk;

	SuiteTester* suite = new SuiteTester( "thai" );

	// Tests registered with a third argument
	suite->add( "breaks", thaiBreakIteratorTest, "breaks" );
	suite->add( "analyzer", thaiAnalyzerTest, "analyzer" );

	// Speed measurements, registered without a third argument
	suite->add( "analyzerSpeed", thaiAnalyzerSpeed );
	suite->add( "controlSpeed", thaiControlSpeed );

	return suite;
}