searchengine/oss/loc/analysisunittest/src/thaianalysistest.cpp
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #include "itk.h"
       
    19 
       
    20 #include "thaianalysis.h"
       
    21 
       
    22 #include "CLucene.h"
       
    23 
       
    24 #include <iostream>
       
    25 
       
    26 #include "testutils.h"
       
    27 
       
    28 #include "evaluationtool.h"
       
    29 #include "analysisunittest.h"
       
    30 #include "thaistatemachine.h"
       
    31 
       
    32 
       
    33 using namespace analysis; 
       
    34 using namespace evaluationtool; 
       
    35 using namespace lucene::util;
       
    36 
       
    37 using namespace lucene::analysis; 
       
    38 
       
    39 #define THAI_TEXTCORPUS "c:\\data\\analysisunittestcorpus\\thai\\corpus.txt"
       
    40 
       
    41 #define BUFFER_SIZE 512
       
    42 
       
    43 void printBreaks(BreakIterator& breaks, const wchar_t* text)
       
    44 {
       
    45 	breaks.setText( text );
       
    46 
       
    47 	while (breaks.hasNext()) 
       
    48 	{
       
    49 		int begin = breaks.current(); 
       
    50 		int end = breaks.next();
       
    51 		
       
    52 		wchar_t buf[BUFFER_SIZE];
       
    53 		memcpy(buf, text+begin, sizeof(wchar_t)*(end-begin));
       
    54 		buf[end-begin] = '\0';
       
    55 		printf(" '%S'", buf);
       
    56 	}
       
    57 	printf("\n");
       
    58 }
       
    59 
       
    60 void thaiBreakIteratorTest(Itk::TestMgr* testMgr) 
       
    61 {
       
    62 	std::auto_ptr<BreakIterator> breaks( ThaiAnalysisInfra::theInstance()->createBreakIterator() );
       
    63 	
       
    64 	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
       
    65 	
       
    66 	Corpus corpus(THAI_TEXTCORPUS); 
       
    67 	
       
    68 	for (int i = 0; i < corpus.size(); i++) {
       
    69 		printBreaks(*breaks, corpus[i]);
       
    70 	}
       
    71 }
       
    72 
       
    73 void thaiAnalyzerTest(Itk::TestMgr* testMgr)
       
    74 {
       
    75 	ThaiAnalyzer analyzer;
       
    76 	
       
    77 	Corpus corpus(THAI_TEXTCORPUS); 
       
    78 	
       
    79 	for (int i = 0; i < corpus.size(); i++) {
       
    80 		printTokens(analyzer, corpus[i]);
       
    81 	}
       
    82 }
       
    83 
       
    84 void thaiAnalyzerSpeed(Itk::TestMgr* testMgr) 
       
    85 {
       
    86 	ThaiAnalyzer analyzer;
       
    87 	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
       
    88 	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
       
    89 	
       
    90 	Itk::Timestamp begin;
       
    91 	Itk::getTimestamp(&begin);
       
    92 
       
    93 	auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
       
    94 	lucene::analysis::Token token; 
       
    95 	while (stream->next(&token)); // go throught all tokens
       
    96 
       
    97 	Itk::Timestamp end;
       
    98 	Itk::getTimestamp(&end);
       
    99 	
       
   100 	long time = Itk::getElapsedMs(&end, &begin); 
       
   101 	ITK_REPORT( testMgr, "Thai analysis time", "%d ms / %d KB", time, (filesize/1000));
       
   102 	ITK_REPORT( testMgr, "Thai analysis speed", "%d KB/s", (filesize / time));
       
   103 }
       
   104 
       
   105 
       
   106 void thaiControlSpeed(Itk::TestMgr* testMgr) 
       
   107 {
       
   108 	lucene::analysis::standard::StandardAnalyzer analyzer;
       
   109 	FileReader reader(THAI_TEXTCORPUS, "UTF-8");
       
   110 	int filesize = Cpt::filesize(THAI_TEXTCORPUS);
       
   111 	
       
   112 	Itk::Timestamp begin;
       
   113 	Itk::getTimestamp(&begin);
       
   114 
       
   115 	auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) );
       
   116 	lucene::analysis::Token token; 
       
   117 	while (stream->next(&token)); // go throught all tokens
       
   118 
       
   119 	Itk::Timestamp end;
       
   120 	Itk::getTimestamp(&end);
       
   121 	
       
   122 	long time = Itk::getElapsedMs(&end, &begin); 
       
   123 	ITK_REPORT( testMgr, "Thai control time", "%d ms / %d KB", time, (filesize/1000));
       
   124 	ITK_REPORT( testMgr, "Thai control speed", "%d KB/s", (filesize / time));
       
   125 }
       
   126 
       
   127 
       
   128 
       
   129 Itk::TesterBase * CreateThaiAnalysisUnitTest() 
       
   130 {
       
   131 	using namespace Itk;
       
   132 	
       
   133 	SuiteTester
       
   134 		* testSuite = 
       
   135 			new SuiteTester( "thai" );
       
   136 	
       
   137 	testSuite->add( "breaks", thaiBreakIteratorTest, "breaks" );
       
   138 	testSuite->add( "analyzer", thaiAnalyzerTest, 	 "analyzer" );
       
   139 	testSuite->add( "analyzerSpeed", thaiAnalyzerSpeed );
       
   140 	testSuite->add( "controlSpeed", thaiControlSpeed );
       
   141 	
       
   142 	return testSuite;
       
   143 }
       
   144