|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 #include "itk.h" |
|
19 |
|
20 #include "thaianalysis.h" |
|
21 |
|
22 #include "CLucene.h" |
|
23 |
|
24 #include <iostream> |
|
25 |
|
26 #include "testutils.h" |
|
27 |
|
28 #include "evaluationtool.h" |
|
29 #include "analysisunittest.h" |
|
30 #include "thaistatemachine.h" |
|
31 |
|
32 |
|
33 using namespace analysis; |
|
34 using namespace evaluationtool; |
|
35 using namespace lucene::util; |
|
36 |
|
37 using namespace lucene::analysis; |
|
38 |
|
39 #define THAI_TEXTCORPUS "c:\\data\\analysisunittestcorpus\\thai\\corpus.txt" |
|
40 |
|
41 #define BUFFER_SIZE 512 |
|
42 |
|
43 void printBreaks(BreakIterator& breaks, const wchar_t* text) |
|
44 { |
|
45 breaks.setText( text ); |
|
46 |
|
47 while (breaks.hasNext()) |
|
48 { |
|
49 int begin = breaks.current(); |
|
50 int end = breaks.next(); |
|
51 |
|
52 wchar_t buf[BUFFER_SIZE]; |
|
53 memcpy(buf, text+begin, sizeof(wchar_t)*(end-begin)); |
|
54 buf[end-begin] = '\0'; |
|
55 printf(" '%S'", buf); |
|
56 } |
|
57 printf("\n"); |
|
58 } |
|
59 |
|
60 void thaiBreakIteratorTest(Itk::TestMgr* testMgr) |
|
61 { |
|
62 std::auto_ptr<BreakIterator> breaks( ThaiAnalysisInfra::theInstance()->createBreakIterator() ); |
|
63 |
|
64 FileReader reader(THAI_TEXTCORPUS, "UTF-8"); |
|
65 |
|
66 Corpus corpus(THAI_TEXTCORPUS); |
|
67 |
|
68 for (int i = 0; i < corpus.size(); i++) { |
|
69 printBreaks(*breaks, corpus[i]); |
|
70 } |
|
71 } |
|
72 |
|
73 void thaiAnalyzerTest(Itk::TestMgr* testMgr) |
|
74 { |
|
75 ThaiAnalyzer analyzer; |
|
76 |
|
77 Corpus corpus(THAI_TEXTCORPUS); |
|
78 |
|
79 for (int i = 0; i < corpus.size(); i++) { |
|
80 printTokens(analyzer, corpus[i]); |
|
81 } |
|
82 } |
|
83 |
|
84 void thaiAnalyzerSpeed(Itk::TestMgr* testMgr) |
|
85 { |
|
86 ThaiAnalyzer analyzer; |
|
87 FileReader reader(THAI_TEXTCORPUS, "UTF-8"); |
|
88 int filesize = Cpt::filesize(THAI_TEXTCORPUS); |
|
89 |
|
90 Itk::Timestamp begin; |
|
91 Itk::getTimestamp(&begin); |
|
92 |
|
93 auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) ); |
|
94 lucene::analysis::Token token; |
|
95 while (stream->next(&token)); // go throught all tokens |
|
96 |
|
97 Itk::Timestamp end; |
|
98 Itk::getTimestamp(&end); |
|
99 |
|
100 long time = Itk::getElapsedMs(&end, &begin); |
|
101 ITK_REPORT( testMgr, "Thai analysis time", "%d ms / %d KB", time, (filesize/1000)); |
|
102 ITK_REPORT( testMgr, "Thai analysis speed", "%d KB/s", (filesize / time)); |
|
103 } |
|
104 |
|
105 |
|
106 void thaiControlSpeed(Itk::TestMgr* testMgr) |
|
107 { |
|
108 lucene::analysis::standard::StandardAnalyzer analyzer; |
|
109 FileReader reader(THAI_TEXTCORPUS, "UTF-8"); |
|
110 int filesize = Cpt::filesize(THAI_TEXTCORPUS); |
|
111 |
|
112 Itk::Timestamp begin; |
|
113 Itk::getTimestamp(&begin); |
|
114 |
|
115 auto_ptr<TokenStream> stream( analyzer.tokenStream( NULL, &reader ) ); |
|
116 lucene::analysis::Token token; |
|
117 while (stream->next(&token)); // go throught all tokens |
|
118 |
|
119 Itk::Timestamp end; |
|
120 Itk::getTimestamp(&end); |
|
121 |
|
122 long time = Itk::getElapsedMs(&end, &begin); |
|
123 ITK_REPORT( testMgr, "Thai control time", "%d ms / %d KB", time, (filesize/1000)); |
|
124 ITK_REPORT( testMgr, "Thai control speed", "%d KB/s", (filesize / time)); |
|
125 } |
|
126 |
|
127 |
|
128 |
|
129 Itk::TesterBase * CreateThaiAnalysisUnitTest() |
|
130 { |
|
131 using namespace Itk; |
|
132 |
|
133 SuiteTester |
|
134 * testSuite = |
|
135 new SuiteTester( "thai" ); |
|
136 |
|
137 testSuite->add( "breaks", thaiBreakIteratorTest, "breaks" ); |
|
138 testSuite->add( "analyzer", thaiAnalyzerTest, "analyzer" ); |
|
139 testSuite->add( "analyzerSpeed", thaiAnalyzerSpeed ); |
|
140 testSuite->add( "controlSpeed", thaiControlSpeed ); |
|
141 |
|
142 return testSuite; |
|
143 } |
|
144 |