searchengine/cpix/tsrc/cpixunittest/src/analysiswhitebox.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
parent 8 6547bf8ca13a
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
#include <wchar.h>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
#include <stddef.h>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
#include <iostream>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
#include "cpixidxdb.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
#include "itk.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
#include "config.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
#include "testutils.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
// For testing custom analyzer
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
#include "CLucene.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
#include "CLucene\analysis\AnalysisHeader.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
#include "CLucene\util\stringreader.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
#include "analyzerexp.h"
8
hgs
parents: 7
diff changeset
    18
#include "customanalyzer.h"
hgs
parents: 7
diff changeset
    19
hgs
parents: 7
diff changeset
    20
#include "localetestinfos.h"
hgs
parents: 7
diff changeset
    21
hgs
parents: 7
diff changeset
    22
#include "spi/locale.h"
hgs
parents: 7
diff changeset
    23
#include "cpixstrtools.h"
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
using namespace Cpt::Lex; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
using namespace Cpt::Parser; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
using namespace Cpix::AnalyzerExp; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
/**
 * Writes one lexer token to stdout in the form TYPE('TEXT').
 * No newline is appended, so consecutive tokens end up on the same line.
 *
 * NOTE(review): both values are consumed by the wide-string "%S"
 * conversion, so type() and text() are each expected to yield a wchar_t
 * pointer - confirm against the Cpt::Lex::Token declaration.
 *
 * @param token the token to print (taken by value).
 */
void PrintToken(Cpt::Lex::Token token)
{
    printf("%S('%S')",
           token.type(),
           token.text());
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
8
hgs
parents: 7
diff changeset
    34
/**
 * Tokenizes a sample "switch { ... }" analyzer definition, wraps the token
 * source in a StdFilter and prints every token it yields via PrintToken.
 *
 * @param testMgr test manager supplied by the test framework; the body does
 *                not use it.
 */
void TestTokenization6(Itk::TestMgr * testMgr)
{
    // Definition under test, assembled from adjacent wide literals.
    const wchar_t* const definition =
        L"switch { "
          L"case '_docuid', '_mimetype': keywords;"
          L"case '_baseappclass':        whitespace>lowercase;"
          L"default: 					 natural(en); "
        L"}";

    Cpix::AnalyzerExp::Tokenizer tokenizer;
    Tokens source(tokenizer, definition);
    StdFilter tokens(source);

    // The filter converts to false once it has no more tokens to hand out;
    // post-increment returns the current token and advances.
    while (tokens) {
        PrintToken(tokens++);
    }
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
8
hgs
parents: 7
diff changeset
    49
/**
 * Parses two invokation expressions and one piping expression and prints
 * what the parser produced (identifier, parameter count, tokenizer id,
 * filter count).
 *
 * Any ParseException or LexException raised along the way is caught and its
 * message printed instead of propagating.
 *
 * @param mgr test manager supplied by the test framework; the body does not
 *            use it.
 */
void TestParsing(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    // First invokation: identifiers and quoted literals as parameters.
    Tokens source(tokenizer, L"foobar(zap, foo, 'bar', 'a', raboof)");
    StdFilter tokens(source);
    Lexer lexer(tokens);

    // Piping expression handed to ParsePiping() as raw text.
    const wchar_t* text = L" stdtokens >lowercase>stopwords(fin)>stopwords('a', 'an','the')>stem(fin)  ";

    // Second invokation: integer and real number parameters.
    Tokens source3(tokenizer, L"foobar(zap, 0, 0.0045, 4, 'a', 9223.031)");
    StdFilter tokens3(source3);
    Lexer lexer3(tokens3);

    try {
        auto_ptr<Invokation> invoke = ParseInvokation(lexer);
        lexer.eatEof();
        printf("Invoke identifier: %S\n", invoke->id());
        // "%d" consumes an int; the container size is converted explicitly
        // so the argument matches the conversion on every platform.
        printf("%d parameters\n", static_cast<int>(invoke->params().size()));

        auto_ptr<Piping> piping = ParsePiping(text);
        printf("piping done.\n");

        // A single checked downcast doubles as the type test: the id is
        // printed only when the piping's tokenizer is an Invokation.
        const Invokation* tokenizerInvokation =
            dynamic_cast<const Invokation*>(&piping->tokenizer());
        if (tokenizerInvokation) {
            printf("Tokenizer: %S\n", tokenizerInvokation->id());
        }
        printf("%d filters\n", static_cast<int>(piping->filters().size()));

        invoke = ParseInvokation(lexer3);
        lexer3.eatEof();
        printf("Invoke identifier: %S\n", invoke->id());
        printf("%d parameters\n", static_cast<int>(invoke->params().size()));
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        printf("LexException: %S\n", e.wWhat());
    }
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
8
hgs
parents: 7
diff changeset
    85
/**
 * Parses a "switch { ... }" analyzer definition and, when the resulting
 * piping's tokenizer is a Switch, prints the labels of every case followed
 * by a "default" line. The analyzers behind the cases are not printed.
 *
 * A ParseException or LexException is given the source text as context and
 * its message is printed instead of propagating.
 *
 * @param mgr test manager supplied by the test framework; the body does not
 *            use it.
 */
void TestSwitch(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    const wchar_t* text =
        L"switch { "
          L"case '_docuid', '_mimetype': keywords;"
          L"case '_baseappclass':        whitespace>lowercase;"
          L"default: 					 natural(en); "
        L"}";

    try {
        auto_ptr<Piping> sw = ParsePiping(text);

        // One checked downcast serves as both the type test and the access
        // path; nothing is printed when the tokenizer is not a Switch.
        const Switch* s = dynamic_cast<const Switch*>(&sw->tokenizer());
        if (s) {
            // Counts are taken once as int so the index comparison does not
            // mix signed and unsigned operands.
            const int caseCount = static_cast<int>(s->cases().size());
            for (int i = 0; i < caseCount; i++) {
                const Case* c = s->cases()[i];
                printf("case ");
                // Labels are written back to back, without a separator.
                const int labelCount = static_cast<int>(c->cases().size());
                for (int j = 0; j < labelCount; j++) {
                    printf("%S", c->cases()[j]);
                }
                printf(": ...\n");
            }
            printf("default: ...\n");
        }
    } catch (ParseException& e) {
        e.setContext(text);
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        e.setContext(text);
        printf("LexException: %S\n", e.wWhat());
    }
}
hgs
parents: 7
diff changeset
   121
hgs
parents: 7
diff changeset
   122
/**
 * Parses a "config_switch { ... }" analyzer definition and, when the
 * resulting piping's tokenizer is a ConfigSwitch, prints the labels of every
 * case followed by a "default" line. The analyzers behind the cases are not
 * printed.
 *
 * A ParseException or LexException is given the source text as context and
 * its message is printed instead of propagating.
 *
 * @param mgr test manager supplied by the test framework; the body does not
 *            use it.
 */
void TestConfigSwitch(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    const wchar_t* text =
        L"config_switch { "
          L"case 'indexing': 	korean;"
          L"case 'query':       koreanquery;"
          L"case 'prefix':      letter;"
          L"default: 			korean;"
        L"}";

    try {
        auto_ptr<Piping> sw = ParsePiping(text);

        // One checked downcast serves as both the type test and the access
        // path; nothing is printed when the tokenizer is not a ConfigSwitch.
        const ConfigSwitch* s = dynamic_cast<const ConfigSwitch*>(&sw->tokenizer());
        if (s) {
            // Counts are taken once as int so the index comparison does not
            // mix signed and unsigned operands.
            const int caseCount = static_cast<int>(s->cases().size());
            for (int i = 0; i < caseCount; i++) {
                const Case* c = s->cases()[i];
                printf("case ");
                // Labels are written back to back, without a separator.
                const int labelCount = static_cast<int>(c->cases().size());
                for (int j = 0; j < labelCount; j++) {
                    printf("%S", c->cases()[j]);
                }
                printf(": ...\n");
            }
            printf("default: ...\n");
        }
    } catch (ParseException& e) {
        e.setContext(text);
        printf("ParseException: %S\n", e.wWhat());
    } catch (LexException& e) {
        e.setContext(text);
        printf("LexException: %S\n", e.wWhat());
    }
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
8
hgs
parents: 7
diff changeset
   160
hgs
parents: 7
diff changeset
   161
/**
 * Feeds four malformed expressions to ParsePiping() and prints the message
 * of the exception each one raises. Every attempt catches only the
 * exception types listed in its own handlers.
 *
 * @param mgr test manager supplied by the test framework; the body does not
 *            use it.
 */
void TestParsingErrors(Itk::TestMgr* mgr)
{
    Cpix::AnalyzerExp::Tokenizer tokenizer;

    // Holds the expression currently being parsed. It is only assigned,
    // never read back, in this function.
    const wchar_t* text;

    // 1. Input ends in the middle of the parameter list.
    try {
        text = L"foobar(zap, foo, 'bar', 'raf', do, ";
        ParsePiping(text);
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    }

    // 2. Quoted literal that is never closed.
    try {
        text = L"foobar(zap, foo, 'bar', 'a, raboof)";
        ParsePiping(text);
    } catch (LexException& e) {
        printf("LexException: %S\n", e.wWhat());
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    }

    // 3. Character the tokenizer does not know ('!').
    try {
        text = L"foobar(!zap, foo, 'bar', 'a', raboof)";
        ParsePiping(text);
    } catch (LexException& e) {
        printf("LexException: %S\n", e.wWhat());
    }

    // 4. Comma missing between two parameters.
    try {
        text = L"foobar(zap, foo, 'bar', 'a' raboof)";
        ParsePiping(text);
    } catch (ParseException& e) {
        printf("ParseException: %S\n", e.wWhat());
    }
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
const char * CustomAnalyzerTestDocs[] = {
8
hgs
parents: 7
diff changeset
   199
    STEM_TEST_CORPUS_PATH "\\en\\1.txt",
hgs
parents: 7
diff changeset
   200
    STEM_TEST_CORPUS_PATH "\\en\\2.txt",
hgs
parents: 7
diff changeset
   201
    STEM_TEST_CORPUS_PATH "\\en\\3.txt",
hgs
parents: 7
diff changeset
   202
    STEM_TEST_CORPUS_PATH "\\en\\4.txt",
hgs
parents: 7
diff changeset
   203
        
hgs
parents: 7
diff changeset
   204
    STEM_TEST_CORPUS_PATH "\\fi\\1.txt",
hgs
parents: 7
diff changeset
   205
    STEM_TEST_CORPUS_PATH "\\fi\\2.txt",
hgs
parents: 7
diff changeset
   206
    LOC_TEST_CORPUS_PATH "\\th\\1.txt",
hgs
parents: 7
diff changeset
   207
    LOC_TEST_CORPUS_PATH "\\th\\2.txt",
hgs
parents: 7
diff changeset
   208
    
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   209
    NULL
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   210
};
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   211
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   212
const char DEFAULT_ENCODING[] = "UTF-8";
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   214
/**
 * Drains a token stream and prints each term in single quotes on one line,
 * terminated by a newline.
 *
 * Each term is preceded by one space per unit of its position increment;
 * a term with a position increment of zero is preceded by "|" instead.
 *
 * @param stream the stream to read until next() reports no more tokens.
 *               It is neither closed nor deleted here.
 */
void PrintTokenStream(lucene::analysis::TokenStream* stream)
{
    using namespace lucene::analysis;

    // Spelled out in full although the namespace is imported above.
    lucene::analysis::Token current;

    while (stream->next(&current)) {
        const int increment = current.getPositionIncrement();
        if (increment != 0) {
            for (int pad = 0; pad < increment; ++pad) {
                printf(" ");
            }
        } else {
            printf("|");
        }
        printf("'%S'", current.termText());
    }
    printf("\n");
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   229
8
hgs
parents: 7
diff changeset
   230
void TestCustomAnalyzer(Itk::TestMgr * testMgr, 
hgs
parents: 7
diff changeset
   231
					    const char** files, 
hgs
parents: 7
diff changeset
   232
					    const wchar_t* definition)
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   233
{
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   234
	using namespace lucene::analysis; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   235
	using namespace lucene::util; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   236
	using namespace Cpix; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   237
	using namespace std; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   238
	CustomAnalyzer analyzer(definition);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   239
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   240
	printf("Analyzer \"%S\":\n", definition); 
8
hgs
parents: 7
diff changeset
   241
	for (int i = 0; files[i]; i++) 
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   242
	{
8
hgs
parents: 7
diff changeset
   243
		printf("File !%s tokenized:\n", (files[i]+1));
hgs
parents: 7
diff changeset
   244
		FileReader file( files[i], DEFAULT_ENCODING ); 
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   245
		
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   246
		TokenStream* stream = analyzer.tokenStream( L"field", &file ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   247
		PrintTokenStream( stream ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   248
		stream->close(); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   249
		_CLDELETE( stream ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   250
	}
8
hgs
parents: 7
diff changeset
   251
	printf("\n");
hgs
parents: 7
diff changeset
   252
}
hgs
parents: 7
diff changeset
   253
hgs
parents: 7
diff changeset
   254
/**
 * Convenience overload: runs the given analyzer definition against the
 * default document list, CustomAnalyzerTestDocs.
 *
 * @param testMgr    test manager, forwarded unchanged.
 * @param definition analyzer definition text, forwarded unchanged.
 */
void TestCustomAnalyzer(Itk::TestMgr * testMgr,
                        const wchar_t* definition)
{
    TestCustomAnalyzer(testMgr,
                       CustomAnalyzerTestDocs,
                       definition);
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   257
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   258
/**
 * Runs the default document list through a series of analyzer definitions,
 * in the order listed below, printing the tokenization of each.
 *
 * @param testMgr test manager, forwarded to every TestCustomAnalyzer call.
 */
void TestCustomAnalyzers(Itk::TestMgr * testMgr)
{
    // Definitions are exercised strictly in this order.
    static const wchar_t* const definitions[] = {
        L"stdtokens",
        L"whitespace",
        L"whitespace>lowercase",
        L"whitespace>accent",
        L"letter",
        L"letter>lowercase",
        L"keyword",
        L"keyword>lowercase",
        // Disabled: L"stdtokens>lowercase>stem(en)" - does not work with
        // non-ASCII input.
        L"letter>lowercase>stop(en)",
        // The non-ASCII letter in the last stop word is written as a hex
        // escape (U+00E4) so the literal does not depend on the encoding
        // the source file is stored or compiled in.
        // NOTE(review): assumes the intended word is Finnish "n\u00e4in" - confirm.
        L"letter>lowercase>stop('i', 'oh', 'nyt', 'n\xe4in')",
        L"letter>length(2, 4)",
        L"standard>prefixes(1)",
        L"standard>prefixes(2)",
        L"standard>prefixes(3)",
        L"stdtokens>stdfilter>lowercase>thai>stop(en)",
        L"cjk>stop(en)",
        L"ngram(1)>lowercase>stop(en)",
        L"ngram(2)>lowercase>stop(en)"
    };
    const int definitionCount =
        static_cast<int>(sizeof(definitions) / sizeof(definitions[0]));

    for (int i = 0; i < definitionCount; ++i) {
        TestCustomAnalyzer(testMgr, definitions[i]);
    }
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   280
8
hgs
parents: 7
diff changeset
   281
// For each locale code in the table below: prints a "locale=<code>" banner,
// selects that locale through cpix_SetLocale and runs the L"natural"
// analyzer definition through TestCustomAnalyzer. Afterwards the locale is
// set to cpix_LOCALE_AUTO.
//
// testMgr - test manager forwarded to TestCustomAnalyzer.
void TestTokenizationWithLocales(Itk::TestMgr * testMgr)
{
	static const char * const localeCodes[] = { "en", "th", "ko", "zh", "jp" };
	const size_t localeCount = sizeof(localeCodes) / sizeof(localeCodes[0]);

	// The outcome written into this structure is not inspected here.
	cpix_Result setLocaleResult;

	for (size_t i = 0; i < localeCount; ++i)
	{
		printf("locale=%s\n", localeCodes[i]);
		cpix_SetLocale(&setLocaleResult, localeCodes[i]);
		TestCustomAnalyzer(testMgr, L"natural");
	}

	cpix_SetLocale(&setLocaleResult, cpix_LOCALE_AUTO);
}
hgs
parents: 7
diff changeset
   305
hgs
parents: 7
diff changeset
   306
template<typename T> 
hgs
parents: 7
diff changeset
   307
void TestTokenizationWithLocale(Itk::TestMgr * testMgr) {
hgs
parents: 7
diff changeset
   308
	cpix_Result result; 
hgs
parents: 7
diff changeset
   309
	cpix_SetLocale( &result, T::LOCALE ); 
hgs
parents: 7
diff changeset
   310
    TestCustomAnalyzer(testMgr, EnglishLocale::FILES, L"natural");
hgs
parents: 7
diff changeset
   311
	TestCustomAnalyzer(testMgr, T::FILES, L"natural");
hgs
parents: 7
diff changeset
   312
	cpix_SetLocale( &result, cpix_LOCALE_AUTO ); 
hgs
parents: 7
diff changeset
   313
}
hgs
parents: 7
diff changeset
   314
hgs
parents: 7
diff changeset
   315
hgs
parents: 7
diff changeset
   316
template<typename T>
hgs
parents: 7
diff changeset
   317
void AddTokenizationWithLocaleTest(Itk::SuiteTester* suite) {
hgs
parents: 7
diff changeset
   318
    suite->add(T::LOCALE,
hgs
parents: 7
diff changeset
   319
               &TestTokenizationWithLocale<T>,
hgs
parents: 7
diff changeset
   320
               T::LOCALE);
hgs
parents: 7
diff changeset
   321
}
hgs
parents: 7
diff changeset
   322
hgs
parents: 7
diff changeset
   323
// Sets the locale to cpix_LOCALE_AUTO and runs the L"natural" analyzer
// definition through TestCustomAnalyzer.
//
// testMgr - test manager forwarded to TestCustomAnalyzer.
void TestTokenizationWithCurrentLocale(Itk::TestMgr * testMgr)
{
	// The outcome written into this structure is not inspected here.
	cpix_Result setLocaleResult;

	cpix_SetLocale(&setLocaleResult, cpix_LOCALE_AUTO);
	TestCustomAnalyzer(testMgr, L"natural");
}
hgs
parents: 7
diff changeset
   328
hgs
parents: 7
diff changeset
   329
// Builds a CustomAnalyzer from 'definition', opens the first entry of
// CustomAnalyzerTestDocs, asks the analyzer for a token stream for 'field'
// over that file and prints the stream with PrintTokenStream.
//
// testMgr    - not used by this function.
// definition - analyzer definition string handed to the CustomAnalyzer
//              constructor.
// field      - field name passed to CustomAnalyzer::tokenStream and echoed
//              in the printed header.
void TestAnalyzerWithField(Itk::TestMgr * testMgr, const wchar_t* definition, const wchar_t* field)
{
	using namespace lucene::analysis;
	using namespace lucene::util;
	using namespace Cpix;
	using namespace std;

	CustomAnalyzer customAnalyzer(definition);

	// The header prints the document path without its first character.
	const char * const docPath = CustomAnalyzerTestDocs[0];
	printf("File !%s tokenized for field %S:\n", (docPath + 1), field);

	FileReader reader(CustomAnalyzerTestDocs[0], DEFAULT_ENCODING);

	TokenStream* tokens = customAnalyzer.tokenStream(field, &reader);
	PrintTokenStream(tokens);

	// Close the stream and dispose of it once it has been printed.
	tokens->close();
	_CLDELETE(tokens);
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   345
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   346
// Runs TestAnalyzerWithField with one "switch { ... }" analyzer definition
// for each field name in the table below, in order.
//
// testMgr - test manager forwarded to TestAnalyzerWithField.
void TestSwitchAnalyzers(Itk::TestMgr * testMgr)
{
	const wchar_t* switchDefinition =
		L"\n"
		L"switch {\n"
		L"    case '_docuid':          keyword;\n"
		L"    case '_appclass':        whitespace>lowercase;\n"
		L"    case 'title', 'message': stdtokens>accent>lowercase>stem(en)>stop(en);\n"
		L"    default:                 letter>lowercase>stop('i');\n"
		L"}";

	static const wchar_t * const fieldNames[] = {
		L"_docuid",
		L"_appclass",
		L"Title",
		L"message",
		L"field"
	};
	const size_t fieldCount = sizeof(fieldNames) / sizeof(fieldNames[0]);

	for (size_t i = 0; i < fieldCount; ++i)
	{
		TestAnalyzerWithField(testMgr, switchDefinition, fieldNames[i]);
	}
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   361
8
hgs
parents: 7
diff changeset
   362
// Runs TestCustomAnalyzer with one "locale_switch { ... }" analyzer
// definition under several locales. For each table entry a banner is
// printed, the locale is set with cpix_SetLocale and the definition is
// tested; consecutive runs are separated by an empty printed line. The last
// entry uses the locale string "fail", which is not among the definition's
// case labels. Afterwards the locale is set to cpix_LOCALE_AUTO.
//
// testMgr - test manager forwarded to TestCustomAnalyzer.
void TestLocaleSwitchAnalyzers(Itk::TestMgr * testMgr)
{
	const wchar_t* localeSwitchDefinition =
		L"\n"
		L"locale_switch {\n"
		L"    case 'en':       stdtokens>stdfilter>lowercase>stop(en);\n"
		L"    case 'th':       stdtokens>stdfilter>lowercase>thai>stop(en);\n"
		L"    case 'ca':       stdtokens>stdfilter>lowercase>accent;\n"
		L"    default:         stdtokens>stdfilter>lowercase;\n"
		L"}";

	struct LocaleRun
	{
		const char * banner;
		const char * locale;
	};
	static const LocaleRun runs[] = {
		{ "locale=en:\n",      "en"   },
		{ "locale=th:\n",      "th"   },
		{ "locale=ca:\n",      "ca"   },
		{ "default locale:\n", "fail" }
	};
	const size_t runCount = sizeof(runs) / sizeof(runs[0]);

	// The outcome written into this structure is not inspected here.
	cpix_Result setLocaleResult;

	for (size_t i = 0; i < runCount; ++i)
	{
		if (i != 0)
		{
			// Blank separator line between two consecutive runs.
			printf("\n");
		}
		printf("%s", runs[i].banner);
		cpix_SetLocale(&setLocaleResult, runs[i].locale);
		TestCustomAnalyzer(testMgr, localeSwitchDefinition);
	}

	cpix_SetLocale(&setLocaleResult, cpix_LOCALE_AUTO);
}
hgs
parents: 7
diff changeset
   389
hgs
parents: 7
diff changeset
   390
hgs
parents: 7
diff changeset
   391
// Creates the "loc" suite. It contains one test named
// "currentlocale_<name>", where <name> is taken from the first entry of
// Cpix::Spi::GetLanguageNames(), followed by one TestTokenizationWithLocale
// test per locale type listed below.
//
// Returns the newly allocated suite.
Itk::TesterBase * CreateAnalysisWhiteBoxLocalizationTests()
{
	using namespace Itk;

	SuiteTester * locSuite = new SuiteTester("loc");

	std::string currentLocaleTestName("currentlocale_");

	// NOTE(review): presumably auto_array<char> yields a narrow copy of the
	// language name - confirm against the Cpt::auto_array constructors.
	Cpt::auto_array<char> languageName( Cpix::Spi::GetLanguageNames()[0].c_str() );
	currentLocaleTestName += languageName.get();

	locSuite->add(currentLocaleTestName.c_str(),
				  &TestTokenizationWithCurrentLocale,
				  currentLocaleTestName.c_str());

	AddTokenizationWithLocaleTest<EnglishLocale>(locSuite);
	AddTokenizationWithLocaleTest<FrenchLocale>(locSuite);
	AddTokenizationWithLocaleTest<HebrewLocale>(locSuite);
	AddTokenizationWithLocaleTest<ThaiLocale>(locSuite);
	AddTokenizationWithLocaleTest<KoreanLocale>(locSuite);
	AddTokenizationWithLocaleTest<ChineseLocale>(locSuite);
	AddTokenizationWithLocaleTest<JapaneseLocale>(locSuite);

	return locSuite;
}
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   417
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   418
// Creates the "whitebox" suite: registers the analyzer, switch-analyzer,
// locale-switch-analyzer, tokenization and parsing tests, and then adds the
// suite returned by CreateAnalysisWhiteBoxLocalizationTests().
//
// Returns the newly allocated suite.
Itk::TesterBase * CreateAnalysisWhiteBoxTests()
{
	using namespace Itk;

	SuiteTester * whiteboxSuite = new SuiteTester("whitebox");

	// Custom analyzer tests.
	whiteboxSuite->add("analyzer", &TestCustomAnalyzers, "analyzer");
	whiteboxSuite->add("switchAnalyzer", &TestSwitchAnalyzers, "switchAnalyzer");
	whiteboxSuite->add("localeSwitchAnalyzer", &TestLocaleSwitchAnalyzers, "localeSwitchAnalyzer");

	// Tokenization and parsing tests.
	whiteboxSuite->add("tokenization", TestTokenization6, "tokenization");
	whiteboxSuite->add("parsing", TestParsing, "parsing");
	whiteboxSuite->add("parsing2", TestSwitch, "parsing2");
	whiteboxSuite->add("parsing3", TestConfigSwitch, "parsing3");
	whiteboxSuite->add("parsingerrors", TestParsingErrors, "parsingerrors");

	// Nested localization suite.
	whiteboxSuite->add(CreateAnalysisWhiteBoxLocalizationTests());

	return whiteboxSuite;
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   453
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   454
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   455