searchengine/cpix/cpix/src/analyzer.cpp
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Fri, 17 Sep 2010 08:35:54 +0300
changeset 21 2c484ac32ef0
parent 10 afe194b6b1cd
permissions -rw-r--r--
Revision: 201035 Kit: 201037
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
* All rights reserved.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
* This component and the accompanying materials are made available
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
* which accompanies this distribution, and is available
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
*
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
* Initial Contributors:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
*
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
* Contributors:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
*
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
* Description: 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
*
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
*/
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    18
// general utilities
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
#include "wchar.h"
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
#include <string>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
#include <vector>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
#include <sstream>
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
#include <iostream>
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    24
#include <fstream>
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    25
#include <algorithm>
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    27
// clucene
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    28
#include "CLucene.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    29
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    30
// support
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    31
#include "cpixparsetools.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    32
#include "cpixfstools.h"
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    34
// internal
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    35
#include "analyzer.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    36
#include "cpixanalyzer.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    37
#include "cpixexc.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    38
#include "document.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    39
#include "cluceneext.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    40
#include "indevicecfg.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    41
#include "initparams.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    42
#include "thaianalysis.h"
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    44
#include "analyzerexp.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    45
#include "customanalyzer.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    46
#include "common/cpixlog.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    47
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
namespace
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
{
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
    const char AGGR_NONFILEREADERPROXY_ERR[] 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
    = "Aggregated reader field should be FileReaderProxy instance";
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
    const char AGGR_STREAMREADER_ERR[] 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
    = "Aggregating streamValue-fields not implemented";
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    55
    
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    56
    const char THAI_LANGUAGE_FILE[] 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    57
    = "thaidict.sm";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    58
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    59
    const char ANALYZER_FILE[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    60
    = "analyzer.loc";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    61
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    62
    const wchar_t DEFAULT_ANALYZER_CONFIG[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    63
        = L"default";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    64
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    65
    const wchar_t QUERY_ANALYZER_CONFIG[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    66
        = L"query";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    67
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    68
    const wchar_t PREFIX_ANALYZER_CONFIG[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    69
        = L"prefix";
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    70
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    71
//    const wchar_t CPIX_ANALYZER_FALLBACK[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    72
//    = CPIX_ANALYZER_STANDARD;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    73
//
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    74
//    const wchar_t CPIX_PREFIX_ANALYZER_FALLBACK[]
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    75
//    = CPIX_TOKENIZER_LETTER L">" CPIX_FILTER_LOWERCASE;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    76
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    77
    
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
namespace Cpix {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    83
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    84
Analysis* Analysis::theInstance_ = NULL; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    85
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    86
	void Analysis::init(InitParams& ip) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    87
		// Init thai analysis with thai dictionary
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    88
		std::string thai( Cpt::appendpath(ip.getResourceDir(),
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    89
										  THAI_LANGUAGE_FILE) );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    90
		
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    91
		if ( Cpt::filesize( thai.c_str() ) ) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    92
			analysis::InitThaiAnalysis(thai.c_str());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    93
		} else {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    94
			logMsg(CPIX_LL_WARNING,
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    95
				   "Thai dictionary could not be found. Thai analysis will NOT work.");
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    96
		}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    97
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    98
		// Setup the analysis instance
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
    99
		theInstance_ = new Analysis(ip);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   100
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   101
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   102
	Analysis::Analysis(InitParams& ip) 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   103
	:	defaultAnalyzer_(),
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   104
	 	queryAnalyzer_(), 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   105
		prefixAnalyzer_() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   106
		
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   107
		auto_ptr<AnalyzerExp::Piping> p = parse( Cpt::appendpath( ip.getResourceDir(), ANALYZER_FILE ) );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   108
		
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   109
		defaultAnalyzer_.reset( new CustomAnalyzer( *p, DEFAULT_ANALYZER_CONFIG ) ); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   110
		queryAnalyzer_.reset( new CustomAnalyzer( *p, QUERY_ANALYZER_CONFIG ) ); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   111
		prefixAnalyzer_.reset( new CustomAnalyzer( *p, PREFIX_ANALYZER_CONFIG ) ); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   112
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   113
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   114
	auto_ptr<AnalyzerExp::Piping> Analysis::parse(std::string path) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   115
		std::wifstream in(path.c_str());
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   116
		auto_ptr<AnalyzerExp::Piping> ret; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   117
		if ( in ) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   118
		
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   119
			// Reserve constant size buffer and populate it with definition
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   120
			//
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   121
			int filesize = Cpt::filesize(path.c_str()); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   122
			Cpt::auto_array<wchar_t> buf( new wchar_t[filesize+1] );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   123
			in.read(buf.get(), filesize);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   124
			buf.get()[filesize] = '\0'; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   125
			if ( !in.fail() ) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   126
				try {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   127
					ret = AnalyzerExp::ParsePiping( buf.get() );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   128
				} catch (...) {}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   129
			} 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   130
			in.close();
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   131
		} 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   132
		
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   133
		if ( !ret.get() ) { 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   134
			THROW_CPIXEXC("Analyzer definition not found. %s could not be opened. ", path.c_str()); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   135
		}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   136
		return ret; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   137
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   138
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   139
	void Analysis::shutdown() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   140
		analysis::ShutdownThaiAnalysis(); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   141
		delete theInstance_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   142
		theInstance_ = NULL; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   143
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   144
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   145
	lucene::analysis::Analyzer& Analysis::getDefaultAnalyzer() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   146
		// TODO: Assert( theInstance_ );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   147
		return *theInstance_->defaultAnalyzer_; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   148
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   149
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   150
	lucene::analysis::Analyzer& Analysis::getQueryAnalyzer() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   151
		// TODO: Assert( theInstance_ );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   152
		return *theInstance_->queryAnalyzer_; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   153
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   154
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   155
	lucene::analysis::Analyzer& Analysis::getPrefixAnalyzer() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   156
		// TODO: Assert( theInstance_ );
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   157
		return *theInstance_->prefixAnalyzer_; 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   158
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 7
diff changeset
   159
1
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   160
	PrefixGenerator::PrefixGenerator(
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   161
		lucene::analysis::TokenStream* in, 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   162
		bool deleteTS, 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   163
		size_t maxPrefixLength) 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   164
	: 	TokenFilter(in, deleteTS),
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   165
	  	token_(), 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   166
	  	prefixLength_(0),
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   167
	  	maxPrefixLength_(maxPrefixLength) {}
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   168
	
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   169
	
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   170
	PrefixGenerator::~PrefixGenerator() {
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   171
	}
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   172
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   173
	
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   174
	bool PrefixGenerator::next(lucene::analysis::Token* token) {
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   175
		token_.setPositionIncrement(0); 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   176
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   177
		while (prefixLength_ == 0) {
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   178
			token_.setPositionIncrement(1); // default position increment
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   179
			if (!input->next(&token_)) {
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   180
				return false;
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   181
			}
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   182
			prefixLength_ = std::min(token_.termTextLength(), maxPrefixLength_);
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   183
		}
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   184
			
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   185
		// Clip token
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   186
		std::wstring clipped; 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   187
		clipped = token_.termText();
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   188
		token_.setText(clipped.substr(0, prefixLength_).c_str());
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   189
		
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   190
		// Copy
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   191
		token->set(token_.termText(), token_.startOffset(), token_.endOffset(), token_.type());
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   192
		token->setPositionIncrement(token_.getPositionIncrement());
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   193
		
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   194
		// Reduce prefixLength_
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   195
		prefixLength_--;
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   196
		return true; 
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   197
	}
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
    AggregateFieldTokenStream::AggregateFieldTokenStream(lucene::analysis::Analyzer& analyzer, 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
                                                         DocumentFieldIterator* fields) 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
	: stream_(), analyzer_( analyzer ), reader_(), fields_( fields ) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   202
        getNextStream(); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   203
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
													   
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
    AggregateFieldTokenStream::~AggregateFieldTokenStream() {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
        _CLDELETE( stream_ ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   207
        delete fields_; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   208
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   209
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   210
    bool AggregateFieldTokenStream::next(lucene::analysis::Token* token) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   211
        while ( stream_ ) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   212
            if ( stream_->next( token ) ) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
                return true;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   214
            }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   215
            getNextStream();
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   216
        }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   217
        return false;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   218
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   219
		
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   220
    void AggregateFieldTokenStream::close() {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   221
        if (stream_) stream_->close(); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   222
        _CLDELETE( stream_ ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   223
        _CLDELETE( reader_ ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   224
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   225
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   226
    void AggregateFieldTokenStream::getNextStream()
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   227
    {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   228
        using namespace lucene::document;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   229
        using namespace lucene::util; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   230
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   231
        if ( stream_ ) stream_->close(); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   232
        _CLDELETE( stream_ ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   233
        _CLDELETE( reader_ );
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   234
		
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   235
        Field* field = 0; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   236
        while (*fields_ && field == NULL)
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   237
            {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   238
                field = (*fields_)++;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   239
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   240
                if (!field->isAggregated()) 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   241
                    {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   242
						field = 0;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   243
                    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   244
            }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   245
        if (field) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   246
            if (field->stringValue() != NULL)
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   247
                {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   248
                    reader_ = _CLNEW CL_NS(util)::StringReader(field->stringValue(),_tcslen(field->stringValue()),false);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   249
                }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   250
            else if (field->native().readerValue() != NULL)
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   251
                {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   252
                    Reader* r = field->native().readerValue();
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   253
                    FileReaderProxy
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   254
                        * frp = 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   255
                        dynamic_cast<FileReaderProxy*>(r);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   256
                    if (frp == NULL)
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   257
                        {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   258
                            _CLTHROWA(CL_ERR_IO, AGGR_NONFILEREADERPROXY_ERR);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   259
                        }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   260
                    else
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   261
                        {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   262
                            reader_ = frp->clone();
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   263
                        }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   264
                }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   265
            else
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   266
                {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   267
                    _CLTHROWA(CL_ERR_IO, AGGR_STREAMREADER_ERR);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   268
                }
7
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   269
            if(field->isFreeText())
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   270
                {
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   271
                    using namespace lucene::analysis;
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   272
                    stream_ = _CLNEW standard::StandardTokenizer(reader_);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   273
                    stream_ = _CLNEW standard::StandardFilter(stream_,true);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   274
                    stream_ = _CLNEW LowerCaseFilter(stream_,true);
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   275
                }
21
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   276
			// if it is phonenumber, use phone number analyser
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   277
            else if(field->isPhoneNumber())
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   278
                {
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   279
                    lucene::analysis::Analyzer *PhoneNumerAnalyzer_;
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   280
                    
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   281
                    PhoneNumerAnalyzer_  = _CLNEW lucene::analysis::PhoneNumberAnalyzer(); 
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   282
                    stream_ = PhoneNumerAnalyzer_->tokenStream( field->name(), reader_ );
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   283
                    _CLDELETE(PhoneNumerAnalyzer_); 
2c484ac32ef0 Revision: 201035
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 10
diff changeset
   284
                }
7
a5fbfefd615f Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 2
diff changeset
   285
            else
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   286
            stream_ = analyzer_.tokenStream( field->name(), reader_ ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   287
        }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   288
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   289
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   290
		
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   291
    AggregateFieldAnalyzer::AggregateFieldAnalyzer(Cpix::Document& document, 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   292
                                                   lucene::analysis::Analyzer& analyzer) 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   293
	:	analyzer_(analyzer), document_(document)
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   294
    {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   295
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   296
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   297
    lucene::analysis::TokenStream* AggregateFieldAnalyzer::tokenStream(const TCHAR     * fieldName, 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   298
                                                                       lucene::util::Reader * reader) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   299
        if ( wcscmp( fieldName, LCPIX_DEFAULT_FIELD ) == 0 ) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   300
            return new AggregateFieldTokenStream( analyzer_, document_.fields()); 
1
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   301
        } else if ( wcscmp( fieldName, LCPIX_DEFAULT_PREFIX_FIELD ) == 0 ) {
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   302
            return
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   303
				new PrefixGenerator(
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   304
					new AggregateFieldTokenStream( analyzer_, document_.fields()),
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   305
					true,
6f2c1c46032b Revision: 201015
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
   306
					OPTIMIZED_PREFIX_MAX_LENGTH);
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   307
        } else {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   308
            return analyzer_.tokenStream( fieldName, reader ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   309
        }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   310
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   311
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   312
    SystemAnalyzer::SystemAnalyzer(lucene::analysis::Analyzer* analyzer) : analyzer_(analyzer) {} 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   313
    SystemAnalyzer::~SystemAnalyzer() { _CLDELETE(analyzer_); }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   314
	
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   315
    lucene::analysis::TokenStream* SystemAnalyzer::tokenStream(const TCHAR      	* fieldName, 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   316
                                                               lucene::util::Reader * reader) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   317
        using namespace lucene::analysis; 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   318
        if ( wcscmp( fieldName, LCPIX_DEFAULT_FIELD ) == 0 ) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   319
            // Use standard analyzer without stop filter for this task
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   320
            TokenStream* ret = _CLNEW standard::StandardTokenizer(reader);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   321
            ret = _CLNEW standard::StandardFilter(ret,true);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   322
            ret = _CLNEW LowerCaseFilter(ret,true);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   323
            return ret;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   324
        } else if (wcscmp( fieldName, LCPIX_DOCUID_FIELD) == 0){
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   325
            // Use standard analyzer without stop filter for this task
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   326
            return  _CLNEW KeywordTokenizer(reader);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   327
        } else if (wcscmp( fieldName, LCPIX_APPCLASS_FIELD )  == 0){
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   328
            // Use standard analyzer without stop filter for this task
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   329
            TokenStream* ret = _CLNEW WhitespaceTokenizer(reader);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   330
            ret = _CLNEW LowerCaseFilter(ret,true);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   331
            return ret;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   332
        } else if (wcscmp( fieldName, LCPIX_MIMETYPE_FIELD ) == 0) {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   333
            TokenStream* ret = _CLNEW KeywordTokenizer(reader);
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   334
            return ret;
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   335
        } else {
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   336
            return analyzer_->tokenStream( fieldName, reader ); 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   337
        }									 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   338
    }
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   339
}
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   340