searchengine/cpix/cpix/src/analyzer.cpp
changeset 8 6547bf8ca13a
parent 7 a5fbfefd615f
child 19 e3c09e9691e0
--- a/searchengine/cpix/cpix/src/analyzer.cpp	Fri Jun 11 14:43:47 2010 +0300
+++ b/searchengine/cpix/cpix/src/analyzer.cpp	Mon Jun 28 10:34:53 2010 +0530
@@ -15,30 +15,36 @@
 *
 */
 
-
-#include "CLucene.h"
-#include "CLucene/analysis/AnalysisHeader.h"
-#include "CLucene/analysis/Analyzers.h"
-
-#include "analyzer.h"
-#include "analyzerexp.h"
-#include "cpixanalyzer.h"
-#include "cluceneext.h"
-
-#include "cpixexc.h"
-#include "cpixparsetools.h"
-
+// general utilities
 #include "wchar.h"
 #include <string>
 #include <vector>
 #include <sstream>
 #include <iostream>
+#include <fstream>
+#include <algorithm>
 
-#include "document.h"
+// clucene
+#include "CLucene.h"
+
+// support
+#include "cpixparsetools.h"
+#include "cpixfstools.h"
 
-#include "indevicecfg.h" 
+// internal
+#include "analyzer.h"
+#include "cpixanalyzer.h"
+#include "cpixexc.h"
+#include "document.h"
+#include "cluceneext.h"
+#include "indevicecfg.h"
+#include "initparams.h"
+#include "thaianalysis.h"
 
-#include "initparams.h"
+#include "analyzerexp.h"
+#include "customanalyzer.h"
+#include "common/cpixlog.h"
+
 namespace
 {
     const char AGGR_NONFILEREADERPROXY_ERR[] 
@@ -46,11 +52,111 @@
 
     const char AGGR_STREAMREADER_ERR[] 
     = "Aggregating streamValue-fields not implemented";
+    
+    const char THAI_LANGUAGE_FILE[] 
+    = "thaidict.sm"; // Thai dictionary file name; Analysis::init appends it to the resource dir
+
+    const char ANALYZER_FILE[]
+    = "analyzer.loc"; // analyzer definition file name; the Analysis constructor appends it to the resource dir
+
+    const wchar_t DEFAULT_ANALYZER_CONFIG[]
+        = L"default"; // configuration name handed to the CustomAnalyzer stored in defaultAnalyzer_
+
+    const wchar_t QUERY_ANALYZER_CONFIG[]
+        = L"query"; // configuration name handed to the CustomAnalyzer stored in queryAnalyzer_
+
+    const wchar_t PREFIX_ANALYZER_CONFIG[]
+        = L"prefix"; // configuration name handed to the CustomAnalyzer stored in prefixAnalyzer_
+
+//    const wchar_t CPIX_ANALYZER_FALLBACK[]
+//    = CPIX_ANALYZER_STANDARD;
+//
+//    const wchar_t CPIX_PREFIX_ANALYZER_FALLBACK[]
+//    = CPIX_TOKENIZER_LETTER L">" CPIX_FILTER_LOWERCASE;
+
+    
 }
 
 
 namespace Cpix {
 
+	
+Analysis* Analysis::theInstance_ = NULL; // singleton: created by init(), deleted and reset to NULL by shutdown()
+
+	// Starts Thai analysis (when its dictionary exists) and creates theInstance_.
+	void Analysis::init(InitParams& ip) {
+		const std::string thaiDictPath =
+			Cpt::appendpath(ip.getResourceDir(), THAI_LANGUAGE_FILE);
+
+		// A zero Cpt::filesize result is handled as "dictionary missing".
+		if ( !Cpt::filesize( thaiDictPath.c_str() ) ) {
+			logMsg(CPIX_LL_WARNING,
+				   "Thai dictionary could not be found. Thai analysis will NOT work.");
+		} else {
+			analysis::InitThaiAnalysis(thaiDictPath.c_str());
+		}
+		// Built regardless of the Thai outcome.
+		theInstance_ = new Analysis(ip);
+	}
+	
+	// Parses the analyzer definition file and builds the three analyzers from it.
+	Analysis::Analysis(InitParams& ip)
+	:	defaultAnalyzer_(), queryAnalyzer_(), prefixAnalyzer_() {
+		const std::string definitionPath =
+			Cpt::appendpath( ip.getResourceDir(), ANALYZER_FILE );
+		auto_ptr<AnalyzerExp::Piping> piping( parse( definitionPath ) );
+		// One parsed definition serves all three named configurations.
+		defaultAnalyzer_.reset( new CustomAnalyzer( *piping, DEFAULT_ANALYZER_CONFIG ) );
+		queryAnalyzer_.reset( new CustomAnalyzer( *piping, QUERY_ANALYZER_CONFIG ) );
+		prefixAnalyzer_.reset( new CustomAnalyzer( *piping, PREFIX_ANALYZER_CONFIG ) );
+	}
+	
+	// Reads the analyzer definition file at 'path' and parses it into a piping.
+	// Throws a CPIX exception if the file cannot be opened or parsed, so the
+	// returned auto_ptr is never empty.
+	auto_ptr<AnalyzerExp::Piping> Analysis::parse(std::string path) {
+		auto_ptr<AnalyzerExp::Piping> ret;
+		std::wifstream in(path.c_str());
+		if ( in ) {
+			// Slurp the stream itself rather than read() a byte-count's worth of
+			// wide characters: the stream may yield fewer characters than the
+			// file has bytes, which made read() fail on a valid file.
+			std::wostringstream definition;
+			definition << in.rdbuf();
+			const std::wstring text( definition.str() );
+			try {
+				ret = AnalyzerExp::ParsePiping( text.c_str() );
+			} catch (...) {
+				// ret stays empty; reported via the CPIX exception below
+			}
+		}
+		if ( !ret.get() ) {
+			THROW_CPIXEXC("Analyzer definition not found. %s could not be opened or parsed. ", path.c_str());
+		}
+		return ret;
+	}
+	
+	void Analysis::shutdown() { // undoes init(): Thai analysis first, then the singleton
+		analysis::ShutdownThaiAnalysis(); 
+		delete theInstance_; // deleting a NULL pointer is a no-op, so this also works if init() never ran
+		theInstance_ = NULL; // back to the pre-init() state
+	}
+
+	lucene::analysis::Analyzer& Analysis::getDefaultAnalyzer() { // returns the singleton's defaultAnalyzer_
+		if ( !theInstance_ ) { THROW_CPIXEXC("Analysis is not initialized"); } // no instance before init() / after shutdown(); was an unchecked NULL dereference
+		return *theInstance_->defaultAnalyzer_;
+	}
+
+	lucene::analysis::Analyzer& Analysis::getQueryAnalyzer() { // returns the singleton's queryAnalyzer_
+		if ( !theInstance_ ) { THROW_CPIXEXC("Analysis is not initialized"); } // no instance before init() / after shutdown(); was an unchecked NULL dereference
+		return *theInstance_->queryAnalyzer_;
+	}
+
+	lucene::analysis::Analyzer& Analysis::getPrefixAnalyzer() { // returns the singleton's prefixAnalyzer_
+		if ( !theInstance_ ) { THROW_CPIXEXC("Analysis is not initialized"); } // no instance before init() / after shutdown(); was an unchecked NULL dereference
+		return *theInstance_->prefixAnalyzer_;
+	}
+
 	PrefixGenerator::PrefixGenerator(
 		lucene::analysis::TokenStream* in, 
 		bool deleteTS, 
@@ -221,488 +327,5 @@
             return analyzer_->tokenStream( fieldName, reader ); 
         }									 
     }
-
-    //
-    // Following sections provide the glue code for connecting the 
-    // analyzer definition syntax with analyzer, tokenizers and filter 
-    // implementations. 
-    //
-    // The glue code is template heavy with the indent of providing 
-    // automation for associating specific keywords with specific
-    // analyzers, tokenizers and filters implementing corresponding 
-    // CLucene abstractions. Additional classes are needed only if 
-    // filters, tokenizers, etc. accept parameters.
-    //
-    // NOTE: To understand the analyzers, it is sufficient to understand
-    // that an analyzer transforms characters stream into specific token streams 
-    // (e.g. character stream 'foobarmetawords' can be transformed into token 
-    // stream 'foo', 'bar' 'meta' 'words'). Analysis consist of two main
-    // parts which are tokenization and filtering. Tokenization converts
-    // the character stream into token stream (e.g. 'FoO bAr' -> 'FoO' 'bAr')
-    // and filtering modifies the tokens (e.g. lowercase filtering 'FoO' -> 
-    // 'foo', 'bAr' -> 'bar'). Analyzer as an object is responsible for
-    // constructing a tokenizer and a sequence of filters to perform
-    // these required tasks.  
-    // 
-    // See the documentation around TokenizerClassEntries and 
-    // FilterClassEntries to see how implementations not taking parameters
-    // can be easily added.  
-    // 
-
-    using namespace Cpix::AnalyzerExp;
-    
-    /**
-     * Creates token stream for the given reader and fieldName.
-     * This class in in many ways similar to CLucene analyzer class 
-     * definition.   
-     */
-    class TokenStreamFactory {
-    public: 
-        virtual ~TokenStreamFactory(); 
-        virtual lucene::analysis::TokenStream* tokenStream(const wchar_t        * fieldName, 
-                                                           lucene::util::Reader * reader) = 0;
-    };
-	
-    TokenStreamFactory::~TokenStreamFactory() {};
-	
-    /**
-     * Template class used to create CLucene tokenizers. Template
-     * parameter T must implement lucene::analysis::Tokenizer abstraction.  
-     */    
-    template<class T>
-    class TokenizerFactory : public TokenStreamFactory 
-    {
-    public:
-        TokenizerFactory(const Invokation& invokation) {
-            if (invokation.params().size() > 0) {
-                THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
-                              invokation.id().c_str());
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * /*fieldName*/, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW T(reader); 
-        }
-    };
-
-    /**
-     * Template class wrapping CLucene analyzers. Template parameter T must 
-     * implement lucene::analysis::Analyzer abstraction.  
-     */    
-    template<class T>
-    class AnalyzerWrap : public TokenStreamFactory 
-    {
-    public:
-        AnalyzerWrap(const Invokation& invokation) : analyzer_() {
-            if (invokation.params().size() > 0) {
-                THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
-                              invokation.id().c_str());
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return analyzer_.tokenStream(fieldName, reader); 
-        }
-    private: 
-        T analyzer_;
-    };
-
-    /**
-     * Template class associated with CLucene filter and a TokenStreamFactory. 
-     * Uses TokenStreamFactory to transform given character stream into tokenstream
-     * and then applies the given Clucene filter to the token stream. 
-     * The template parameter T must implement lucene::analysis::Filter abstraction.     
-     */    
-    template<class T>
-    class FilterFactory : public TokenStreamFactory 
-    {
-    public:
-        FilterFactory(const Invokation& invokation, auto_ptr<TokenStreamFactory> factory) : factory_(factory) {
-            if (invokation.params().size() > 0) {
-                THROW_CPIXEXC(L"Filter %S does not accept parameters",
-                              invokation.id().c_str());
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW T(factory_->tokenStream(fieldName, reader), true); 
-        }
-    private: 
-        std::auto_ptr<TokenStreamFactory> factory_; 
-    };
-
-	/**
-	 * Specialized Analyzer wrap for CLucene's PerFieldAnalyzer. Specialized
-	 * template is needed because perfield analyzer accepts parameters
-	 * (specific analyzers for different field plus default analyzer)
-	 */
-    template<>
-    class AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper> : public TokenStreamFactory {
-    public:
-        AnalyzerWrap(const Switch& sw) : analyzer_(0) {
-            using namespace Cpt::Parser;
-            using namespace lucene::analysis;
-			
-            analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def()));
-			
-            for (int i = 0; i < sw.cases().size(); i++) {
-                const Case& cs = *sw.cases()[i];
-                for (int j = 0; j < cs.fields().size(); j++) {
-                    analyzer_->addAnalyzer( cs.fields()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping() ) );
-                }
-            }
-        }
-        virtual ~AnalyzerWrap() {
-            _CLDELETE(analyzer_);
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return analyzer_->tokenStream(fieldName, reader); 
-        }
-    private: 
-        lucene::analysis::PerFieldAnalyzerWrapper* analyzer_;
-    };
-		
-	
-	
-	/**
-	 * Specialized StopFilter factory. Specialized filter is needed
-	 * because StopFilter needs parameters (stop word list or a language) 
-	 */
-    template<>
-    class FilterFactory<lucene::analysis::StopFilter> : public TokenStreamFactory 
-    {
-    public:
-        FilterFactory(const Invokation& invokation,
-                      auto_ptr<TokenStreamFactory> factory)
-            :words_(0),  ownWords_(0), factory_(factory) {
-            using namespace Cpt::Parser;
-            if (invokation.params().size() == 1 && dynamic_cast<Identifier*>(invokation.params()[0])) {
-                Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
-                //cpix_LangCode lang; 
-                if (id->id() == CPIX_WLANG_EN) {
-                    words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS;
-                } else {
-                    THROW_CPIXEXC(L"No prepared stopword list for language code '%S'",
-                                  id->id().c_str());
-                }
-            } else {
-                ownWords_ = new wchar_t*[invokation.params().size()+1];
-                memset(ownWords_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); 
-                // FIXE: args may leak
-                for (int i = 0; i < invokation.params().size(); i++) {
-                    StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
-                    if (lit) {
-                        const wstring& str = lit->text(); 
-                        ownWords_[i] = new wchar_t[str.length()+1]; 
-                        wcscpy(ownWords_[i], str.c_str());
-                    } else {
-                        THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters.");
-                    }
-                }
-            }
-		
-        }
-        virtual ~FilterFactory() { 
-            if (ownWords_) {
-                for (int i = 0; ownWords_[i]; i++) {
-                    delete[] ownWords_[i]; 
-                }
-                delete[] ownWords_;
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, ownWords_ ? const_cast<const wchar_t**>(ownWords_) : words_); 
-        }
-    private: 
-        const wchar_t **words_;
-        wchar_t **ownWords_; // owned
-        std::auto_ptr<TokenStreamFactory> factory_; 
-    };
-	
-    /**
-     * Specialized SnowballFilter factory is needed, because SnowballFilter
-     * accepts parameters (the language). 
-     */
-    template<>
-    class FilterFactory<lucene::analysis::SnowballFilter> : public TokenStreamFactory 
-    {
-    public:
-        FilterFactory(const Invokation& invokation, 		
-                      auto_ptr<TokenStreamFactory> factory)
-            : factory_(factory) {
-            using namespace Cpt::Parser;
-            if (invokation.params().size() != 1 || !dynamic_cast<Identifier*>(invokation.params()[0])) {
-                THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." );
-            }
-            Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
-            if (id->id() == CPIX_WLANG_EN) {
-                lang_ = cpix_LANG_EN; 
-            } else {
-                THROW_CPIXEXC(L"Language identifier %S is not supported for stemming",
-                              id->id().c_str());
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_); 
-        }
-    private: 
-        cpix_LangCode lang_;
-        std::auto_ptr<TokenStreamFactory> factory_; 
-    };
-
-    /**
-     * Specialized LengthFilter factory is needed, because length filter 
-     * accepts parameters (minimum length and maximum length)
-     */
-    template<>
-    class FilterFactory<lucene::analysis::LengthFilter> : public TokenStreamFactory 
-    {
-    public:
-        FilterFactory(const Invokation& invokation, 
-                      auto_ptr<TokenStreamFactory> factory) 
-            : factory_(factory) {
-            using namespace Cpt::Parser;
-            if (!(invokation.params().empty())) {
-                if (invokation.params().size() != 2 || 
-                        !dynamic_cast<IntegerLit*>(invokation.params()[0]) || 
-                        !dynamic_cast<IntegerLit*>(invokation.params()[1])) {
-                    THROW_CPIXEXC("Length filter takes exactly two integer parameters");
-                }
-                min_ = dynamic_cast<IntegerLit*>(invokation.params()[0])->value();
-                max_ = dynamic_cast<IntegerLit*>(invokation.params()[1])->value();
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ ); 
-        }
-    private: 
-        int min_, max_;
-        std::auto_ptr<TokenStreamFactory> factory_; 
-    };
-    
-    /**
-     * Specialized PrefixGenerator factory is needed, because PrefixGenerator
-     * requires the max prefix size. 
-     */
-    template<>
-    class FilterFactory<PrefixGenerator> : public TokenStreamFactory 
-    {
-    public:
-        FilterFactory(const Invokation& invokation, 
-                      auto_ptr<TokenStreamFactory> factory) 
-            : factory_(factory) {
-            using namespace Cpt::Parser;
-            if (invokation.params().empty()) {
-                if (invokation.params().size() != 1 || 
-                    !dynamic_cast<IntegerLit*>(invokation.params()[0])) {
-                    THROW_CPIXEXC("Prefix generator takes exactly one integer parameter");
-                }            
-                maxPrefixLength_ = dynamic_cast<IntegerLit*>(invokation.params()[0])->value();
-            }
-        }
-        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
-                                                           lucene::util::Reader * reader) {
-            return _CLNEW PrefixGenerator(factory_->tokenStream(fieldName, reader), true, maxPrefixLength_ ); 
-        }
-    private: 
-        int maxPrefixLength_;
-        std::auto_ptr<TokenStreamFactory> factory_; 
-    };
-
-
-    typedef auto_ptr<TokenStreamFactory> (*TokenizerFactoryCreator)(const Invokation& invokation);
-    typedef auto_ptr<TokenStreamFactory> (*FilterFactoryCreator)(const Invokation& invokation, 
-                                                                 auto_ptr<TokenStreamFactory> factory);
-    /**
-     * Sets up a tokenizer factory with given invokation parameters
-     */
-    template<class T>
-    struct TokenizerFactoryCtor
-    {
-        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
-            return auto_ptr<TokenStreamFactory>(new TokenizerFactory<T>(invokation)); 
-        }
-    };
-
-    /**
-     * Sets up an analyzer wrap with given invokation parameters
-     */
-    template<class T>
-    struct AnalyzerWrapCtor
-    {
-        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
-            return auto_ptr<TokenStreamFactory>(new AnalyzerWrap<T>(invokation)); 
-        }
-    };
-
-    /**
-	 * Sets up a filter factory with given invokation parameters
-	 */
-	template<class T>
-	struct FilterFactoryCtor 
-    {
-        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation,
-                                                   auto_ptr<TokenStreamFactory> factory) {
-            return auto_ptr<TokenStreamFactory>(new FilterFactory<T>(invokation, factory)); 
-        }
-    };
-
-    struct TokenizerClassEntry {
-        const wchar_t *id_;
-        TokenizerFactoryCreator createFactory_;
-    };
-    
-    //
-    // Following TokenizerClassEntries and FilterClassEntries contain
-    // the mapping from tokenizer/analyzer/filter names into glue code
-    // templates providing the implementations. 
-    // 
-	
-    TokenizerClassEntry TokenizerClassEntries[] = { 
-        {CPIX_TOKENIZER_STANDARD, 	TokenizerFactoryCtor<lucene::analysis::standard::StandardTokenizer>::create},
-        {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor<lucene::analysis::WhitespaceTokenizer>::create},
-        {CPIX_TOKENIZER_LETTER, 	TokenizerFactoryCtor<lucene::analysis::LetterTokenizer>::create},
-        {CPIX_TOKENIZER_KEYWORD, 	TokenizerFactoryCtor<lucene::analysis::KeywordTokenizer>::create},
-        {CPIX_ANALYZER_STANDARD, 	AnalyzerWrapCtor<lucene::analysis::standard::StandardAnalyzer>::create},
-
-// 		TODO: Add more Tokenizers/Analyzers
-        
-// 		Example tokenizer (works as such if tokenizers don't take parameters)
-//      {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor<MyTokenizer>::create},
-
-// 		Example analyzer (works as such if analyzer don't take parameters)
-//      {CPIX_ANALYZER_MYANALYZER,	AnalyzerWrapCtor<MyAnalyzer>::create},
-
-        {0, 						0}
-    };
-	
-    struct FilterClassEntry {
-        const wchar_t *id_;
-        FilterFactoryCreator createFactory_;
-    };
-
-    FilterClassEntry FilterClassEntries[] = {
-        {CPIX_FILTER_STANDARD, 	FilterFactoryCtor<lucene::analysis::standard::StandardFilter>::create},
-        {CPIX_FILTER_LOWERCASE, FilterFactoryCtor<lucene::analysis::LowerCaseFilter>::create},
-        {CPIX_FILTER_ACCENT, 	FilterFactoryCtor<lucene::analysis::ISOLatin1AccentFilter>::create},
-        {CPIX_FILTER_STOP, 		FilterFactoryCtor<lucene::analysis::StopFilter>::create},
-        {CPIX_FILTER_STEM, 		FilterFactoryCtor<lucene::analysis::SnowballFilter>::create},
-        {CPIX_FILTER_LENGTH, 	FilterFactoryCtor<lucene::analysis::LengthFilter>::create},
-        {CPIX_FILTER_PREFIXES, 	FilterFactoryCtor<PrefixGenerator>::create},
-
-// 		TODO: Add more Filters
-
-// 		Example filter (works as such if analyzer don't take parameters)
-//      {CPIX_FILTER_MYFILTER,	FilterFactoryCtor<MyFilter>::create},
-
-        {0, 					0}
-    };
-	
-    CustomAnalyzer::CustomAnalyzer(const wchar_t* definition)
-    {
-        using namespace Cpt::Lex;
-        using namespace Cpt::Parser;
-
-
-        try
-            {
-				// 1. Setup an tokenizer
-                Cpix::AnalyzerExp::Tokenizer 
-                    tokenizer; 
-                StdLexer 
-                    lexer(tokenizer, definition);
-                
-                // 2. Parse 
-                std::auto_ptr<Piping> 
-                    def = ParsePiping(lexer); 
-                lexer.eatEof();
-                
-                // 3. Setup this item based on parsed definition
-                setup(*def);
-            }
-        catch (Cpt::ITxtCtxtExc & exc)
-            {
-                // provide addition info for thrown exception
-                exc.setContext(definition);
-
-                // throw it fwd
-                throw;
-            }
-    }
-
-    CustomAnalyzer::CustomAnalyzer(const Piping& definition)
-    {	
-        setup(definition);
-    }
-    using namespace Cpt::Parser;
-	
-    void CustomAnalyzer::setup(const Piping& piping) {
-    
-		// If the first item is invokation, create corresponding analyzer/tokenizer 
-        if (dynamic_cast<const Invokation*>(&piping.tokenizer())) 
-        {
-            const Invokation& tokenizer = dynamic_cast<const Invokation&>(piping.tokenizer());
-            TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() ); 
-            factory_ = tokenizerEntry.createFactory_( tokenizer );
-        } else {
-            // If the first item is switch statement, create per-field analyzer 
-            const Switch& tokenizer = dynamic_cast<const Switch&>(piping.tokenizer());
-            factory_ = new AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper>( tokenizer );
-        }
-        
-        // Add filters
-        const std::vector<Invokation*>& filters = piping.filters(); 
-        for (int i = 0; i < filters.size(); i++) {
-            FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() ); 
-            factory_ = filterEntry.createFactory_( *filters[i], factory_ );
-        }
-    }
-
-    TokenizerClassEntry& CustomAnalyzer::getTokenizerEntry(std::wstring id) {
-    
-		// Looks for a match in the TokenizerClassEntries. After finding 
-		// a match it returns a proper tokenizer/analyzer implementation provider 
-		// 
-        for (int i = 0; TokenizerClassEntries[i].id_; i++) {
-            if (id == std::wstring(TokenizerClassEntries[i].id_)) {
-                return TokenizerClassEntries[i];
-            }
-        }
-
-        THROW_CPIXEXC(L"Unknown tokenizer '%S'.",
-                      id.c_str());
-    }
-
-    FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) {
-    
-		// Looks for a match in the FilterClassEntries. After finding 
-		// a match it returns a proper tokenizer/analyzer implementation 
-		// provider 
-		// 
-        for (int i = 0; FilterClassEntries[i].id_; i++) {
-            if (id == std::wstring(FilterClassEntries[i].id_)) {
-                return FilterClassEntries[i];
-            }
-        }
-
-        THROW_CPIXEXC(L"Unknown filter '%S'.",
-                      id.c_str());
-    }
-	
-    CustomAnalyzer::~CustomAnalyzer() {} 
-
-    lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t        * fieldName, 
-                                                               lucene::util::Reader * reader) {
-        // Utilizes the the token stream factory to form token stream. 
-        // token stream factory is prepared during custom analyzer construction
-        // and based on the analyzer definition string.
-                                                               
-        return factory_->tokenStream(fieldName, reader);
-    }
-
 }