searchengine/cpix/cpix/src/analyzer.cpp
changeset 0 671dee74050a
child 1 6f2c1c46032b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/cpix/cpix/src/analyzer.cpp	Mon Apr 19 14:40:16 2010 +0300
@@ -0,0 +1,622 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description: 
+*
+*/
+
+
+#include "CLucene.h"
+#include "CLucene/analysis/AnalysisHeader.h"
+#include "CLucene/analysis/Analyzers.h"
+
+#include "analyzer.h"
+#include "analyzerexp.h"
+#include "cpixanalyzer.h"
+#include "cluceneext.h"
+
+#include "cpixexc.h"
+#include "cpixparsetools.h"
+
+#include "wchar.h"
+#include <string>
+#include <vector>
+#include <sstream>
+#include <iostream>
+
+#include "document.h"
+
+#include "indevicecfg.h" 
+
+namespace
+{
+    const char AGGR_NONFILEREADERPROXY_ERR[] 
+    = "Aggregated reader field should be FileReaderProxy instance";
+
+    const char AGGR_STREAMREADER_ERR[] 
+    = "Aggregating streamValue-fields not implemented";
+}
+
+
+namespace Cpix {
+
+
+    AggregateFieldTokenStream::AggregateFieldTokenStream(lucene::analysis::Analyzer& analyzer, 
+                                                         DocumentFieldIterator* fields) 
+        : stream_(), analyzer_( analyzer ), reader_(), fields_( fields ) {
+        getNextStream(); 
+    }
+
+    AggregateFieldTokenStream::~AggregateFieldTokenStream() {
+        _CLDELETE( stream_ ); 
+        delete fields_; 
+    }
+	
+    bool AggregateFieldTokenStream::next(lucene::analysis::Token* token) {
+        while ( stream_ ) {
+            if ( stream_->next( token ) ) {
+                return true;
+            }
+            getNextStream();
+        }
+        return false;
+    }
+		
+    void AggregateFieldTokenStream::close() {
+        if (stream_) stream_->close(); 
+        _CLDELETE( stream_ ); 
+        _CLDELETE( reader_ ); 
+    }
+	
+    void AggregateFieldTokenStream::getNextStream()
+    {
+        using namespace lucene::document;
+        using namespace lucene::util; 
+	
+        if ( stream_ ) stream_->close(); 
+        _CLDELETE( stream_ ); 
+        _CLDELETE( reader_ );
+		
+        Field* field = 0; 
+        while (*fields_ && field == NULL)
+            {
+                field = (*fields_)++;
+
+                if (!field->isAggregated())
+                    {
+                        field = 0;
+                    }
+            }
+        if (field) {
+            if (field->stringValue() != NULL)
+                {
+                    reader_ = _CLNEW CL_NS(util)::StringReader(field->stringValue(),_tcslen(field->stringValue()),false);
+                }
+            else if (field->native().readerValue() != NULL)
+                {
+                    Reader* r = field->native().readerValue();
+                    FileReaderProxy* frp = dynamic_cast<FileReaderProxy*>(r);
+                    if (frp == NULL)
+                        {
+                            _CLTHROWA(CL_ERR_IO, AGGR_NONFILEREADERPROXY_ERR);
+                        }
+                    else
+                        {
+                            reader_ = frp->clone();
+                        }
+                }
+            else
+                {
+                    _CLTHROWA(CL_ERR_IO, AGGR_STREAMREADER_ERR);
+                }
+            stream_ = analyzer_.tokenStream( field->name(), reader_ ); 
+        }
+    }
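+
+    // Illustrative note (a hedged sketch, not normative; the field names
+    // below are invented for the example): given a document whose
+    // aggregated fields are, say, Title="FoO Bar" and Body="baz", the
+    // aggregate stream produced for LCPIX_DEFAULT_FIELD simply
+    // concatenates the per-field token streams, yielding the tokens of
+    // Title followed by the tokens of Body. Which tokens exactly come
+    // out depends on the wrapped analyzer.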
+	
+		
+    AggregateFieldAnalyzer::AggregateFieldAnalyzer(Cpix::Document& document, 
+                                                   lucene::analysis::Analyzer& analyzer) 
+        : analyzer_(analyzer), document_(document)
+    {
+    }
+	
+    lucene::analysis::TokenStream* AggregateFieldAnalyzer::tokenStream(const TCHAR     * fieldName, 
+                                                                       lucene::util::Reader * reader) {
+        if ( wcscmp( fieldName, LCPIX_DEFAULT_FIELD ) == 0 ) {
+            return new AggregateFieldTokenStream( analyzer_, document_.fields()); 
+        } else {
+            return analyzer_.tokenStream( fieldName, reader ); 
+        }
+    }
+	
+    SystemAnalyzer::SystemAnalyzer(lucene::analysis::Analyzer* analyzer) : analyzer_(analyzer) {} 
+    SystemAnalyzer::~SystemAnalyzer() { _CLDELETE(analyzer_); }
+	
+    lucene::analysis::TokenStream* SystemAnalyzer::tokenStream(const TCHAR          * fieldName, 
+                                                               lucene::util::Reader * reader) {
+        using namespace lucene::analysis; 
+        if ( wcscmp( fieldName, LCPIX_DEFAULT_FIELD ) == 0 ) {
+            // Use standard analyzer without stop filter for this task
+            TokenStream* ret = _CLNEW standard::StandardTokenizer(reader);
+            ret = _CLNEW standard::StandardFilter(ret,true);
+            ret = _CLNEW LowerCaseFilter(ret,true);
+            return ret;
+        } else if (wcscmp( fieldName, LCPIX_DOCUID_FIELD) == 0){
+            // Use standard analyzer without stop filter for this task
+            return  _CLNEW KeywordTokenizer(reader);
+        } else if (wcscmp( fieldName, LCPIX_APPCLASS_FIELD )  == 0){
+            // Use standard analyzer without stop filter for this task
+            TokenStream* ret = _CLNEW WhitespaceTokenizer(reader);
+            ret = _CLNEW LowerCaseFilter(ret,true);
+            return ret;
+        } else if (wcscmp( fieldName, LCPIX_MIMETYPE_FIELD ) == 0) {
+            TokenStream* ret = _CLNEW KeywordTokenizer(reader);
+            return ret;
+        } else {
+            return analyzer_->tokenStream( fieldName, reader ); 
+        }									 
+    }
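+
+    // Summary of the per-field analysis above: the default (aggregate)
+    // field gets standard tokenization + standard filter + lowercasing
+    // (no stop word removal); the document uid and mime type fields are
+    // indexed as single keyword tokens; the appclass field is split on
+    // whitespace and lowercased; every other field is delegated to the
+    // wrapped analyzer.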
+
+    //
+    // The following sections provide the glue code for connecting the
+    // analyzer definition syntax with the analyzer, tokenizer and
+    // filter implementations.
+    //
+    // The glue code is template heavy, with the intent of automating
+    // the association of specific keywords with the specific analyzer,
+    // tokenizer and filter implementations of the corresponding CLucene
+    // abstractions. Additional classes are needed only if filters,
+    // tokenizers, etc. accept parameters.
+    //
+    // NOTE: To understand the analyzers, it is sufficient to understand
+    // that an analyzer transforms a character stream into a specific
+    // token stream (e.g. the character stream 'foobarmetawords' can be
+    // transformed into the tokens 'foo', 'bar', 'meta', 'words').
+    // Analysis consists of two main parts: tokenization and filtering.
+    // Tokenization converts the character stream into a token stream
+    // (e.g. 'FoO bAr' -> 'FoO' 'bAr') and filtering modifies the tokens
+    // (e.g. lowercase filtering 'FoO' -> 'foo', 'bAr' -> 'bar'). An
+    // analyzer object is responsible for constructing a tokenizer and a
+    // sequence of filters to perform these required tasks.
+    // 
+    // See the documentation around TokenizerClassEntries and 
+    // FilterClassEntries to see how implementations not taking parameters
+    // can be easily added.  
+    // 
+
+    using namespace Cpix::AnalyzerExp;
+    
+    /**
+     * Creates a token stream for the given reader and fieldName.
+     * This class is in many ways similar to the CLucene Analyzer
+     * class definition.
+     */
+    class TokenStreamFactory {
+    public: 
+        virtual ~TokenStreamFactory(); 
+        virtual lucene::analysis::TokenStream* tokenStream(const wchar_t        * fieldName, 
+                                                           lucene::util::Reader * reader) = 0;
+    };
+	
+    TokenStreamFactory::~TokenStreamFactory() {}
+	
+    /**
+     * Template class used to create CLucene tokenizers. Template
+     * parameter T must implement lucene::analysis::Tokenizer abstraction.  
+     */    
+    template<class T>
+    class TokenizerFactory : public TokenStreamFactory 
+    {
+    public:
+        TokenizerFactory(const Invokation& invokation) {
+            if (invokation.params().size() > 0) {
+                THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
+                              invokation.id().c_str());
+            }
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * /*fieldName*/, 
+                                                           lucene::util::Reader * reader) {
+            return _CLNEW T(reader); 
+        }
+    };
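+
+    // For example, TokenizerFactory<lucene::analysis::LetterTokenizer>
+    // is a TokenStreamFactory that just constructs a LetterTokenizer
+    // over the supplied reader; the entry tables further below bind
+    // such instantiations to the keywords of the definition syntax.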
+
+    /**
+     * Template class wrapping CLucene analyzers. Template parameter T must 
+     * implement lucene::analysis::Analyzer abstraction.  
+     */    
+    template<class T>
+    class AnalyzerWrap : public TokenStreamFactory 
+    {
+    public:
+        AnalyzerWrap(const Invokation& invokation) : analyzer_() {
+            if (invokation.params().size() > 0) {
+                THROW_CPIXEXC(L"Tokenizer %S does not accept parameters",
+                              invokation.id().c_str());
+            }
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return analyzer_.tokenStream(fieldName, reader); 
+        }
+    private: 
+        T analyzer_;
+    };
+
+    /**
+     * Template class associating a CLucene filter with a TokenStreamFactory.
+     * Uses the TokenStreamFactory to transform the given character stream
+     * into a token stream and then applies the given CLucene filter to that
+     * token stream. The template parameter T must implement the CLucene
+     * token filter abstraction.
+     */    
+    template<class T>
+    class FilterFactory : public TokenStreamFactory 
+    {
+    public:
+        FilterFactory(const Invokation& invokation, auto_ptr<TokenStreamFactory> factory) : factory_(factory) {
+            if (invokation.params().size() > 0) {
+                THROW_CPIXEXC(L"Filter %S does not accept parameters",
+                              invokation.id().c_str());
+            }
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return _CLNEW T(factory_->tokenStream(fieldName, reader), true); 
+        }
+    private: 
+        std::auto_ptr<TokenStreamFactory> factory_; 
+    };
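+
+    // For example, FilterFactory<lucene::analysis::LowerCaseFilter>
+    // first asks the wrapped factory for a token stream and then wraps
+    // that stream in a LowerCaseFilter; the 'true' argument, per the
+    // CLucene convention used throughout this file, transfers ownership
+    // of the inner stream to the filter.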
+
+    /**
+     * Specialized AnalyzerWrap for CLucene's PerFieldAnalyzerWrapper. A
+     * specialized template is needed because the per-field analyzer
+     * accepts parameters (specific analyzers for different fields plus
+     * a default analyzer).
+     */
+    template<>
+    class AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper> : public TokenStreamFactory {
+    public:
+        AnalyzerWrap(const Switch& sw) : analyzer_(0) {
+            using namespace Cpt::Parser;
+            using namespace lucene::analysis;
+			
+            analyzer_ = _CLNEW PerFieldAnalyzerWrapper(_CLNEW CustomAnalyzer(sw.def()));
+			
+            for (int i = 0; i < sw.cases().size(); i++) {
+                const Case& cs = *sw.cases()[i];
+                for (int j = 0; j < cs.fields().size(); j++) {
+                    analyzer_->addAnalyzer( cs.fields()[j].c_str(), _CLNEW CustomAnalyzer( cs.piping() ) );
+                }
+            }
+        }
+        virtual ~AnalyzerWrap() {
+            _CLDELETE(analyzer_);
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return analyzer_->tokenStream(fieldName, reader); 
+        }
+    private: 
+        lucene::analysis::PerFieldAnalyzerWrapper* analyzer_;
+    };
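+
+    // A hedged sketch of what a switch definition expresses (the
+    // concrete syntax spelling below is an assumption, not normative):
+    //
+    //     switch { case 'title', 'abstract': letter>lowercase;
+    //              default: standard; }
+    //
+    // Each listed field gets its own CustomAnalyzer built from the
+    // case's piping; all remaining fields fall back to the default
+    // piping handed to the PerFieldAnalyzerWrapper above.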
+		
+	
+	
+    /**
+     * Specialized StopFilter factory. A specialized factory is needed
+     * because StopFilter takes parameters (a stop word list or a
+     * language identifier).
+     */
+    template<>
+    class FilterFactory<lucene::analysis::StopFilter> : public TokenStreamFactory 
+    {
+    public:
+        FilterFactory(const Invokation& invokation,
+                      auto_ptr<TokenStreamFactory> factory)
+            : words_(0), ownWords_(0), factory_(factory) {
+            using namespace Cpt::Parser;
+            if (invokation.params().size() == 1 && dynamic_cast<Identifier*>(invokation.params()[0])) {
+                Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
+                //cpix_LangCode lang; 
+                if (id->id() == CPIX_WLANG_EN) {
+                    words_ = lucene::analysis::StopAnalyzer::ENGLISH_STOP_WORDS;
+                } else {
+                    THROW_CPIXEXC(L"No prepared stopword list for language code '%S'",
+                                  id->id().c_str());
+                }
+            } else {
+                ownWords_ = new wchar_t*[invokation.params().size()+1];
+                memset(ownWords_, 0, sizeof(wchar_t*)*(invokation.params().size()+1)); 
+                // FIXME: args may leak
+                for (int i = 0; i < invokation.params().size(); i++) {
+                    StringLit* lit = dynamic_cast<StringLit*>(invokation.params()[i]);
+                    if (lit) {
+                        const wstring& str = lit->text(); 
+                        ownWords_[i] = new wchar_t[str.length()+1]; 
+                        wcscpy(ownWords_[i], str.c_str());
+                    } else {
+                        THROW_CPIXEXC(L"StopFilter accepts only language identifer or list of strings as a parameters.");
+                    }
+                }
+            }
+
+        }
+        virtual ~FilterFactory() { 
+            if (ownWords_) {
+                for (int i = 0; ownWords_[i]; i++) {
+                    delete[] ownWords_[i]; 
+                }
+                delete[] ownWords_;
+            }
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return _CLNEW lucene::analysis::StopFilter(factory_->tokenStream(fieldName, reader), true, ownWords_ ? const_cast<const wchar_t**>(ownWords_) : words_); 
+        }
+    private: 
+        const wchar_t **words_;
+        wchar_t **ownWords_; // owned
+        std::auto_ptr<TokenStreamFactory> factory_; 
+    };
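+
+    // Per the constructor above, StopFilter takes either a single
+    // language identifier (only English has a prepared stop word list,
+    // CLucene's ENGLISH_STOP_WORDS) or an explicit list of string
+    // literals. Hypothetically (the keyword and argument spellings are
+    // assumptions, not normative):
+    //
+    //     stop(en)            // prepared English list
+    //     stop('a', 'the')    // explicit word list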
+	
+    /**
+     * Specialized SnowballFilter factory is needed, because SnowballFilter
+     * accepts parameters (the language). 
+     */
+    template<>
+    class FilterFactory<lucene::analysis::SnowballFilter> : public TokenStreamFactory 
+    {
+    public:
+        FilterFactory(const Invokation& invokation, 		
+                      auto_ptr<TokenStreamFactory> factory)
+            : factory_(factory) {
+            using namespace Cpt::Parser;
+            if (invokation.params().size() != 1 || !dynamic_cast<Identifier*>(invokation.params()[0])) {
+                THROW_CPIXEXC(L"Snowball filter takes exactly one identifier as a parameter." );
+            }
+            Identifier* id = dynamic_cast<Identifier*>(invokation.params()[0]);
+            if (id->id() == CPIX_WLANG_EN) {
+                lang_ = cpix_LANG_EN; 
+            } else {
+                THROW_CPIXEXC(L"Language identifier %S is not supported for stemming",
+                              id->id().c_str());
+            }
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return _CLNEW lucene::analysis::SnowballFilter(factory_->tokenStream(fieldName, reader), true, lang_); 
+        }
+    private: 
+        cpix_LangCode lang_;
+        std::auto_ptr<TokenStreamFactory> factory_; 
+    };
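+
+    // Hypothetically (the keyword spelling is an assumption, not
+    // normative):
+    //
+    //     stem(en)
+    //
+    // stems English tokens; any other language identifier makes the
+    // constructor above throw.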
+
+    /**
+     * Specialized LengthFilter factory is needed, because length filter 
+     * accepts parameters (minimum length and maximum length)
+     */
+    template<>
+    class FilterFactory<lucene::analysis::LengthFilter> : public TokenStreamFactory 
+    {
+    public:
+        FilterFactory(const Invokation& invokation, 
+                      auto_ptr<TokenStreamFactory> factory) 
+            : factory_(factory) {
+            using namespace Cpt::Parser;
+            if (invokation.params().size() != 2 || 
+                !dynamic_cast<IntegerLit*>(invokation.params()[0]) || 
+                !dynamic_cast<IntegerLit*>(invokation.params()[1])) {
+                THROW_CPIXEXC("Length filter takes exactly two integer parameters");
+            }
+            min_ = dynamic_cast<IntegerLit*>(invokation.params()[0])->value();
+            max_ = dynamic_cast<IntegerLit*>(invokation.params()[1])->value();
+        }
+        virtual lucene::analysis::TokenStream* tokenStream(const TCHAR          * fieldName, 
+                                                           lucene::util::Reader * reader) {
+            return _CLNEW lucene::analysis::LengthFilter(factory_->tokenStream(fieldName, reader), true, min_, max_ ); 
+        }
+    private: 
+        int min_, max_;
+        std::auto_ptr<TokenStreamFactory> factory_; 
+    };
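+
+    // Hypothetically (the keyword spelling is an assumption, not
+    // normative):
+    //
+    //     length(2, 20)
+    //
+    // passes through only tokens whose length falls within [2, 20].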
+
+    typedef auto_ptr<TokenStreamFactory> (*TokenizerFactoryCreator)(const Invokation& invokation);
+    typedef auto_ptr<TokenStreamFactory> (*FilterFactoryCreator)(const Invokation& invokation, 
+                                                                 auto_ptr<TokenStreamFactory> factory);
+    /**
+     * Sets up a tokenizer factory with given invokation parameters
+     */
+    template<class T>
+    struct TokenizerFactoryCtor
+    {
+        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
+            return auto_ptr<TokenStreamFactory>(new TokenizerFactory<T>(invokation)); 
+        }
+    };
+
+    /**
+     * Sets up an analyzer wrap with given invokation parameters
+     */
+    template<class T>
+    struct AnalyzerWrapCtor
+    {
+        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation) {
+            return auto_ptr<TokenStreamFactory>(new AnalyzerWrap<T>(invokation)); 
+        }
+    };
+
+    /**
+     * Sets up a filter factory with given invokation parameters
+     */
+    template<class T>
+    struct FilterFactoryCtor
+    {
+        static auto_ptr<TokenStreamFactory> create(const Invokation& invokation,
+                                                   auto_ptr<TokenStreamFactory> factory) {
+            return auto_ptr<TokenStreamFactory>(new FilterFactory<T>(invokation, factory)); 
+        }
+    };
+
+    struct TokenizerClassEntry {
+        const wchar_t *id_;
+        TokenizerFactoryCreator createFactory_;
+    };
+    
+    //
+    // The following TokenizerClassEntries and FilterClassEntries contain
+    // the mapping from tokenizer/analyzer/filter names to the glue code
+    // templates providing the implementations.
+    // 
+	
+    TokenizerClassEntry TokenizerClassEntries[] = { 
+        {CPIX_TOKENIZER_STANDARD, 	TokenizerFactoryCtor<lucene::analysis::standard::StandardTokenizer>::create},
+        {CPIX_TOKENIZER_WHITESPACE, TokenizerFactoryCtor<lucene::analysis::WhitespaceTokenizer>::create},
+        {CPIX_TOKENIZER_LETTER, 	TokenizerFactoryCtor<lucene::analysis::LetterTokenizer>::create},
+        {CPIX_TOKENIZER_KEYWORD, 	TokenizerFactoryCtor<lucene::analysis::KeywordTokenizer>::create},
+        {CPIX_ANALYZER_STANDARD, 	AnalyzerWrapCtor<lucene::analysis::standard::StandardAnalyzer>::create},
+
+// 		TODO: Add more Tokenizers/Analyzers
+        
+// 		Example tokenizer (works as such if tokenizers don't take parameters)
+//      {CPIX_TOKENIZER_MYTOKENIZER,TokenizerFactoryCtor<MyTokenizer>::create},
+
+// 		Example analyzer (works as such if analyzers don't take parameters)
+//      {CPIX_ANALYZER_MYANALYZER,	AnalyzerWrapCtor<MyAnalyzer>::create},
+
+        {0, 						0}
+    };
+	
+    struct FilterClassEntry {
+        const wchar_t *id_;
+        FilterFactoryCreator createFactory_;
+    };
+
+    FilterClassEntry FilterClassEntries[] = {
+        {CPIX_FILTER_STANDARD, 	FilterFactoryCtor<lucene::analysis::standard::StandardFilter>::create},
+        {CPIX_FILTER_LOWERCASE, FilterFactoryCtor<lucene::analysis::LowerCaseFilter>::create},
+        {CPIX_FILTER_ACCENT, 	FilterFactoryCtor<lucene::analysis::ISOLatin1AccentFilter>::create},
+        {CPIX_FILTER_STOP, 		FilterFactoryCtor<lucene::analysis::StopFilter>::create},
+        {CPIX_FILTER_STEM, 		FilterFactoryCtor<lucene::analysis::SnowballFilter>::create},
+        {CPIX_FILTER_LENGTH, 	FilterFactoryCtor<lucene::analysis::LengthFilter>::create},
+
+// 		TODO: Add more Filters
+
+// 		Example filter (works as such if filters don't take parameters)
+//      {CPIX_FILTER_MYFILTER,	FilterFactoryCtor<MyFilter>::create},
+
+        {0, 					0}
+    };
+	
+    CustomAnalyzer::CustomAnalyzer(const wchar_t* definition)
+    {
+        using namespace Cpt::Lex;
+        using namespace Cpt::Parser;
+
+
+        try
+            {
+                // 1. Set up a tokenizer
+                Cpix::AnalyzerExp::Tokenizer 
+                    tokenizer; 
+                StdLexer 
+                    lexer(tokenizer, definition);
+                
+                // 2. Parse 
+                std::auto_ptr<Piping> 
+                    def = ParsePiping(lexer); 
+                lexer.eatEof();
+                
+                // 3. Setup this item based on parsed definition
+                setup(*def);
+            }
+        catch (Cpt::ITxtCtxtExc & exc)
+            {
+                // provide additional info for the thrown exception
+                exc.setContext(definition);
+
+                // throw it fwd
+                throw;
+            }
+    }
+
+    CustomAnalyzer::CustomAnalyzer(const Piping& definition)
+    {	
+        setup(definition);
+    }
+    using namespace Cpt::Parser;
+	
+    void CustomAnalyzer::setup(const Piping& piping) {
+    
+        // If the first item is an invokation, create the corresponding analyzer/tokenizer
+        if (dynamic_cast<const Invokation*>(&piping.tokenizer())) 
+        {
+            const Invokation& tokenizer = dynamic_cast<const Invokation&>(piping.tokenizer());
+            TokenizerClassEntry& tokenizerEntry = getTokenizerEntry( tokenizer.id() ); 
+            factory_ = tokenizerEntry.createFactory_( tokenizer );
+        } else {
+            // If the first item is a switch statement, create a per-field analyzer
+            const Switch& tokenizer = dynamic_cast<const Switch&>(piping.tokenizer());
+            factory_ = new AnalyzerWrap<lucene::analysis::PerFieldAnalyzerWrapper>( tokenizer );
+        }
+        
+        // Add filters
+        const std::vector<Invokation*>& filters = piping.filters(); 
+        for (int i = 0; i < filters.size(); i++) {
+            FilterClassEntry& filterEntry = getFilterEntry( filters[i]->id() ); 
+            factory_ = filterEntry.createFactory_( *filters[i], factory_ );
+        }
+    }
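+
+    // Illustrative walk-through (with hypothetical keyword spellings):
+    // a definition parsed as the tokenizer 'letter' piped through the
+    // filters 'lowercase' and 'stop(...)' makes setup() create a
+    // TokenizerFactory<LetterTokenizer> first and then wrap it twice,
+    // leaving factory_ as
+    //
+    //     FilterFactory<StopFilter>(
+    //         FilterFactory<LowerCaseFilter>(
+    //             TokenizerFactory<LetterTokenizer>))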
+
+    TokenizerClassEntry& CustomAnalyzer::getTokenizerEntry(std::wstring id) {
+
+        // Looks for a match in the TokenizerClassEntries. After finding
+        // a match it returns the proper tokenizer/analyzer implementation
+        // provider.
+        for (int i = 0; TokenizerClassEntries[i].id_; i++) {
+            if (id == std::wstring(TokenizerClassEntries[i].id_)) {
+                return TokenizerClassEntries[i];
+            }
+        }
+
+        THROW_CPIXEXC(L"Unknown tokenizer '%S'.",
+                      id.c_str());
+    }
+
+    FilterClassEntry& CustomAnalyzer::getFilterEntry(std::wstring id) {
+
+        // Looks for a match in the FilterClassEntries. After finding
+        // a match it returns the proper filter implementation provider.
+        for (int i = 0; FilterClassEntries[i].id_; i++) {
+            if (id == std::wstring(FilterClassEntries[i].id_)) {
+                return FilterClassEntries[i];
+            }
+        }
+
+        THROW_CPIXEXC(L"Unknown filter '%S'.",
+                      id.c_str());
+    }
+	
+    CustomAnalyzer::~CustomAnalyzer() {} 
+
+    lucene::analysis::TokenStream* CustomAnalyzer::tokenStream(const wchar_t        * fieldName, 
+                                                               lucene::util::Reader * reader) {
+        // Utilizes the token stream factory to form the token stream.
+        // The token stream factory is prepared during custom analyzer
+        // construction, based on the analyzer definition string.
+                                                               
+        return factory_->tokenStream(fieldName, reader);
+    }
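+
+    // Hedged usage sketch (illustrative only; assumes a valid definition
+    // string and an already-opened reader):
+    //
+    //     CustomAnalyzer analyzer(L"<definition>");
+    //     lucene::analysis::TokenStream* ts =
+    //         analyzer.tokenStream(LCPIX_DEFAULT_FIELD, reader);
+    //     lucene::analysis::Token token;
+    //     while (ts->next(&token)) { /* consume token */ }
+    //     ts->close();
+    //     _CLDELETE(ts);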
+
+}
+