diff -r 3e1f76dd2722 -r 2c484ac32ef0 searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h --- a/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h Thu Sep 02 21:37:32 2010 +0300 +++ b/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h Fri Sep 17 08:35:54 2010 +0300 @@ -100,6 +100,20 @@ bool isTokenChar(const TCHAR c) const; }; +/** A PhoneNumberTokenizer is a tokenizer that emits every suffix of a term. + * e.g. the number 567 is tokenized as 567, 67 and 7. This was introduced to make + * a number/word searchable from its middle. */ +class PhoneNumberTokenizer: public Tokenizer { +private: + const wchar_t *termText; /* NOTE(review): nearby declarations use TCHAR, not wchar_t - confirm this is intended */ + int32_t tokenLen; + int32_t termLen; +public: + /** Construct a new PhoneNumberTokenizer over the given input reader. */ + PhoneNumberTokenizer(CL_NS(util)::Reader* input); + ~PhoneNumberTokenizer(){} + bool next(Token* token); +}; /** An Analyzer that uses WhitespaceTokenizer. */ class WhitespaceAnalyzer: public Analyzer { @@ -285,6 +299,13 @@ virtual ~KeywordAnalyzer(){} }; + +/** An Analyzer that uses PhoneNumberTokenizer. */ +class PhoneNumberAnalyzer: public Analyzer { +public: + TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); + virtual ~PhoneNumberAnalyzer(){} +}; /** * Removes words that are too long and too short from the stream.