searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h
changeset 21 2c484ac32ef0
parent 0 671dee74050a
equal deleted inserted replaced
18:3e1f76dd2722 21:2c484ac32ef0
    98 	/** Collects only characters which do not satisfy _istspace.
    98 	/** Collects only characters which do not satisfy _istspace.
    99 	*/
    99 	*/
   100 	bool isTokenChar(const TCHAR c) const;
   100 	bool isTokenChar(const TCHAR c) const;
   101 };
   101 };
   102 
   102 
       
   103 /** A PhoneNumberTokenizer is a tokenizer that splits a number into its suffixes.
       
   104  * e.g. the number 567 is tokenized as 567, 67 and 7. This was introduced to make
       
   105  * numbers/words searchable from the middle. */
       
   106 class PhoneNumberTokenizer: public Tokenizer {
       
   107 private:
       
   108     const wchar_t *termText; // NOTE(review): wchar_t here, while the rest of this header uses TCHAR - confirm this is intended
       
   109     int32_t tokenLen;
       
   110     int32_t termLen;
       
   111 public:
       
   112     /** Construct a new PhoneNumberTokenizer over the given input Reader. */ 
       
   113     PhoneNumberTokenizer(CL_NS(util)::Reader* input);
       
   114     ~PhoneNumberTokenizer(){}
       
   115     bool next(Token* token); // presumably fills token with the next suffix and returns false when none remain - confirm in the .cpp
       
   116 };
   103 
   117 
   104 /** An Analyzer that uses WhitespaceTokenizer. */
   118 /** An Analyzer that uses WhitespaceTokenizer. */
   105 class WhitespaceAnalyzer: public Analyzer {
   119 class WhitespaceAnalyzer: public Analyzer {
   106  public:
   120  public:
   107   TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
   121   TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
   283 public:
   297 public:
   284     TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
   298     TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader);
   285     virtual ~KeywordAnalyzer(){}
   299     virtual ~KeywordAnalyzer(){}
   286 };
   300 };
   287 
   301 
       
   302 
       
   303 /** An Analyzer that tokenizes its input with PhoneNumberTokenizer. */
       
   304 class PhoneNumberAnalyzer: public Analyzer {
       
   305 public:
       
   306     TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); // presumably wraps reader in a PhoneNumberTokenizer - confirm in the .cpp
       
   307     virtual ~PhoneNumberAnalyzer(){}
       
   308 };
   288     
   309     
   289 /**
   310 /**
   290  * Removes words that are too long and too short from the stream.
   311  * Removes words that are too long and too short from the stream.
   291  *
   312  *
   292  */
   313  */