searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h
changeset 21 2c484ac32ef0
parent 0 671dee74050a
--- a/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h	Thu Sep 02 21:37:32 2010 +0300
+++ b/searchengine/oss/cl/clucene/src/clucene/analysis/analyzers.h	Fri Sep 17 08:35:54 2010 +0300
@@ -100,6 +100,20 @@
 	bool isTokenChar(const TCHAR c) const;
 };
 
+/** A PhoneNumberTokenizer is a tokenizer that splits a number into its trailing
+ * parts (suffixes), e.g. the number 567 is tokenized as 567, 67 and 7. This was
+ * introduced to make a number/word searchable from the middle. */
+class PhoneNumberTokenizer: public Tokenizer {
+private:
+    const wchar_t *termText; // no initializer here; NOTE(review): presumably the term being split -- confirm in the .cpp
+    int32_t tokenLen; // no initializer here; NOTE(review): confirm the constructor sets it before next() reads it
+    int32_t termLen; // no initializer here; NOTE(review): presumably the length of termText -- confirm in the .cpp
+public:
+    /** Construct a new PhoneNumberTokenizer reading from the given input. */
+    PhoneNumberTokenizer(CL_NS(util)::Reader* input);
+    ~PhoneNumberTokenizer(){}
+    bool next(Token* token); // NOTE(review): presumably fills token and returns false once no parts remain -- confirm
+};
 
 /** An Analyzer that uses WhitespaceTokenizer. */
 class WhitespaceAnalyzer: public Analyzer {
@@ -285,6 +299,13 @@
     virtual ~KeywordAnalyzer(){}
 };
 
+
+/** An Analyzer that uses PhoneNumberTokenizer to tokenize the text it is given. */
+class PhoneNumberAnalyzer: public Analyzer {
+public:
+    TokenStream* tokenStream(const TCHAR* fieldName, CL_NS(util)::Reader* reader); // NOTE(review): presumably returns a new PhoneNumberTokenizer over reader; who frees the result is not visible here -- confirm
+    virtual ~PhoneNumberAnalyzer(){}
+};
     
 /**
  * Removes words that are too long and too short from the stream.