searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp
changeset 15 cf5c74390b98
parent 10 afe194b6b1cd
child 18 3e1f76dd2722
--- a/searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp	Tue Jul 06 15:30:04 2010 +0300
+++ b/searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp	Wed Aug 18 10:53:26 2010 +0300
@@ -65,7 +65,7 @@
   /* otherMatches is a condition (possibly compound) under which a character
   ** that's not an ALNUM or UNDERSCORE can be considered not to break the
   ** span.  Callers should pass false if only ALNUM/UNDERSCORE are acceptable. */
-  #define CONSUME_WORD                  _CONSUME_AS_LONG_AS(ALNUM || UNDERSCORE)
+  #define CONSUME_WORD                  _CONSUME_AS_LONG_AS(ALNUM /* ALNUM only: UNDERSCORE is deliberately not accepted as a word character */)
   
   /*
   ** Consume CJK characters
@@ -148,7 +148,7 @@
 	  // CHANGED
 	  } else if (SPACE) {
         continue;
-      } else if (ALPHA || UNDERSCORE) {
+      } else if (ALPHA) {
         tokenStart = rdPos;
         return ReadAlphaNum(ch,t);
       } else if (DIGIT || NEGATIVE_SIGN_ || DECIMAL) {
@@ -241,8 +241,8 @@
     TCHAR rightmost = RIGHTMOST(str);
     /* Don't including a trailing decimal point. */
     if(ALPHA){
-            return ReadAlphaNum(prev,t);  
-     }
+        CONSUME_WORD;
+    }
     if (rightmost == '.') {
       SHAVE_RIGHTMOST(str);
       unReadChar();