--- a/searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp Tue Jul 06 15:30:04 2010 +0300
+++ b/searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp Wed Aug 18 10:53:26 2010 +0300
@@ -65,7 +65,7 @@
/* otherMatches is a condition (possibly compound) under which a character
** that's not an ALNUM or UNDERSCORE can be considered not to break the
** span. Callers should pass false if only ALNUM/UNDERSCORE are acceptable. */
- #define CONSUME_WORD _CONSUME_AS_LONG_AS(ALNUM || UNDERSCORE)
+ #define CONSUME_WORD _CONSUME_AS_LONG_AS(ALNUM /* UNDERSCORE intentionally omitted: only ALNUM characters extend the span */)
/*
** Consume CJK characters
@@ -148,7 +148,7 @@
// CHANGED
} else if (SPACE) {
continue;
- } else if (ALPHA || UNDERSCORE) {
+ } else if (ALPHA) {
tokenStart = rdPos;
return ReadAlphaNum(ch,t);
} else if (DIGIT || NEGATIVE_SIGN_ || DECIMAL) {
@@ -241,8 +241,8 @@
TCHAR rightmost = RIGHTMOST(str);
/* Don't including a trailing decimal point. */
if(ALPHA){
- return ReadAlphaNum(prev,t);
- }
+ CONSUME_WORD;
+ }
if (rightmost == '.') {
SHAVE_RIGHTMOST(str);
unReadChar();