searchengine/oss/cl/clucene/src/clucene/analysis/standard/standardtokenizer.cpp
changeset 15 cf5c74390b98
parent 10 afe194b6b1cd
child 18 3e1f76dd2722
equal deleted inserted replaced
10:afe194b6b1cd 15:cf5c74390b98
    63   #define CONSUME_DIGITS _CONSUME_AS_LONG_AS(DIGIT)
    63   #define CONSUME_DIGITS _CONSUME_AS_LONG_AS(DIGIT)
    64 
    64 
    65   /* otherMatches is a condition (possibly compound) under which a character
    65   /* otherMatches is a condition (possibly compound) under which a character
    66   ** that's not an ALNUM or UNDERSCORE can be considered not to break the
    66   ** that's not an ALNUM or UNDERSCORE can be considered not to break the
    67   ** span.  Callers should pass false if only ALNUM/UNDERSCORE are acceptable. */
    67   ** span.  Callers should pass false if only ALNUM/UNDERSCORE are acceptable. */
    68   #define CONSUME_WORD                  _CONSUME_AS_LONG_AS(ALNUM || UNDERSCORE)
    68   #define CONSUME_WORD                  _CONSUME_AS_LONG_AS(ALNUM /*|| UNDERSCORE*/)
    69   
    69   
    70   /*
    70   /*
    71   ** Consume CJK characters
    71   ** Consume CJK characters
    72   */
    72   */
    73   #define CONSUME_CJK                   _CONSUME_AS_LONG_AS(_CJK)
    73   #define CONSUME_CJK                   _CONSUME_AS_LONG_AS(_CJK)
   146 		  if ( ReadThai(ch,t) ) 
   146 		  if ( ReadThai(ch,t) ) 
   147 			  return true; 
   147 			  return true; 
   148 	  // CHANGED
   148 	  // CHANGED
   149 	  } else if (SPACE) {
   149 	  } else if (SPACE) {
   150         continue;
   150         continue;
   151       } else if (ALPHA || UNDERSCORE) {
   151       } else if (ALPHA) {
   152         tokenStart = rdPos;
   152         tokenStart = rdPos;
   153         return ReadAlphaNum(ch,t);
   153         return ReadAlphaNum(ch,t);
   154       } else if (DIGIT || NEGATIVE_SIGN_ || DECIMAL) {
   154       } else if (DIGIT || NEGATIVE_SIGN_ || DECIMAL) {
   155         tokenStart = rdPos;
   155         tokenStart = rdPos;
   156         /* ReadNumber returns NULL if it fails to extract a valid number; in
   156         /* ReadNumber returns NULL if it fails to extract a valid number; in
   239 
   239 
   240     SUCCESSFULLY_EXTRACTED_NUMBER:
   240     SUCCESSFULLY_EXTRACTED_NUMBER:
   241     TCHAR rightmost = RIGHTMOST(str);
   241     TCHAR rightmost = RIGHTMOST(str);
   242     /* Don't include a trailing decimal point. */
   242     /* Don't include a trailing decimal point. */
   243     if(ALPHA){
   243     if(ALPHA){
   244             return ReadAlphaNum(prev,t);  
   244         CONSUME_WORD;
   245      }
   245     }
   246     if (rightmost == '.') {
   246     if (rightmost == '.') {
   247       SHAVE_RIGHTMOST(str);
   247       SHAVE_RIGHTMOST(str);
   248       unReadChar();
   248       unReadChar();
   249       rightmost = RIGHTMOST(str);
   249       rightmost = RIGHTMOST(str);
   250     }
   250     }