equal
deleted
inserted
replaced
253 str.appendChar('.'); |
253 str.appendChar('.'); |
254 return ReadDotted(&str, CL_NS2(analysis,standard)::UNKNOWN,t); |
254 return ReadDotted(&str, CL_NS2(analysis,standard)::UNKNOWN,t); |
255 case '\'': |
255 case '\'': |
256 str.appendChar('\''); |
256 str.appendChar('\''); |
257 return ReadApostrophe(&str,t); |
257 return ReadApostrophe(&str,t); |
258 case '@': |
258 // case '@': |
259 str.appendChar('@'); |
259 // str.appendChar('@'); |
260 return ReadAt(&str,t); |
260 // return ReadAt(&str,t); |
261 case '&': |
261 case '&': |
262 str.appendChar('&'); |
262 str.appendChar('&'); |
263 return ReadCompany(&str,t); |
263 return ReadCompany(&str,t); |
264 /* default: fall through to end of this function. */ |
264 /* default: fall through to end of this function. */ |
265 } |
265 } |
302 } |
302 } |
303 while (!EOS && str.len < LUCENE_MAX_WORD_LEN-1 ) { |
303 while (!EOS && str.len < LUCENE_MAX_WORD_LEN-1 ) { |
304 ch = readChar(); |
304 ch = readChar(); |
305 const bool dot = ch == '.'; |
305 const bool dot = ch == '.'; |
306 const bool dash = ch == '-'; |
306 const bool dash = ch == '-'; |
|
307 //const bool at = ch == '@'; |
307 |
308 |
308 if (!(ALNUM || UNDERSCORE || dot || dash)) { |
309 if (!(ALNUM || UNDERSCORE || dot || dash)) { |
309 break; |
310 break; |
310 } |
311 } |
311 /* Multiple dots or dashes in succession end the token. |
312 /* Multiple dots or dashes in succession end the token. |
320 if (!prevWasDot) { |
321 if (!prevWasDot) { |
321 SHAVE_RIGHTMOST(str); |
322 SHAVE_RIGHTMOST(str); |
322 } |
323 } |
323 break; |
324 break; |
324 } |
325 } |
|
326 |
325 |
327 |
326 str.appendChar(ch); |
328 str.appendChar(ch); |
|
329 |
|
330 |
327 |
331 |
328 prevWasDot = dot; |
332 prevWasDot = dot; |
329 prevWasDash = dash; |
333 prevWasDash = dash; |
330 } |
334 } |
331 } |
335 } |
377 } |
381 } |
378 } |
382 } |
379 } |
383 } |
380 } /* End block-guard of strBuf */ |
384 } /* End block-guard of strBuf */ |
381 |
385 |
382 if (!EOS) { |
386 |
383 if (ch == '@' && str.len < LUCENE_MAX_WORD_LEN-1) { |
387 |
384 str.appendChar('@'); |
388 // if (!EOS) { |
385 return ReadAt(&str,t); |
389 // if (ch == '@' && str.len < LUCENE_MAX_WORD_LEN-1) { |
386 } else { |
390 // str.appendChar('@'); |
387 unReadChar(); |
391 // return ReadAt(&str,t); |
388 } |
392 // } else { |
389 } |
393 // unReadChar(); |
390 |
394 // } |
391 return setToken(t,&str,CL_NS2(analysis,standard)::UNKNOWN |
395 |
392 ? forcedType : CL_NS2(analysis,standard)::HOST); |
396 if (!EOS) { |
|
397 unReadChar(); |
|
398 } |
|
399 |
|
400 |
|
401 |
|
402 return setToken(t,&str,CL_NS2(analysis,standard)::ALPHANUM); |
|
403 // return setToken(t,&str,CL_NS2(analysis,standard)::UNKNOWN |
|
404 // ? forcedType : CL_NS2(analysis,standard)::HOST); |
393 } |
405 } |
394 |
406 |
395 bool StandardTokenizer::ReadApostrophe(StringBuffer* _str, Token* t) { |
407 bool StandardTokenizer::ReadApostrophe(StringBuffer* _str, Token* t) { |
396 StringBuffer& str=*_str; |
408 StringBuffer& str=*_str; |
397 |
409 |