equal
deleted
inserted
replaced
|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 |
|
19 #include "ngram.h" |
|
20 #include "tinyunicode.h" |
|
21 #include "tinyanalysis.inl" |
|
22 |
|
23 |
|
24 namespace analysis { |
|
25 |
|
26 using namespace unicode; |
|
27 |
|
28 int IsNonCjk(int c) { |
|
29 return iswalnum(c) && !IsCjk(c); |
|
30 } |
|
31 |
|
32 CjkNGramTokenizer::CjkNGramTokenizer( |
|
33 lucene::util::Reader* reader, |
|
34 int gramSize ) |
|
35 : lucene::analysis::Tokenizer(reader), |
|
36 t_( gramSize ), |
|
37 in_( *reader ), |
|
38 i_( buffer_iterator( in_ ) ){ |
|
39 } |
|
40 |
|
41 bool CjkNGramTokenizer::next( lucene::analysis::Token* token ) { |
|
42 using namespace tiny; |
|
43 |
|
44 Token<iterator> t = t_.consume(i_); |
|
45 if ( t ) { |
|
46 t.copyTo( token ); |
|
47 return true; |
|
48 } |
|
49 return false; |
|
50 } |
|
51 |
|
52 JamuNGramTokenizer::JamuNGramTokenizer( lucene::util::Reader* reader, |
|
53 int gramSize ) |
|
54 : lucene::analysis::Tokenizer( reader ), |
|
55 t_( gramSize ), |
|
56 in_( *reader ), |
|
57 i_( utf16_iterator( buffer_iterator( in_ ) ) ) {} |
|
58 |
|
59 bool JamuNGramTokenizer::next( lucene::analysis::Token* token ) { |
|
60 using namespace tiny; |
|
61 |
|
62 Token<iterator> t = t_.consume(i_); |
|
63 if ( t ) { |
|
64 t.copyTo( token ); |
|
65 return true; |
|
66 } |
|
67 return false; |
|
68 } |
|
69 } |