|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 #include "koreananalyzer.h" |
|
19 #include "tinyanalysis.inl" |
|
20 |
|
21 namespace analysis { |
|
22 |
|
23 KoreanTokenizer::KoreanTokenizer(lucene::util::Reader* reader) : |
|
24 begin_(0), |
|
25 end_(0), |
|
26 state_(0), |
|
27 t_(1), |
|
28 in_(*reader), |
|
29 i_(iterator(utf16_iterator(in_.begin()))) {} |
|
30 |
|
31 bool KoreanTokenizer::next(lucene::analysis::Token* token) { |
|
32 using namespace unicode; |
|
33 using namespace tiny; |
|
34 |
|
35 if ( state_ ) { |
|
36 jamu_[state_--] = '\0'; |
|
37 const wchar_t buf[] = { ComposeJamu(jamu_), '\0' }; |
|
38 token->set( buf, begin_, end_); |
|
39 token->setPositionIncrement(0); |
|
40 return true; |
|
41 } else { |
|
42 while ( *i_ ) { |
|
43 if ( IsHangulSyllable( *i_ ) ) { |
|
44 DecomposeHangul( IteratorOutput<wchar_t*>(jamu_), *i_ ); |
|
45 state_ = wcslen(jamu_)-1; |
|
46 wchar_t buf[] = {*i_, '\0'}; |
|
47 begin_ = i_; |
|
48 end_ = ++i_; |
|
49 token->set( buf, begin_, end_ ); |
|
50 return true; |
|
51 } else { |
|
52 Token<iterator> t = t_.consume( i_ ); |
|
53 if ( t ) { |
|
54 t.copyTo(token); |
|
55 return true; |
|
56 } |
|
57 } |
|
58 ++i_; |
|
59 } |
|
60 return false; |
|
61 } |
|
62 } |
|
63 |
|
64 |
|
65 KoreanQueryTokenizer::KoreanQueryTokenizer( lucene::util::Reader* reader ) |
|
66 : lucene::analysis::Tokenizer( reader ), |
|
67 t_( 1 ), |
|
68 in_( *reader ), |
|
69 i_( utf16_iterator( buffer_iterator( in_ ) ) ) {} |
|
70 |
|
71 bool KoreanQueryTokenizer::next( lucene::analysis::Token* token ) { |
|
72 using namespace tiny; |
|
73 |
|
74 Token<iterator> t = t_.consume(i_); |
|
75 if ( t ) { |
|
76 t.copyTo( token ); |
|
77 return true; |
|
78 } |
|
79 return false; |
|
80 } |
|
81 } |