--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysis/src/koreananalyzer.cpp Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,81 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+
+#include "koreananalyzer.h"
+#include "tinyanalysis.inl"
+
+namespace analysis {
+
+ // Sets up tokenization of `reader`: begin_, end_ and state_ start at 0,
+ // t_ is built with 1, and i_ is positioned at in_.begin().
+ KoreanTokenizer::KoreanTokenizer(lucene::util::Reader* reader)
+     : begin_(0), end_(0), state_(0),
+       t_(1),
+       in_(*reader),
+       i_(iterator(utf16_iterator(in_.begin()))) {}
+
+ // Emits the next token into `token`; returns false once *i_ reads as zero.
+ bool KoreanTokenizer::next(lucene::analysis::Token* token) {
+     using namespace unicode;
+     using namespace tiny;
+
+     // Jamu left over from the last syllable: cut the final one, recompose the
+     // remainder and emit it at the same offsets with position increment 0.
+     if ( state_ ) {
+         jamu_[state_--] = '\0';
+         const wchar_t composed[] = { ComposeJamu(jamu_), '\0' };
+         token->set( composed, begin_, end_ );
+         token->setPositionIncrement(0);
+         return true;
+     }
+     for ( ; *i_; ++i_ ) {
+         if ( !IsHangulSyllable( *i_ ) ) {
+             // Anything that is not a Hangul syllable is handed to t_.
+             Token<iterator> other = t_.consume( i_ );
+             if ( other ) { other.copyTo(token); return true; }
+             continue;
+         }
+         // Hangul syllable: stash its jamu for follow-up calls, emit the syllable.
+         DecomposeHangul( IteratorOutput<wchar_t*>(jamu_), *i_ );
+         state_ = wcslen(jamu_)-1;
+         wchar_t syllable[] = {*i_, '\0'};
+         begin_ = i_;
+         end_ = ++i_;
+         token->set( syllable, begin_, end_ );
+         return true;
+     }
+     return false;
+ }
+
+
+ // Wraps `reader`: the base Tokenizer receives the reader, t_ is built with 1,
+ // and i_ is a utf16_iterator over a buffer_iterator constructed from in_.
+ KoreanQueryTokenizer::KoreanQueryTokenizer( lucene::util::Reader* reader ) :
+     lucene::analysis::Tokenizer( reader ), t_( 1 ), in_( *reader ),
+     i_( utf16_iterator( buffer_iterator( in_ ) ) ) {}
+
+ // Pulls one token from i_ via t_; true if `token` was filled, else false.
+ bool KoreanQueryTokenizer::next( lucene::analysis::Token* token ) {
+     using namespace tiny;
+     Token<iterator> consumed = t_.consume(i_);
+     if ( !consumed ) {
+         return false;
+     }
+     consumed.copyTo( token );
+     return true;
+ }
+}