searchengine/oss/loc/analysis/src/koreananalyzer.cpp
changeset 24 65456528cac2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysis/src/koreananalyzer.cpp	Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,81 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
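+* Description: Tokenizers for Korean text. KoreanTokenizer emits each Hangul
+*              syllable followed by tokens composed from shorter prefixes of
+*              its decomposition; KoreanQueryTokenizer delegates to the tiny
+*              tokenizer only.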
+*
+*/
+
+#include "koreananalyzer.h"
+#include "tinyanalysis.inl"
+
+namespace analysis {
+
+    /**
+     * Builds a tokenizer that reads its text from the given reader.
+     *
+     * The token offsets (begin_, end_) and the pending-jamu counter (state_)
+     * start at zero, the helper tokenizer t_ is constructed with the
+     * argument 1, in_ is initialised from *reader and i_ is positioned at
+     * in_.begin().
+     *
+     * @param reader source of the text; it is dereferenced here, so it must
+     *               not be NULL.
+     */
+    KoreanTokenizer::KoreanTokenizer( lucene::util::Reader* reader )
+    :   begin_( 0 ),
+        end_( 0 ),
+        state_( 0 ),
+        t_( 1 ),
+        in_( *reader ),
+        i_( iterator( utf16_iterator( in_.begin() ) ) )
+    {
+    }
+
+    /**
+     * Produces the next token.
+     *
+     * For a Hangul syllable the syllable itself is emitted first. Its
+     * decomposition is kept in jamu_, and each following call shortens that
+     * sequence by one element, composes what is left with ComposeJamu and
+     * emits the result with the same offsets and a position increment of 0.
+     * Any other input is handed to t_.consume(); positions where it yields
+     * no token are skipped.
+     *
+     * @param token receives the text and offsets of the produced token
+     * @return true if a token was written, false once *i_ evaluates to zero
+     */
+    bool KoreanTokenizer::next( lucene::analysis::Token* token ) {
+        using namespace unicode;
+        using namespace tiny;
+
+        // A previously seen syllable still has shorter forms to emit.
+        if ( state_ ) {
+            // Cut the jamu sequence at the current length, then count down.
+            jamu_[state_] = '\0';
+            state_--;
+            const wchar_t composed[] = { ComposeJamu( jamu_ ), '\0' };
+            token->set( composed, begin_, end_ );
+            // Same position as the full syllable emitted earlier.
+            token->setPositionIncrement( 0 );
+            return true;
+        }
+
+        for ( ; *i_; ++i_ ) {
+            if ( !IsHangulSyllable( *i_ ) ) {
+                // Not Hangul: let the generic tokenizer try at this position.
+                Token<iterator> found = t_.consume( i_ );
+                if ( found ) {
+                    found.copyTo( token );
+                    return true;
+                }
+                continue;
+            }
+
+            // Hangul syllable: remember its decomposition for later calls
+            // and emit the syllable itself right away.
+            DecomposeHangul( IteratorOutput<wchar_t*>( jamu_ ), *i_ );
+            state_ = wcslen( jamu_ ) - 1;
+            wchar_t syllable[] = { *i_, '\0' };
+            begin_ = i_;
+            end_ = ++i_;
+            token->set( syllable, begin_, end_ );
+            return true;
+        }
+
+        return false;
+    }
+    
+	
+    /**
+     * Builds a query tokenizer that reads its text from the given reader.
+     *
+     * The reader is forwarded to the lucene::analysis::Tokenizer base, the
+     * helper tokenizer t_ is constructed with the argument 1, in_ is
+     * initialised from *reader and i_ is created from a buffer_iterator
+     * over in_.
+     *
+     * @param reader source of the text; it is dereferenced here, so it must
+     *               not be NULL.
+     */
+    KoreanQueryTokenizer::KoreanQueryTokenizer( lucene::util::Reader* reader )
+    :   lucene::analysis::Tokenizer( reader ),
+        t_( 1 ),
+        in_( *reader ),
+        i_( utf16_iterator( buffer_iterator( in_ ) ) )
+    {
+    }
+	
+    /**
+     * Produces the next token by asking t_ to consume input at i_.
+     *
+     * @param token receives a copy of the consumed token, if there is one
+     * @return true if t_.consume() yielded a token, false otherwise
+     */
+    bool KoreanQueryTokenizer::next( lucene::analysis::Token* token ) {
+        using namespace tiny;
+
+        Token<iterator> found = t_.consume( i_ );
+        if ( !found ) {
+            return false;
+        }
+
+        found.copyTo( token );
+        return true;
+    }
+}