diff -r d4d56f5e7c55 -r 65456528cac2 searchengine/oss/loc/analysis/src/ngram.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/searchengine/oss/loc/analysis/src/ngram.cpp Fri Oct 15 12:09:28 2010 +0530 @@ -0,0 +1,69 @@ +/* +* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). +* All rights reserved. +* This component and the accompanying materials are made available +* under the terms of "Eclipse Public License v1.0" +* which accompanies this distribution, and is available +* at the URL "http://www.eclipse.org/legal/epl-v10.html". +* +* Initial Contributors: +* Nokia Corporation - initial contribution. +* +* Contributors: +* +* Description: +* +*/ + + +#include "ngram.h" +#include "tinyunicode.h" +#include "tinyanalysis.inl" + + +namespace analysis { + + using namespace unicode; + + int IsNonCjk(int c) { + return iswalnum(c) && !IsCjk(c); + } + + CjkNGramTokenizer::CjkNGramTokenizer( + lucene::util::Reader* reader, + int gramSize ) + : lucene::analysis::Tokenizer(reader), + t_( gramSize ), + in_( *reader ), + i_( buffer_iterator( in_ ) ){ + } + + bool CjkNGramTokenizer::next( lucene::analysis::Token* token ) { + using namespace tiny; + + Token t = t_.consume(i_); + if ( t ) { + t.copyTo( token ); + return true; + } + return false; + } + + JamuNGramTokenizer::JamuNGramTokenizer( lucene::util::Reader* reader, + int gramSize ) + : lucene::analysis::Tokenizer( reader ), + t_( gramSize ), + in_( *reader ), + i_( utf16_iterator( buffer_iterator( in_ ) ) ) {} + + bool JamuNGramTokenizer::next( lucene::analysis::Token* token ) { + using namespace tiny; + + Token t = t_.consume(i_); + if ( t ) { + t.copyTo( token ); + return true; + } + return false; + } +}