searchengine/oss/loc/analysis/src/ngram.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041

/*
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description: Implementation of the CjkNGramTokenizer and JamuNGramTokenizer n-gram tokenizers.
*
*/


#include "ngram.h"
#include "tinyunicode.h"
#include "tinyanalysis.inl"


namespace analysis {

	using namespace unicode; 

	int IsNonCjk(int c) {
		return iswalnum(c) && !IsCjk(c); 
	}

	CjkNGramTokenizer::CjkNGramTokenizer( 
			lucene::util::Reader* reader, 
			int gramSize ) 
		: lucene::analysis::Tokenizer(reader),
          t_( gramSize ),
		  in_( *reader ),
	      i_( buffer_iterator( in_ ) ){
	}
		
	bool CjkNGramTokenizer::next( lucene::analysis::Token* token ) {
		using namespace tiny;

		Token<iterator> t = t_.consume(i_);
        if ( t ) {
            t.copyTo( token );
            return true; 
        } 
		return false; 
	}
	
	JamuNGramTokenizer::JamuNGramTokenizer( lucene::util::Reader* reader, 
                                            int gramSize ) 
    : lucene::analysis::Tokenizer( reader ),
       t_( gramSize ),
       in_( *reader ), 
       i_( utf16_iterator( buffer_iterator( in_ ) ) ) {}
	
    bool JamuNGramTokenizer::next( lucene::analysis::Token* token ) {
        using namespace tiny;
        
        Token<iterator> t = t_.consume(i_);
        if ( t ) {
            t.copyTo( token );
            return true; 
        } 
        return false; 
    }
}