searchengine/oss/loc/analysis/src/ngram.cpp
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
hgs
parents:
diff changeset
     1
/*
hgs
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
hgs
parents:
diff changeset
     3
* All rights reserved.
hgs
parents:
diff changeset
     4
* This component and the accompanying materials are made available
hgs
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
hgs
parents:
diff changeset
     6
* which accompanies this distribution, and is available
hgs
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
hgs
parents:
diff changeset
     8
*
hgs
parents:
diff changeset
     9
* Initial Contributors:
hgs
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
hgs
parents:
diff changeset
    11
*
hgs
parents:
diff changeset
    12
* Contributors:
hgs
parents:
diff changeset
    13
*
hgs
parents:
diff changeset
    14
* Description: 
hgs
parents:
diff changeset
    15
*
hgs
parents:
diff changeset
    16
*/
hgs
parents:
diff changeset
    17
hgs
parents:
diff changeset
    18
hgs
parents:
diff changeset
    19
#include "ngram.h"
hgs
parents:
diff changeset
    20
#include "tinyunicode.h"
hgs
parents:
diff changeset
    21
#include "tinyanalysis.inl"
hgs
parents:
diff changeset
    22
hgs
parents:
diff changeset
    23
hgs
parents:
diff changeset
    24
namespace analysis {
hgs
parents:
diff changeset
    25
hgs
parents:
diff changeset
    26
	using namespace unicode; 
hgs
parents:
diff changeset
    27
hgs
parents:
diff changeset
    28
	int IsNonCjk(int c) {
hgs
parents:
diff changeset
    29
		return iswalnum(c) && !IsCjk(c); 
hgs
parents:
diff changeset
    30
	}
hgs
parents:
diff changeset
    31
hgs
parents:
diff changeset
    32
	/**
	 * Builds a CJK n-gram tokenizer on top of the given reader.
	 *
	 * The reader pointer is handed to the lucene::analysis::Tokenizer base
	 * and is also dereferenced to initialize in_, so it must not be null.
	 *
	 * @param reader   source of the text to tokenize
	 * @param gramSize value forwarded to the underlying tokenizer t_
	 */
	CjkNGramTokenizer::CjkNGramTokenizer( lucene::util::Reader* reader,
	                                      int gramSize )
	:	lucene::analysis::Tokenizer(reader),
		t_( gramSize ),
		in_( *reader ),
		// i_ is created from a buffer_iterator over in_.
		i_( buffer_iterator( in_ ) )
	{
	}
hgs
parents:
diff changeset
    40
		
hgs
parents:
diff changeset
    41
	/**
	 * Consumes one token from the input position i_ and, when one was
	 * produced, copies it into the caller-supplied lucene token.
	 *
	 * @param token destination the consumed token is copied to
	 * @return true if a token was consumed and copied, false otherwise
	 */
	bool CjkNGramTokenizer::next( lucene::analysis::Token* token ) {
		using namespace tiny;

		Token<iterator> consumed = t_.consume(i_);
		if ( !consumed ) {
			// Nothing was produced; the output token is left untouched.
			return false;
		}

		consumed.copyTo( token );
		return true;
	}
hgs
parents:
diff changeset
    51
	
hgs
parents:
diff changeset
    52
	/**
	 * Builds a Jamu n-gram tokenizer on top of the given reader.
	 *
	 * The reader pointer is handed to the lucene::analysis::Tokenizer base
	 * and is also dereferenced to initialize in_, so it must not be null.
	 *
	 * @param reader   source of the text to tokenize
	 * @param gramSize value forwarded to the underlying tokenizer t_
	 */
	JamuNGramTokenizer::JamuNGramTokenizer(
			lucene::util::Reader* reader,
			int gramSize )
		: lucene::analysis::Tokenizer( reader ),
		  t_( gramSize ),
		  in_( *reader ),
		  // i_ is created from a utf16_iterator wrapping a buffer_iterator over in_.
		  i_( utf16_iterator( buffer_iterator( in_ ) ) )
	{
	}
hgs
parents:
diff changeset
    58
	
hgs
parents:
diff changeset
    59
    /**
     * Consumes one token from the input position i_ and, when one was
     * produced, copies it into the caller-supplied lucene token.
     *
     * @param token destination the consumed token is copied to
     * @return true if a token was consumed and copied, false otherwise
     */
    bool JamuNGramTokenizer::next( lucene::analysis::Token* token ) {
        using namespace tiny;

        Token<iterator> consumed = t_.consume(i_);
        if ( !consumed ) {
            // Nothing was produced; the output token is left untouched.
            return false;
        }

        consumed.copyTo( token );
        return true;
    }
hgs
parents:
diff changeset
    69
}