searchengine/oss/loc/analysis/inc/public/tinyunicode.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 06 Jul 2010 15:30:04 +0300
changeset 10 afe194b6b1cd
permissions -rw-r--r--
Revision: 201025 Kit: 2010127
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
10
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
/*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
* All rights reserved.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
* This component and the accompanying materials are made available
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
* under the terms of "Eclipse Public License v1.0"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
* which accompanies this distribution, and is available
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     8
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     9
* Initial Contributors:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
* Nokia Corporation - initial contribution.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
* Contributors:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    14
* Description: 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    15
*
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    16
*/
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    17
#ifndef UNICODEINFO_H_
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    18
#define UNICODEINFO_H_
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
#include "tinyiterator.h"
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
namespace analysis {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
	/**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
	 * The package contains various unicode related functionality as
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    26
	 * needed by the remaining analysis package
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    27
	 */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
	namespace unicode {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
		/** Returns true, if character c is either Hangul Jamo or Syllable */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
        int IsHangul(int c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    32
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    33
        /** Returns true, if character c is Hangul Syllable */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    34
        int IsHangulSyllable(int c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    35
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    36
        /** Returns true, if character c is Hangul Jamo */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
        int IsHangulJamo(int c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
        /** 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
         * Returns true, if character is of either Chinese, 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
         * Japanese or Korean writing systems
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
        int IsCjk(int c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    44
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    45
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
         * Returns true, if characters is on the Thai unicode block
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
        int IsThai(int c);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    50
        namespace hangul {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    51
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    52
			/* First Hangul Syllable code */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    53
            static const int SyllableBase = 0xAC00;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    54
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
            // Jamu Alphabets
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
            /** First leading Jamu consonant */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
            static const int LeadingBase = 0x1100;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
            /** First vowel  */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
            static const int VowelBase = 0x1161;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    62
            /** First trailing Jamu consonant */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    63
            static const int TrailingBase = 0x11A7;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
            /** Leading consonants count */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
            static const int LeadingCount = 19;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    68
            /** Vowel count */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    69
            static const int VowelCount = 21;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    70
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    71
            /** Trailing consonant count */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    72
            static const int TrailingCount = 28;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
            /** 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
             * Amount of syllables that are composed of a leading 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
             * consonant and a vowel 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
             */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
            static const int LvSyllableCount = VowelCount * TrailingCount; // 588
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
            
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    80
            /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    81
             * Amount of syllables that are composed of a leading
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
             * consonant, a vowel and a trailing consonant 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
             */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
            static const int SyllableCount = LvSyllableCount * TrailingCount; // 11172 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
        }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    86
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    87
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    88
         * Decomposes hangul syllable into jamu alphabets
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    89
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    90
        template<typename Output>
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
        void DecomposeHangul(Output out, int c) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
            using namespace hangul;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
            int sindex = c - SyllableBase;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
            if (sindex < 0 || sindex >= SyllableCount) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
                out<<c<<'\0';
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
            } else {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
                // Leading
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    98
                out<<(LeadingBase + sindex / LvSyllableCount);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    99
                // Vocal
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100
                out<<(VowelBase + (sindex % LvSyllableCount) / TrailingCount);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   101
                // Trailing (voluntary)
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   102
                int toffset = sindex % TrailingCount;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   103
                if (toffset) out<<(TrailingBase + toffset);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   104
                // Finish
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   105
                out<<'\0';
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   106
            }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   107
        }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   108
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   109
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   110
         * Composes encountered jamu alphabets into hangul syllable.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   111
         * Moves given iterator over the consumed unicode character.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   112
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   113
        template<typename Iterator> 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   114
        int ConsumeComposedJamu(Iterator& i) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   115
            using namespace hangul;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   116
            int c = *i; ++i;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   117
            int lindex = c - LeadingBase;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   118
            if (0 <= lindex && lindex < LeadingCount) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   119
                int vindex = *i - VowelBase;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   120
                if (0 <= vindex && vindex < VowelCount) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   121
                    ++i;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   122
                    int tindex = *i - TrailingBase;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   123
                    c = (SyllableBase + (lindex * VowelCount + vindex) * TrailingCount);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   124
                    if (0 <= tindex && tindex < TrailingCount) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   125
                        ++i;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   126
                        c += tindex;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   127
                    } 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   128
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   129
            }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   130
            return c;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   131
        }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   132
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   133
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   134
         * Composes encountered jamu alphabets into hangul syllable.
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   135
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   136
        template<typename Iterator> 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   137
        inline int ComposeJamu(Iterator i) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   138
        	return ConsumeComposedJamu(i); 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   139
        }        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   140
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   141
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   142
	namespace tiny { // tiny analysis
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   143
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   144
        using namespace analysis::unicode::hangul;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   145
        using namespace analysis::unicode;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   146
	
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   147
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   148
         * Composes encountered Hangul Jamu characters into 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   149
         * Hangul syllables.  
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   150
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   151
        template <typename Iterator> 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   152
        struct HangulIterator {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   153
            public:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   154
                HangulIterator() : i_(), c_(), offset_(0) {}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   155
                HangulIterator(Iterator i) : i_(i) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   156
                    ++(*this); // populate c_
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   157
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   158
                int operator*() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
                    return c_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   160
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   161
                operator int() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   162
                    return offset_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   163
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   164
                HangulIterator& operator++() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   165
                    offset_ = i_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   166
                    c_ = ConsumeComposedJamu(i_);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   167
                    return *this;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   168
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   169
            private:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   170
                Iterator i_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   171
                int c_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   172
                int offset_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   173
        };
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   174
        
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   175
        /**
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   176
         * Decomposes encountered Hangul syllables into 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   177
         * Hangul Jamu characters  
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   178
         */
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   179
        template <typename Iterator> 
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   180
        struct JamuIterator {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   181
            public:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   182
                JamuIterator() : i_(), b_(0), offset_(0) { buf_[0];}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   183
                JamuIterator(Iterator i) : i_(i), b_(0) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   184
                    buf_[1] = '\0';
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   185
                    ++(*this); // populate buffer
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   186
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   187
                int operator*() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   188
                    return buf_[b_];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   189
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   190
                JamuIterator& operator++() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   191
                    offset_ = i_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   192
                    if (!buf_[++b_]) {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   193
                        b_ = 0; // reset buf
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   194
                        tiny::IteratorOutput<int*> out(buf_);
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   195
                        DecomposeHangul(out, *i_); ++i_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
                    }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
                    return *this;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
                operator int() {
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
                    return offset_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
                }
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   202
            private:
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   203
                Iterator i_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
                int buf_[4];
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
                int b_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
                int offset_;
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   207
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   208
        };
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   209
	}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   210
}
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   211
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   212
afe194b6b1cd Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
#endif /* UNICODEINFO_H_ */