author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Tue, 06 Jul 2010 15:30:04 +0300 | |
changeset 10 | afe194b6b1cd |
permissions | -rw-r--r-- |
10
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
/* |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
* All rights reserved. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
* This component and the accompanying materials are made available |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
* under the terms of "Eclipse Public License v1.0" |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
* which accompanies this distribution, and is available |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
* at the URL "http://www.eclipse.org/legal/epl-v10.html". |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
* |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
* Initial Contributors: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
* Nokia Corporation - initial contribution. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
* |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
* Contributors: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
* |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
* Description: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
* |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
#ifndef UNICODEINFO_H_ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
18 |
#define UNICODEINFO_H_ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
#include "tinyiterator.h" |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
namespace analysis { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
* The package contains various unicode related functionality as |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
* needed by the remaining analysis package |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
namespace unicode { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
/** Returns true, if character c is either Hangul Jamo or Syllable */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
int IsHangul(int c); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
/** Returns true, if character c is Hangul Syllable */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
34 |
int IsHangulSyllable(int c); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
35 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
36 |
/** Returns true, if character c is Hangul Jamo */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
int IsHangulJamo(int c); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
* Returns true, if character is of either Chinese, |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
* Japanese or Korean writing systems |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
int IsCjk(int c); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
44 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
45 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
* Returns true, if characters is on the Thai unicode block |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
int IsThai(int c); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
50 |
namespace hangul { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
51 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
52 |
/* First Hangul Syllable code */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
53 |
static const int SyllableBase = 0xAC00; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
54 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
// Jamu Alphabets |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
/** First leading Jamu consonant */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
static const int LeadingBase = 0x1100; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
/** First vowel */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
static const int VowelBase = 0x1161; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
62 |
/** First trailing Jamu consonant */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
63 |
static const int TrailingBase = 0x11A7; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
/** Leading consonants count */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
static const int LeadingCount = 19; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
68 |
/** Vowel count */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
69 |
static const int VowelCount = 21; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
70 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
71 |
/** Trailing consonant count */ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
72 |
static const int TrailingCount = 28; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
75 |
* Amount of syllables that are composed of a leading |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
76 |
* consonant and a vowel |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
77 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
78 |
static const int LvSyllableCount = VowelCount * TrailingCount; // 588 |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
79 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
80 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
81 |
* Amount of syllables that are composed of a leading |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
82 |
* consonant, a vowel and a trailing consonant |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
83 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
static const int SyllableCount = LvSyllableCount * TrailingCount; // 11172 |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
85 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
86 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
87 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
88 |
* Decomposes hangul syllable into jamu alphabets |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
89 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
90 |
template<typename Output> |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
91 |
void DecomposeHangul(Output out, int c) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
92 |
using namespace hangul; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
93 |
int sindex = c - SyllableBase; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
94 |
if (sindex < 0 || sindex >= SyllableCount) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
95 |
out<<c<<'\0'; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
96 |
} else { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
97 |
// Leading |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
98 |
out<<(LeadingBase + sindex / LvSyllableCount); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
99 |
// Vocal |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
100 |
out<<(VowelBase + (sindex % LvSyllableCount) / TrailingCount); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
101 |
// Trailing (voluntary) |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
102 |
int toffset = sindex % TrailingCount; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
103 |
if (toffset) out<<(TrailingBase + toffset); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
104 |
// Finish |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
105 |
out<<'\0'; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
106 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
107 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
108 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
109 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
110 |
* Composes encountered jamu alphabets into hangul syllable. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
111 |
* Moves given iterator over the consumed unicode character. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
112 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
113 |
template<typename Iterator> |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
114 |
int ConsumeComposedJamu(Iterator& i) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
115 |
using namespace hangul; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
116 |
int c = *i; ++i; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
117 |
int lindex = c - LeadingBase; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
118 |
if (0 <= lindex && lindex < LeadingCount) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
119 |
int vindex = *i - VowelBase; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
120 |
if (0 <= vindex && vindex < VowelCount) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
121 |
++i; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
122 |
int tindex = *i - TrailingBase; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
123 |
c = (SyllableBase + (lindex * VowelCount + vindex) * TrailingCount); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
124 |
if (0 <= tindex && tindex < TrailingCount) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
125 |
++i; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
126 |
c += tindex; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
127 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
128 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
129 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
130 |
return c; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
131 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
132 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
133 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
134 |
* Composes encountered jamu alphabets into hangul syllable. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
135 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
136 |
template<typename Iterator> |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
137 |
inline int ComposeJamu(Iterator i) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
138 |
return ConsumeComposedJamu(i); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
139 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
140 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
141 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
142 |
namespace tiny { // tiny analysis |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
143 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
144 |
using namespace analysis::unicode::hangul; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
145 |
using namespace analysis::unicode; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
146 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
147 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
148 |
* Composes encountered Hangul Jamu characters into |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
149 |
* Hangul syllables. |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
150 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
151 |
template <typename Iterator> |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
152 |
struct HangulIterator { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
153 |
public: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
154 |
HangulIterator() : i_(), c_(), offset_(0) {} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
155 |
HangulIterator(Iterator i) : i_(i) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
156 |
++(*this); // populate c_ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
157 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
158 |
int operator*() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
return c_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
160 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
161 |
operator int() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
162 |
return offset_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
163 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
164 |
HangulIterator& operator++() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
165 |
offset_ = i_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
166 |
c_ = ConsumeComposedJamu(i_); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
167 |
return *this; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
168 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
169 |
private: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
170 |
Iterator i_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
171 |
int c_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
172 |
int offset_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
173 |
}; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
174 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
175 |
/** |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
176 |
* Decomposes encountered Hangul syllables into |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
177 |
* Hangul Jamu characters |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
178 |
*/ |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
179 |
template <typename Iterator> |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
180 |
struct JamuIterator { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
181 |
public: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
182 |
JamuIterator() : i_(), b_(0), offset_(0) { buf_[0];} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
183 |
JamuIterator(Iterator i) : i_(i), b_(0) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
184 |
buf_[1] = '\0'; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
185 |
++(*this); // populate buffer |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
186 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
187 |
int operator*() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
188 |
return buf_[b_]; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
189 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
190 |
JamuIterator& operator++() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
191 |
offset_ = i_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
192 |
if (!buf_[++b_]) { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
193 |
b_ = 0; // reset buf |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
194 |
tiny::IteratorOutput<int*> out(buf_); |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
195 |
DecomposeHangul(out, *i_); ++i_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
return *this; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
199 |
operator int() { |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
200 |
return offset_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
201 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
202 |
private: |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
203 |
Iterator i_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
204 |
int buf_[4]; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
205 |
int b_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
206 |
int offset_; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
207 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
208 |
}; |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
209 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
210 |
} |
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
211 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
212 |
|
afe194b6b1cd
Revision: 201025
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
#endif /* UNICODEINFO_H_ */ |