|
1 /* |
|
2 * |
|
3 * (C) Copyright IBM Corp. 1998-2005 - All Rights Reserved |
|
4 * |
|
5 */ |
|
6 |
|
7 #ifndef __INDICREORDERING_H |
|
8 #define __INDICREORDERING_H |
|
9 |
|
10 /** |
|
11 * \file |
|
12 * \internal |
|
13 */ |
|
14 |
|
15 #include "LETypes.h" |
|
16 #include "OpenTypeTables.h" |
|
17 |
|
18 U_NAMESPACE_BEGIN |
|
19 |
|
// Characters that get referred to by name.
// U+200C and U+200D are the Unicode zero width non-joiner / joiner.
#define C_SIGN_ZWNJ     0x200C
#define C_SIGN_ZWJ      0x200D
|
23 |
|
// Character class values.
// These occupy the field selected by CF_CLASS_MASK in a CharClass word;
// the IndicClassTable predicates compare the masked value against them.
#define CC_RESERVED                         0U
#define CC_VOWEL_MODIFIER                   1U
#define CC_STRESS_MARK                      2U
#define CC_INDEPENDENT_VOWEL                3U
#define CC_INDEPENDENT_VOWEL_2              4U
#define CC_CONSONANT                        5U
#define CC_CONSONANT_WITH_NUKTA             6U
#define CC_NUKTA                            7U
// CC_DEPENDENT_VOWEL .. CC_SPLIT_VOWEL_PIECE_3 must stay contiguous:
// IndicClassTable::isMatra() tests them as a range.
#define CC_DEPENDENT_VOWEL                  8U
#define CC_SPLIT_VOWEL_PIECE_1              9U
#define CC_SPLIT_VOWEL_PIECE_2              10U
#define CC_SPLIT_VOWEL_PIECE_3              11U
#define CC_VIRAMA                           12U
#define CC_ZERO_WIDTH_MARK                  13U

// Added by Nokia -- special case
#define CC_INDEPENDENT_VOWEL_A              14U
// Added by Nokia -- special case with independent vowel A
#define CC_DEPENDENT_VOWEL_CANDRA_E         15U
// Added by Nokia -- special case for Kannada Ra
#define CC_CONSONANT_KANNADA_BENGALI_RA     16U
// Added by Nokia -- special case for Tamil independent vowel O
#define CC_INDEPENDENT_VOWEL_TAMIL_O        17U

// Added by Nokia -- Gurmukhi vowel bearers:
//   ARA (also used for independent A), IRI and URA
#define CC_GUR_BEARER_A                     18U
#define CC_GUR_BEARER_I                     19U
#define CC_GUR_BEARER_U                     20U

// Added by Nokia -- Gurmukhi dependent vowels which can combine with
//   bearer ARA, bearer IRI and bearer URA respectively
#define CC_GUR_DEPENDENT_VOWEL_A            21U
#define CC_GUR_DEPENDENT_VOWEL_I            22U
#define CC_GUR_DEPENDENT_VOWEL_U            23U

// Number of class values defined above (0 .. 23).
#define CC_COUNT                            24U
|
63 |
|
// Character class flags.
// A CharClass word combines a class value (selected by CF_CLASS_MASK)
// with the single-bit and multi-bit fields defined below.
#define CF_CLASS_MASK               0x0000FFFFU

#define CF_CONSONANT                0x80000000U

#define CF_REPH                     0x40000000U
#define CF_VATTU                    0x20000000U
#define CF_BELOW_BASE               0x10000000U
#define CF_POST_BASE                0x08000000U
#define CF_LENGTH_MARK              0x04000000U
// Added by Nokia -- special case for Bengali Ya
#define CF_CONSONANT_BENGALI_YA     0x00800000U

// Added by Nokia -- 1922 Malayalam Chillu (begin)
#define CF_CONSONANT_MLYL_CHILLU    0x00400000U
// NOTE(review): despite the CC_ prefix, the values below fall in the
// Unicode Malayalam block (U+0D00..U+0D7F), so they look like code points
// rather than character class values -- confirm against their users.
#define CC_DEPENDENT_VOWEL_SIGN_MLYL_E  0xD46U
#define CC_CONSONANT_MLYL_YA            0xD2FU
#define CC_CONSONANT_MLYL_VA            0xD35U
#define CC_CONSONANT_MLYL_RA            0xD30U
#define CC_CONSONANT_MLYL_RRA           0xD31U
#define CC_CONSONANT_MLYL_LLA           0xD33U
#define CC_CONSONANT_MLYL_LLLA          0xD34U
// Added by Nokia -- 1922 Malayalam Chillu (end)

// Two-bit position field; CF_POS_MASK selects it.
#define CF_POS_BEFORE               0x00300000U
#define CF_POS_BELOW                0x00200000U
#define CF_POS_ABOVE                0x00100000U
#define CF_POS_AFTER                0x00000000U
#define CF_POS_MASK                 0x00300000U

// Index field: (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT.
// A non-zero value marks a split matra (see IndicClassTable::isSplitMatra).
#define CF_INDEX_MASK               0x000F0000U
#define CF_INDEX_SHIFT              16
|
96 |
|
// Script flag bits
#define SF_MATRAS_AFTER_BASE        0x80000000U
#define SF_REPH_AFTER_BELOW         0x40000000U
#define SF_EYELASH_RA               0x20000000U
#define SF_MPRE_FIXUP               0x10000000U

// Added by Nokia for matra combined with final form of YA in Gurmukhi.
// SF_MATRAS_AFTER_BASE and SF_MATRAS_AFTER_POSTBASE cannot coexist:
//  - with SF_MATRAS_AFTER_BASE set, Mbelow, Mabove and Mpost are attached
//    to the base consonant;
//  - with SF_MATRAS_AFTER_POSTBASE set, Mbelow, Mabove and Mpost are
//    attached to the post-base consonant;
//  - with neither flag set, Mbelow and Mabove are attached to the base
//    (or base + below-base consonant cluster), but Mpost is attached to
//    the post-base consonant.
#define SF_MATRAS_AFTER_POSTBASE    0x08000000U

#define SF_POST_BASE_LIMIT_MASK     0x0000FFFFU
#define SF_NO_POST_BASE_LIMIT       0x00007FFFU
|
112 |
|
113 typedef LEUnicode SplitMatra[3]; |
|
114 |
|
115 class MPreFixups; |
|
116 class LEGlyphStorage; |
|
117 |
|
118 struct IndicClassTable |
|
119 { |
|
120 typedef le_uint32 CharClass; |
|
121 typedef le_uint32 ScriptFlags; |
|
122 |
|
123 LEUnicode firstChar; |
|
124 LEUnicode lastChar; |
|
125 le_int32 worstCaseExpansion; |
|
126 ScriptFlags scriptFlags; |
|
127 const CharClass *classTable; |
|
128 const SplitMatra *splitMatraTable; |
|
129 |
|
130 inline le_int32 getWorstCaseExpansion() const; |
|
131 |
|
132 CharClass getCharClass(LEUnicode ch) const; |
|
133 |
|
134 inline const SplitMatra *getSplitMatra(CharClass charClass) const; |
|
135 |
|
136 inline le_bool isVowelModifier(LEUnicode ch) const; |
|
137 inline le_bool isStressMark(LEUnicode ch) const; |
|
138 inline le_bool isConsonant(LEUnicode ch) const; |
|
139 inline le_bool isReph(LEUnicode ch) const; |
|
140 inline le_bool isVirama(LEUnicode ch) const; |
|
141 inline le_bool isNukta(LEUnicode ch) const; |
|
142 inline le_bool isVattu(LEUnicode ch) const; |
|
143 inline le_bool isMatra(LEUnicode ch) const; |
|
144 inline le_bool isSplitMatra(LEUnicode ch) const; |
|
145 inline le_bool isLengthMark(LEUnicode ch) const; |
|
146 inline le_bool hasPostOrBelowBaseForm(LEUnicode ch) const; |
|
147 inline le_bool hasPostBaseForm(LEUnicode ch) const; |
|
148 inline le_bool hasBelowBaseForm(LEUnicode ch) const; |
|
149 // Added by Nokia: For special case Bengali Ya |
|
150 inline le_bool isBengaliYa(LEUnicode ch) const; |
|
151 // Added by Nokia -- 1922 mlyl --> |
|
152 inline le_bool isMlylChillu(LEUnicode ch) const; |
|
153 // <-- 1922 mlyl |
|
154 |
|
155 inline static le_bool isVowelModifier(CharClass charClass); |
|
156 inline static le_bool isStressMark(CharClass charClass); |
|
157 inline static le_bool isConsonant(CharClass charClass); |
|
158 inline static le_bool isReph(CharClass charClass); |
|
159 inline static le_bool isVirama(CharClass charClass); |
|
160 inline static le_bool isNukta(CharClass charClass); |
|
161 inline static le_bool isVattu(CharClass charClass); |
|
162 inline static le_bool isMatra(CharClass charClass); |
|
163 inline static le_bool isSplitMatra(CharClass charClass); |
|
164 inline static le_bool isLengthMark(CharClass charClass); |
|
165 inline static le_bool hasPostOrBelowBaseForm(CharClass charClass); |
|
166 inline static le_bool hasPostBaseForm(CharClass charClass); |
|
167 inline static le_bool hasBelowBaseForm(CharClass charClass); |
|
168 // Added by Nokia: For special case Bengali Ya |
|
169 inline static le_bool isBengaliYa(CharClass charClass); |
|
170 // Added by Nokia -- 1922 mlyl --> |
|
171 inline static le_bool isMlylChillu(CharClass charClass); |
|
172 // <-- 1922 mlyl |
|
173 |
|
174 static const IndicClassTable *getScriptClassTable(le_int32 scriptCode); |
|
175 }; |
|
176 |
|
177 class IndicReordering /* not : public UObject because all methods are static */ { |
|
178 public: |
|
179 static le_int32 getWorstCaseExpansion(le_int32 scriptCode); |
|
180 |
|
181 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, |
|
182 LEUnicode *outChars, LEGlyphStorage &glyphStorage, |
|
183 MPreFixups **outMPreFixups, LEErrorCode& success); |
|
184 |
|
185 static void adjustMPres(MPreFixups *mpreFixups, LEGlyphStorage &glyphStorage, |
|
186 LEErrorCode& success); |
|
187 |
|
188 static const LETag *getFeatureOrder(); |
|
189 |
|
190 private: |
|
191 // do not instantiate |
|
192 IndicReordering(); |
|
193 |
|
194 static le_int32 findSyllable(const IndicClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); |
|
195 |
|
196 }; |
|
197 |
|
198 inline le_int32 IndicClassTable::getWorstCaseExpansion() const |
|
199 { |
|
200 return worstCaseExpansion; |
|
201 } |
|
202 |
|
203 inline const SplitMatra *IndicClassTable::getSplitMatra(CharClass charClass) const |
|
204 { |
|
205 le_int32 index = (charClass & CF_INDEX_MASK) >> CF_INDEX_SHIFT; |
|
206 |
|
207 return &splitMatraTable[index - 1]; |
|
208 } |
|
209 |
|
210 inline le_bool IndicClassTable::isVowelModifier(CharClass charClass) |
|
211 { |
|
212 return (charClass & CF_CLASS_MASK) == CC_VOWEL_MODIFIER; |
|
213 } |
|
214 |
|
215 inline le_bool IndicClassTable::isStressMark(CharClass charClass) |
|
216 { |
|
217 return (charClass & CF_CLASS_MASK) == CC_STRESS_MARK; |
|
218 } |
|
219 |
|
220 inline le_bool IndicClassTable::isConsonant(CharClass charClass) |
|
221 { |
|
222 return (charClass & CF_CONSONANT) != 0; |
|
223 } |
|
224 |
|
225 inline le_bool IndicClassTable::isReph(CharClass charClass) |
|
226 { |
|
227 return (charClass & CF_REPH) != 0; |
|
228 } |
|
229 |
|
230 inline le_bool IndicClassTable::isNukta(CharClass charClass) |
|
231 { |
|
232 return (charClass & CF_CLASS_MASK) == CC_NUKTA; |
|
233 } |
|
234 |
|
235 inline le_bool IndicClassTable::isVirama(CharClass charClass) |
|
236 { |
|
237 return (charClass & CF_CLASS_MASK) == CC_VIRAMA; |
|
238 } |
|
239 |
|
240 inline le_bool IndicClassTable::isVattu(CharClass charClass) |
|
241 { |
|
242 return (charClass & CF_VATTU) != 0; |
|
243 } |
|
244 |
|
245 inline le_bool IndicClassTable::isMatra(CharClass charClass) |
|
246 { |
|
247 charClass &= CF_CLASS_MASK; |
|
248 |
|
249 // Added special CANDRA E char class check to enable formation of Devanagari CANDRA A |
|
250 // Added speical CC_GUR_DEPENDENT_VOWEL_* to support decompositions of the independent |
|
251 // vowels into a sequence of a vowel bearer and a depending vowel sign |
|
252 return charClass >= CC_DEPENDENT_VOWEL && charClass <= CC_SPLIT_VOWEL_PIECE_3 |
|
253 || charClass == CC_DEPENDENT_VOWEL_CANDRA_E |
|
254 || charClass == CC_GUR_DEPENDENT_VOWEL_A |
|
255 || charClass == CC_GUR_DEPENDENT_VOWEL_I |
|
256 || charClass == CC_GUR_DEPENDENT_VOWEL_U; |
|
257 } |
|
258 |
|
259 inline le_bool IndicClassTable::isSplitMatra(CharClass charClass) |
|
260 { |
|
261 return (charClass & CF_INDEX_MASK) != 0; |
|
262 } |
|
263 |
|
264 inline le_bool IndicClassTable::isLengthMark(CharClass charClass) |
|
265 { |
|
266 return (charClass & CF_LENGTH_MARK) != 0; |
|
267 } |
|
268 |
|
269 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(CharClass charClass) |
|
270 { |
|
271 return (charClass & (CF_POST_BASE | CF_BELOW_BASE)) != 0; |
|
272 } |
|
273 |
|
274 inline le_bool IndicClassTable::hasPostBaseForm(CharClass charClass) |
|
275 { |
|
276 return (charClass & CF_POST_BASE) != 0; |
|
277 } |
|
278 |
|
279 inline le_bool IndicClassTable::hasBelowBaseForm(CharClass charClass) |
|
280 { |
|
281 return (charClass & CF_BELOW_BASE) != 0; |
|
282 } |
|
283 |
|
284 // Added by Nokia -- For determining whether a character is a Bengali Ya |
|
285 inline le_bool IndicClassTable::isBengaliYa(CharClass charClass) |
|
286 { |
|
287 return (charClass & CF_CONSONANT_BENGALI_YA) != 0; |
|
288 } |
|
289 |
|
290 // Added by Nokia -- 1922 mlyl --> |
|
291 inline le_bool IndicClassTable::isMlylChillu(CharClass charClass) |
|
292 { |
|
293 return (charClass & CF_CONSONANT_MLYL_CHILLU) != 0; |
|
294 } |
|
295 // <-- 1922 mlyl |
|
296 |
|
297 inline le_bool IndicClassTable::isVowelModifier(LEUnicode ch) const |
|
298 { |
|
299 return isVowelModifier(getCharClass(ch)); |
|
300 } |
|
301 |
|
302 inline le_bool IndicClassTable::isStressMark(LEUnicode ch) const |
|
303 { |
|
304 return isStressMark(getCharClass(ch)); |
|
305 } |
|
306 |
|
307 inline le_bool IndicClassTable::isConsonant(LEUnicode ch) const |
|
308 { |
|
309 return isConsonant(getCharClass(ch)); |
|
310 } |
|
311 |
|
312 inline le_bool IndicClassTable::isReph(LEUnicode ch) const |
|
313 { |
|
314 return isReph(getCharClass(ch)); |
|
315 } |
|
316 |
|
317 inline le_bool IndicClassTable::isVirama(LEUnicode ch) const |
|
318 { |
|
319 return isVirama(getCharClass(ch)); |
|
320 } |
|
321 |
|
322 inline le_bool IndicClassTable::isNukta(LEUnicode ch) const |
|
323 { |
|
324 return isNukta(getCharClass(ch)); |
|
325 } |
|
326 |
|
327 inline le_bool IndicClassTable::isVattu(LEUnicode ch) const |
|
328 { |
|
329 return isVattu(getCharClass(ch)); |
|
330 } |
|
331 |
|
332 inline le_bool IndicClassTable::isMatra(LEUnicode ch) const |
|
333 { |
|
334 return isMatra(getCharClass(ch)); |
|
335 } |
|
336 |
|
337 inline le_bool IndicClassTable::isSplitMatra(LEUnicode ch) const |
|
338 { |
|
339 return isSplitMatra(getCharClass(ch)); |
|
340 } |
|
341 |
|
342 inline le_bool IndicClassTable::isLengthMark(LEUnicode ch) const |
|
343 { |
|
344 return isLengthMark(getCharClass(ch)); |
|
345 } |
|
346 |
|
347 inline le_bool IndicClassTable::hasPostOrBelowBaseForm(LEUnicode ch) const |
|
348 { |
|
349 return hasPostOrBelowBaseForm(getCharClass(ch)); |
|
350 } |
|
351 |
|
352 inline le_bool IndicClassTable::hasPostBaseForm(LEUnicode ch) const |
|
353 { |
|
354 return hasPostBaseForm(getCharClass(ch)); |
|
355 } |
|
356 |
|
357 inline le_bool IndicClassTable::hasBelowBaseForm(LEUnicode ch) const |
|
358 { |
|
359 return hasBelowBaseForm(getCharClass(ch)); |
|
360 } |
|
361 |
|
362 // Added by Nokia -- For determining whether a character is a Bengali Ya |
|
363 inline le_bool IndicClassTable::isBengaliYa(LEUnicode ch) const |
|
364 { |
|
365 return isBengaliYa(getCharClass(ch)); |
|
366 } |
|
367 |
|
368 // Added by Nokia -- 1922 mlyl --> |
|
369 inline le_bool IndicClassTable::isMlylChillu(LEUnicode ch) const |
|
370 { |
|
371 return isMlylChillu(getCharClass(ch)); |
|
372 } |
|
373 // <-- 1922 mlyl |
|
374 |
|
375 U_NAMESPACE_END |
|
376 #endif |