|
1 /* |
|
2 * |
|
3 * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved |
|
4 * |
|
5 * This file is a modification of the ICU file IndicReordering.h |
|
6 * by Jens Herden and Javier Sola for Khmer language |
|
7 * |
|
8 */ |
|
9 |
|
10 #ifndef __KHMERREORDERING_H |
|
11 #define __KHMERREORDERING_H |
|
12 |
|
13 /** |
|
14 * \file |
|
15 * \internal |
|
16 */ |
|
17 |
|
18 // #include "LETypes.h" |
|
19 // #include "OpenTypeTables.h" |
|
20 |
|
21 U_NAMESPACE_BEGIN |
|
22 |
|
23 class LEGlyphStorage; |
|
24 |
|
25 // Vocabulary |
|
26 // Base -> A consonant or an independent vowel in its full (not subscript) form. It is the |
|
27 // center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels, |
|
28 // split vowels, signs... but there is only one base in a syllable, it has to be coded as |
|
29 // the first character of the syllable. |
|
30 // split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant). |
|
31 // Khmer language has five of them. Khmer split vowels either have one part before the |
|
32 // base and one after the base or they have a part before the base and a part above the base. |
|
33 // The first part of all Khmer split vowels is the same character, identical to |
|
34 // the glyph of Khmer dependent vowel SRA EI |
|
35 // coeng --> modifier used in Khmer to construct coeng (subscript) consonants |
|
36 // Unlike Indian languages, the coeng modifies the consonant that follows it, |
|
37 // not the one preceding it. Each consonant has two forms, the base form and the subscript form: |
|
38 // the base form is the normal one (using the consonants code-point), the subscript form is |
|
39 // displayed when the combination coeng + consonant is encountered. |
|
40 // Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant |
|
41 // Consonant of type 2 -> Its subscript form occupies space under and before the base (only one, RO) |
|
42 // Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA) |
|
43 // Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds |
|
44 // if it is attached to a consonant of the first series or a consonant of the second series |
|
45 // Most consonants have an equivalent in the other series, but some of them exist only in |
|
46 // one series (for example SA). If we want to use the consonant SA with a vowel sound that |
|
47 // can only be done with a vowel sound that corresponds to a vowel accompanying a consonant |
|
48 // of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN |
|
49 // (x17C9 and x17CA). TRIISAP changes a first series consonant to second series sound and |
|
50 // MUSIKATOAN a second series consonant to have a first series vowel sound. |
|
51 // Consonant shifters are both normally superscript marks, but, when they are followed by a |
|
52 // superscript, they change shape and take the form of subscript dependent vowel SRA U. |
|
53 // If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they |
|
54 // should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should |
|
55 // be placed after the coeng consonant. |
|
56 // Dependent vowel -> In Khmer dependent vowels can be placed above, below, before or after the base |
|
57 // Each vowel has its own position. Only one vowel per syllable is allowed. |
|
58 // Signs -> Khmer has above signs and post signs. Only one above sign and/or one post sign are |
|
59 // allowed in a syllable. |
|
60 // |
|
61 // |
|
62 |
|
63 struct KhmerClassTable // This list must include all types of components that can be used inside a syllable |
|
64 { |
|
65 enum CharClassValues // order is important here! This order must be the same that is found in each horizontal |
|
66 // line in the statetable for Khmer (file KhmerReordering.cpp). |
|
67 { |
|
68 CC_RESERVED = 0, |
|
69 CC_CONSONANT = 1, // consonant of type 1 or independent vowel |
|
70 CC_CONSONANT2 = 2, // Consonant of type 2 |
|
71 CC_CONSONANT3 = 3, // Consonant of type 3 |
|
72 CC_ZERO_WIDTH_NJ_MARK = 4, // Zero Width non joiner character (0x200C) |
|
73 CC_CONSONANT_SHIFTER = 5, |
|
74 CC_ROBAT = 6, // Khmer special diacritic accent -treated differently in state table |
|
75 CC_COENG = 7, // Subscript consonant combining character |
|
76 CC_DEPENDENT_VOWEL = 8, |
|
77 CC_SIGN_ABOVE = 9, |
|
78 CC_SIGN_AFTER = 10, |
|
79 CC_ZERO_WIDTH_J_MARK = 11, // Zero width joiner character |
|
80 CC_COUNT = 12 // This is the number of character classes |
|
81 }; |
|
82 |
|
83 enum CharClassFlags |
|
84 { |
|
85 CF_CLASS_MASK = 0x0000FFFF, |
|
86 |
|
87 CF_CONSONANT = 0x01000000, // flag to speed up comparing |
|
88 CF_SPLIT_VOWEL = 0x02000000, // flag for a split vowel -> the first part is added in front of the syllable |
|
89 CF_DOTTED_CIRCLE = 0x04000000, // add a dotted circle if a character with this flag is the first in a syllable |
|
90 CF_COENG = 0x08000000, // flag to speed up comparing |
|
91 CF_SHIFTER = 0x10000000, // flag to speed up comparing |
|
92 CF_ABOVE_VOWEL = 0x20000000, // flag to speed up comparing |
|
93 |
|
94 // position flags |
|
95 CF_POS_BEFORE = 0x00080000, |
|
96 CF_POS_BELOW = 0x00040000, |
|
97 CF_POS_ABOVE = 0x00020000, |
|
98 CF_POS_AFTER = 0x00010000, |
|
99 CF_POS_MASK = 0x000f0000 |
|
100 }; |
|
101 |
|
102 typedef le_uint32 CharClass; |
|
103 |
|
104 typedef le_int32 ScriptFlags; |
|
105 |
|
106 LEUnicode firstChar; // for Khmer this will become x1780 |
|
107 LEUnicode lastChar; // and this x17DF |
|
108 const CharClass *classTable; |
|
109 |
|
110 CharClass getCharClass(LEUnicode ch) const; |
|
111 |
|
112 static const KhmerClassTable *getKhmerClassTable(); |
|
113 }; |
|
114 |
|
115 |
|
116 class KhmerReordering /* not : public UObject because all methods are static */ { |
|
117 public: |
|
118 static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode, |
|
119 LEUnicode *outChars, LEGlyphStorage &glyphStorage); |
|
120 |
|
121 static const LETag *getFeatureOrder(); |
|
122 |
|
123 private: |
|
124 // do not instantiate |
|
125 KhmerReordering(); |
|
126 |
|
127 static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount); |
|
128 |
|
129 }; |
|
130 |
|
131 |
|
132 U_NAMESPACE_END |
|
133 #endif |