fontservices/textshaperplugin/IcuSource/layout/KhmerReordering.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2  *
       
     3  * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved 
       
     4  *
       
     5  * This file is a modification of the ICU file IndicReordering.h
       
     6  * by Jens Herden and Javier Sola for Khmer language 
       
     7  *
       
     8  */
       
     9 
       
    10 #ifndef __KHMERREORDERING_H
       
    11 #define __KHMERREORDERING_H
       
    12 
       
    13 /**
       
    14  * \file
       
    15  * \internal
       
    16  */
       
    17 
       
    18 // #include "LETypes.h"
       
    19 // #include "OpenTypeTables.h"
       
    20 
       
    21 U_NAMESPACE_BEGIN
       
    22 
       
    23 class LEGlyphStorage;
       
    24 
       
    25 // Vocabulary 
       
    26 //     Base ->         A consonant or an independent vowel in its full (not subscript) form. It is the 
       
    27 //                     center of the syllable, it can be surrounded by coeng (subscript) consonants, vowels,
       
    28 //                     split vowels, signs... but there is only one base in a syllable, it has to be coded as
       
    29 //                     the first character of the syllable.
       
    30 //     split vowel --> vowel that has two parts placed separately (e.g. before and after the consonant).
       
    31 //                     Khmer language has five of them. Khmer split vowels either have one part before the
       
    32 //                     base and one after the base or they have a part before the base and a part above the base.
       
    33 //                     The first part of all Khmer split vowels is the same character, identical to 
       
    34 //                     the glyph of Khmer dependent vowel SRA EI   
       
    35 //     coeng -->  modifier used in Khmer to construct coeng (subscript) consonants 
       
    36 //                Unlike in Indian languages, the coeng modifies the consonant that follows it,
       
    37 //                not the one preceding it. Each consonant has two forms, the base form and the subscript form;
       
    38 //                the base form is the normal one (using the consonants code-point), the subscript form is
       
    39 //                displayed when the combination coeng + consonant is encountered.
       
    40 //     Consonant of type 1 -> A consonant whose subscript form only occupies space under a base consonant
       
    41 //     Consonant of type 2.-> Its subscript form occupies space under and before the base (only one, RO)
       
    42 //     Consonant of Type 3 -> Its subscript form occupies space under and after the base (KHO, CHHO, THHO, BA, YO, SA)
       
    43 //     Consonant shifter -> Khmer has two series of consonants. The same dependent vowel has different sounds
       
    44 //                          if it is attached to a consonant of the first series or a consonant of the second series
       
    45 //                          Most consonants have an equivalent in the other series, but some of them exist only in
       
    46 //                          one series (for example SA). If we want to use the consonant SA with a vowel sound that
       
    47 //                          can only be done with a vowel sound that corresponds to a vowel accompanying a consonant
       
    48 //                          of the other series, then we need to use a consonant shifter: TRIISAP or MUSIKATOAN
       
    49 //                          (x17CA and x17C9 respectively). TRIISAP changes a first series consonant to second series sound and
       
    50 //                          MUSIKATOAN a second series consonant to have a first series vowel sound.
       
    51 //                          Consonant shifters are both normally superscript marks, but, when they are followed by a
       
    52 //                          superscript, they change shape and take the form of subscript dependent vowel SRA U.
       
    53 //                          If they are in the same syllable as a coeng consonant, Unicode 3.0 says that they
       
    54 //                          should be typed before the coeng. Unicode 4.0 breaks the standard and says that it should
       
    55 //                          be placed after the coeng consonant.
       
    56 //     Dependent vowel ->   In Khmer, dependent vowels can be placed above, below, before or after the base.
       
    57 //                          Each vowel has its own position. Only one vowel per syllable is allowed.
       
    58 //     Signs            ->  Khmer has above signs and post signs. Only one above sign and/or one post sign are
       
    59 //                          allowed in a syllable.
       
    60 //
       
    61 //     
       
    62 
       
    63 struct KhmerClassTable    // This list must include all types of components that can be used inside a syllable
       
    64 {   // Declares the character class values, the per-character flag bits, and the range/table fields used to classify a character.
       
    65     enum CharClassValues  // order is important here! This order must be the same that is found in each horizontal 
       
    66                           // line in the statetable for Khmer (file KhmerReordering.cpp).
       
    67     {
       
    68         CC_RESERVED             =  0, // class value 0; NOTE(review): presumably characters with no role inside a syllable - confirm in KhmerReordering.cpp
       
    69         CC_CONSONANT            =  1, // consonant of type 1 or independent vowel
       
    70         CC_CONSONANT2           =  2, // Consonant of type 2
       
    71         CC_CONSONANT3           =  3, // Consonant of type 3 
       
    72         CC_ZERO_WIDTH_NJ_MARK   =  4, // Zero Width non joiner character (0x200C)
       
    73         CC_CONSONANT_SHIFTER    =  5, // consonant shifter (TRIISAP / MUSIKATOAN, see the Vocabulary comment in this file)
       
    74         CC_ROBAT                =  6, // Khmer special diacritic accent -treated differently in state table
       
    75         CC_COENG                =  7, // Subscript consonant combining character
       
    76         CC_DEPENDENT_VOWEL      =  8, // dependent vowel (see the Vocabulary comment in this file)
       
    77         CC_SIGN_ABOVE           =  9, // NOTE(review): presumably an "above sign" as described in the Vocabulary comment - confirm against the class table
       
    78         CC_SIGN_AFTER           = 10, // NOTE(review): presumably a "post sign" as described in the Vocabulary comment - confirm against the class table
       
    79         CC_ZERO_WIDTH_J_MARK    = 11, // Zero width joiner character
       
    80         CC_COUNT                = 12  // This is the number of character classes
       
    81     };
       
    82 
       
    83     enum CharClassFlags
       
    84     {
       
    85         CF_CLASS_MASK    = 0x0000FFFF,  // selects the low 16 bits; NOTE(review): presumably extracts the CharClassValues part of a CharClass - confirm
       
    86 
       
    87         CF_CONSONANT     = 0x01000000,  // flag to speed up comparing
       
    88         CF_SPLIT_VOWEL   = 0x02000000,  // flag for a split vowel -> the first part is added in front of the syllable
       
    89         CF_DOTTED_CIRCLE = 0x04000000,  // add a dotted circle if a character with this flag is the first in a syllable
       
    90         CF_COENG         = 0x08000000,  // flag to speed up comparing
       
    91         CF_SHIFTER       = 0x10000000,  // flag to speed up comparing
       
    92         CF_ABOVE_VOWEL   = 0x20000000,  // flag to speed up comparing
       
    93 
       
    94         // position flags (each is a single bit inside CF_POS_MASK)
       
    95         CF_POS_BEFORE    = 0x00080000,
       
    96         CF_POS_BELOW     = 0x00040000,
       
    97         CF_POS_ABOVE     = 0x00020000,
       
    98         CF_POS_AFTER     = 0x00010000,
       
    99         CF_POS_MASK      = 0x000f0000   // bitwise OR of the four CF_POS_* flags above
       
   100     };
       
   101 
       
   102     typedef le_uint32 CharClass;    // NOTE(review): presumably a CharClassValues value combined with CharClassFlags bits - confirm
       
   103 
       
   104     typedef le_int32 ScriptFlags;   // not referenced anywhere else in this header
       
   105 
       
   106     LEUnicode firstChar;   // for Khmer this will become x1780
       
   107     LEUnicode lastChar;    //  and this x17DF
       
   108     const CharClass *classTable;    // NOTE(review): presumably one CharClass entry per character in firstChar..lastChar - confirm in KhmerReordering.cpp
       
   109 
       
   110     CharClass getCharClass(LEUnicode ch) const;    // returns the CharClass for ch; declared only, no definition in this header
       
   111 
       
   112     static const KhmerClassTable *getKhmerClassTable();    // static accessor returning a pointer to a const KhmerClassTable; no definition in this header
       
   113 };
       
   114 
       
   115 
       
   116 class KhmerReordering /* not : public UObject because all methods are static */ {
       
   117 public:
       
   118     static le_int32 reorder(const LEUnicode *theChars, le_int32 charCount, le_int32 scriptCode,
       
   119         LEUnicode *outChars, LEGlyphStorage &glyphStorage);    // NOTE(review): presumably reads charCount characters from theChars, writes the reordered text to outChars and returns the output length - confirm in KhmerReordering.cpp
       
   120 
       
   121     static const LETag *getFeatureOrder();    // returns a pointer to const LETag; NOTE(review): presumably the feature tags in the order they should be applied - confirm
       
   122 
       
   123 private:
       
   124     // do not instantiate
       
   125     KhmerReordering();    // private constructor: code outside this class cannot create a KhmerReordering object
       
   126 
       
   127     static le_int32 findSyllable(const KhmerClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount);    // NOTE(review): presumably returns the index just past the syllable that starts at prev - confirm in KhmerReordering.cpp
       
   128 
       
   129 };
       
   130 
       
   131 
       
   132 U_NAMESPACE_END
       
   133 #endif