util/unicode/data/CompositionExclusions.txt
changeset 0 1918ee327afb
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 # CompositionExclusions-5.0.0.txt
       
     2 # Date: 2006-05-23, 12:42:00 PST [KW]
       
     3 #
       
     4 # This file lists the characters for the Composition Exclusion Table
       
     5 # defined in UAX #15, Unicode Normalization Forms.
       
     6 #
       
     7 # This file is a normative contributory data file in the
       
     8 # Unicode Character Database.
       
     9 #
       
    10 # Copyright (c) 1991-2006 Unicode, Inc.
       
    11 # For terms of use, see http://www.unicode.org/terms_of_use.html
       
    12 #
       
    13 # For more information, see
       
     14 # http://www.unicode.org/unicode/reports/tr15/#Primary_Exclusion_List_Table
       
    15 #
       
    16 # For a full derivation of composition exclusions, see the derived property
       
    17 # Full_Composition_Exclusion in DerivedNormalizationProps.txt
       
    18 #
       
    19 
       
    20 # ================================================
       
    21 # (1) Script Specifics
       
    22 #
       
    23 # This list of characters cannot be derived from the UnicodeData.txt file.
       
    24 # ================================================
       
    25 
       
    26 0958    #  DEVANAGARI LETTER QA
       
    27 0959    #  DEVANAGARI LETTER KHHA
       
    28 095A    #  DEVANAGARI LETTER GHHA
       
    29 095B    #  DEVANAGARI LETTER ZA
       
    30 095C    #  DEVANAGARI LETTER DDDHA
       
    31 095D    #  DEVANAGARI LETTER RHA
       
    32 095E    #  DEVANAGARI LETTER FA
       
    33 095F    #  DEVANAGARI LETTER YYA
       
    34 09DC    #  BENGALI LETTER RRA
       
    35 09DD    #  BENGALI LETTER RHA
       
    36 09DF    #  BENGALI LETTER YYA
       
    37 0A33    #  GURMUKHI LETTER LLA
       
    38 0A36    #  GURMUKHI LETTER SHA
       
    39 0A59    #  GURMUKHI LETTER KHHA
       
    40 0A5A    #  GURMUKHI LETTER GHHA
       
    41 0A5B    #  GURMUKHI LETTER ZA
       
    42 0A5E    #  GURMUKHI LETTER FA
       
    43 0B5C    #  ORIYA LETTER RRA
       
    44 0B5D    #  ORIYA LETTER RHA
       
    45 0F43    #  TIBETAN LETTER GHA
       
    46 0F4D    #  TIBETAN LETTER DDHA
       
    47 0F52    #  TIBETAN LETTER DHA
       
    48 0F57    #  TIBETAN LETTER BHA
       
    49 0F5C    #  TIBETAN LETTER DZHA
       
    50 0F69    #  TIBETAN LETTER KSSA
       
    51 0F76    #  TIBETAN VOWEL SIGN VOCALIC R
       
    52 0F78    #  TIBETAN VOWEL SIGN VOCALIC L
       
    53 0F93    #  TIBETAN SUBJOINED LETTER GHA
       
    54 0F9D    #  TIBETAN SUBJOINED LETTER DDHA
       
    55 0FA2    #  TIBETAN SUBJOINED LETTER DHA
       
    56 0FA7    #  TIBETAN SUBJOINED LETTER BHA
       
    57 0FAC    #  TIBETAN SUBJOINED LETTER DZHA
       
    58 0FB9    #  TIBETAN SUBJOINED LETTER KSSA
       
    59 FB1D    #  HEBREW LETTER YOD WITH HIRIQ
       
    60 FB1F    #  HEBREW LIGATURE YIDDISH YOD YOD PATAH
       
    61 FB2A    #  HEBREW LETTER SHIN WITH SHIN DOT
       
    62 FB2B    #  HEBREW LETTER SHIN WITH SIN DOT
       
    63 FB2C    #  HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
       
    64 FB2D    #  HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
       
    65 FB2E    #  HEBREW LETTER ALEF WITH PATAH
       
    66 FB2F    #  HEBREW LETTER ALEF WITH QAMATS
       
    67 FB30    #  HEBREW LETTER ALEF WITH MAPIQ
       
    68 FB31    #  HEBREW LETTER BET WITH DAGESH
       
    69 FB32    #  HEBREW LETTER GIMEL WITH DAGESH
       
    70 FB33    #  HEBREW LETTER DALET WITH DAGESH
       
    71 FB34    #  HEBREW LETTER HE WITH MAPIQ
       
    72 FB35    #  HEBREW LETTER VAV WITH DAGESH
       
    73 FB36    #  HEBREW LETTER ZAYIN WITH DAGESH
       
    74 FB38    #  HEBREW LETTER TET WITH DAGESH
       
    75 FB39    #  HEBREW LETTER YOD WITH DAGESH
       
    76 FB3A    #  HEBREW LETTER FINAL KAF WITH DAGESH
       
    77 FB3B    #  HEBREW LETTER KAF WITH DAGESH
       
    78 FB3C    #  HEBREW LETTER LAMED WITH DAGESH
       
    79 FB3E    #  HEBREW LETTER MEM WITH DAGESH
       
    80 FB40    #  HEBREW LETTER NUN WITH DAGESH
       
    81 FB41    #  HEBREW LETTER SAMEKH WITH DAGESH
       
    82 FB43    #  HEBREW LETTER FINAL PE WITH DAGESH
       
    83 FB44    #  HEBREW LETTER PE WITH DAGESH
       
    84 FB46    #  HEBREW LETTER TSADI WITH DAGESH
       
    85 FB47    #  HEBREW LETTER QOF WITH DAGESH
       
    86 FB48    #  HEBREW LETTER RESH WITH DAGESH
       
    87 FB49    #  HEBREW LETTER SHIN WITH DAGESH
       
    88 FB4A    #  HEBREW LETTER TAV WITH DAGESH
       
    89 FB4B    #  HEBREW LETTER VAV WITH HOLAM
       
    90 FB4C    #  HEBREW LETTER BET WITH RAFE
       
    91 FB4D    #  HEBREW LETTER KAF WITH RAFE
       
    92 FB4E    #  HEBREW LETTER PE WITH RAFE
       
    93 
       
    94 # Total code points: 67
       
    95 
       
    96 # ================================================
       
    97 # (2) Post Composition Version precomposed characters
       
    98 #
       
    99 # These characters cannot be derived solely from the UnicodeData.txt file
       
   100 # in this version of Unicode.
       
   101 #
       
   102 # Note that characters added to the standard after the
       
   103 # Composition Version and which have canonical decomposition mappings
       
   104 # are not automatically added to this list of Post Composition
       
   105 # Version precomposed characters.
       
   106 # ================================================
       
   107 
       
   108 2ADC    #  FORKING
       
   109 1D15E   #  MUSICAL SYMBOL HALF NOTE
       
   110 1D15F   #  MUSICAL SYMBOL QUARTER NOTE
       
   111 1D160   #  MUSICAL SYMBOL EIGHTH NOTE
       
   112 1D161   #  MUSICAL SYMBOL SIXTEENTH NOTE
       
   113 1D162   #  MUSICAL SYMBOL THIRTY-SECOND NOTE
       
   114 1D163   #  MUSICAL SYMBOL SIXTY-FOURTH NOTE
       
   115 1D164   #  MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
       
   116 1D1BB   #  MUSICAL SYMBOL MINIMA
       
   117 1D1BC   #  MUSICAL SYMBOL MINIMA BLACK
       
   118 1D1BD   #  MUSICAL SYMBOL SEMIMINIMA WHITE
       
   119 1D1BE   #  MUSICAL SYMBOL SEMIMINIMA BLACK
       
   120 1D1BF   #  MUSICAL SYMBOL FUSA WHITE
       
   121 1D1C0   #  MUSICAL SYMBOL FUSA BLACK
       
   122 
       
   123 # Total code points: 14
       
   124 
       
   125 # ================================================
       
   126 # (3) Singleton Decompositions
       
   127 #
       
   128 # These characters can be derived from the UnicodeData.txt file
       
   129 # by including all characters whose canonical decomposition
       
   130 # consists of a single character.
       
   131 #
       
   132 # These characters are simply quoted here for reference.
       
   133 # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
       
   134 # ================================================
       
   135 
       
   136 # 0340..0341       [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
       
   137 # 0343                 COMBINING GREEK KORONIS
       
   138 # 0374                 GREEK NUMERAL SIGN
       
   139 # 037E                 GREEK QUESTION MARK
       
   140 # 0387                 GREEK ANO TELEIA
       
   141 # 1F71                 GREEK SMALL LETTER ALPHA WITH OXIA
       
   142 # 1F73                 GREEK SMALL LETTER EPSILON WITH OXIA
       
   143 # 1F75                 GREEK SMALL LETTER ETA WITH OXIA
       
   144 # 1F77                 GREEK SMALL LETTER IOTA WITH OXIA
       
   145 # 1F79                 GREEK SMALL LETTER OMICRON WITH OXIA
       
   146 # 1F7B                 GREEK SMALL LETTER UPSILON WITH OXIA
       
   147 # 1F7D                 GREEK SMALL LETTER OMEGA WITH OXIA
       
   148 # 1FBB                 GREEK CAPITAL LETTER ALPHA WITH OXIA
       
   149 # 1FBE                 GREEK PROSGEGRAMMENI
       
   150 # 1FC9                 GREEK CAPITAL LETTER EPSILON WITH OXIA
       
   151 # 1FCB                 GREEK CAPITAL LETTER ETA WITH OXIA
       
   152 # 1FD3                 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
       
   153 # 1FDB                 GREEK CAPITAL LETTER IOTA WITH OXIA
       
   154 # 1FE3                 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
       
   155 # 1FEB                 GREEK CAPITAL LETTER UPSILON WITH OXIA
       
   156 # 1FEE..1FEF       [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
       
   157 # 1FF9                 GREEK CAPITAL LETTER OMICRON WITH OXIA
       
   158 # 1FFB                 GREEK CAPITAL LETTER OMEGA WITH OXIA
       
   159 # 1FFD                 GREEK OXIA
       
   160 # 2000..2001       [2] EN QUAD..EM QUAD
       
   161 # 2126                 OHM SIGN
       
   162 # 212A..212B       [2] KELVIN SIGN..ANGSTROM SIGN
       
   163 # 2329                 LEFT-POINTING ANGLE BRACKET
       
   164 # 232A                 RIGHT-POINTING ANGLE BRACKET
       
   165 # F900..FA0D     [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
       
   166 # FA10                 CJK COMPATIBILITY IDEOGRAPH-FA10
       
   167 # FA12                 CJK COMPATIBILITY IDEOGRAPH-FA12
       
   168 # FA15..FA1E      [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
       
   169 # FA20                 CJK COMPATIBILITY IDEOGRAPH-FA20
       
   170 # FA22                 CJK COMPATIBILITY IDEOGRAPH-FA22
       
   171 # FA25..FA26       [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
       
   172 # FA2A..FA2D       [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
       
   173 # FA30..FA6A      [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
       
   174 # FA70..FAD9     [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
       
   175 # 2F800..2FA1D   [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
       
   176 
       
    177 # Total code points: 1030
       
   178 
       
   179 # ================================================
       
   180 # (4) Non-Starter Decompositions
       
   181 #
       
    182 # These characters can be derived from the UnicodeData.txt file
       
   183 # by including all characters whose canonical decomposition consists
       
   184 # of a sequence of characters, the first of which has a non-zero
       
   185 # combining class.
       
   186 #
       
   187 # These characters are simply quoted here for reference.
       
   188 # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
       
   189 # ================================================
       
   190 
       
   191 # 0344                 COMBINING GREEK DIALYTIKA TONOS
       
   192 # 0F73                 TIBETAN VOWEL SIGN II
       
   193 # 0F75                 TIBETAN VOWEL SIGN UU
       
   194 # 0F81                 TIBETAN VOWEL SIGN REVERSED II
       
   195 
       
   196 # Total code points: 4
       
   197