util/unicode/data/ArabicShaping.txt
changeset 0 1918ee327afb
equal deleted inserted replaced
-1:000000000000 0:1918ee327afb
       
     1 # ArabicShaping-5.0.0.txt
       
     2 # Date: 2006-07-14, 11:23:00 PST [KW]
       
     3 #
       
     4 # This file is a normative contributory data file in the
       
     5 # Unicode Character Database.
       
     6 #
       
     7 # Copyright (c) 1991-2006 Unicode, Inc.
       
     8 # For terms of use, see http://www.unicode.org/terms_of_use.html
       
     9 #
       
    10 # This file defines the shaping classes for Arabic and Syriac
       
    11 # positional shaping, repeating in machine readable form the
       
    12 # information printed in Tables 8-3, 8-7, 8-8, 8-11, 8-12, and
       
    13 # 8-13 of The Unicode Standard, Version 4.0.
       
    14 #
       
    15 # See sections 8.2 and 8.3 of The Unicode Standard, Version 4.0
       
    16 # for more information.
       
    17 #
       
    18 # Each line contains four fields, separated by semicolons.
       
    19 #
       
    20 # Field 0: the code point, in 4-digit hexadecimal
       
    21 #   form, of an Arabic or Syriac character.
       
    22 # Field 1: gives a short schematic name for that character,
       
    23 #   abbreviated from the normative Unicode character name.
       
    24 # Field 2: defines the joining type (property name: Joining_Type)
       
    25 #   R Right_Joining
       
    26 #   L Left_Joining
       
    27 #   D Dual_Joining
       
    28 #   C Join_Causing
       
    29 #   U Non_Joining
       
    30 #   T Transparent
       
    31 #       See the Arabic block description for more information on these types.
       
    32 # Field 3: defines the joining group (property name: Joining_Group)
       
    33 #
       
    34 # The values of the joining group are based schematically on character
       
    35 # names. Where a schematic character name consists of two or more parts separated
       
    36 # by spaces, the formal Joining_Group property value, as specified in
       
    37 # PropertyValueAliases.txt, consists of the same name parts joined by
       
    38 # underscores. Hence, the entry:
       
    39 #
       
    40 #   0629; TEH MARBUTA; R; TEH MARBUTA
       
    41 #
       
    42 # corresponds to [Joining_Group = Teh_Marbuta].
       
    43 #
       
    44 # Note: For historical reasons, the property value [Joining_Group = Hamza_On_Heh_Goal]
       
    45 #   is anachronistically named. It used to apply to both of the following characters
       
    46 #   in earlier versions of the standard:
       
    47 #
       
    48 #   U+06C2 ARABIC LETTER HEH GOAL WITH HAMZA ABOVE
       
    49 #   U+06C3 ARABIC LETTER TEH MARBUTA GOAL
       
    50 #
       
    51 #   However, it currently applies only to U+06C3, and *not* to U+06C2.
       
    52 #   To avoid destabilizing existing Joining_Group property aliases, the
       
    53 #   value Hamza_On_Heh_Goal has not been changed, despite the fact that it
       
    54 #   no longer applies to Hamza On Heh Goal, but only to Teh Marbuta Goal.
       
    55 #
       
    56 # Note: Code points that are not explicitly listed in this file are
       
    57 # either of joining type T or U:
       
    58 #
       
    59 # - Those that are not explicitly listed and that are of General Category Mn, Me, or Cf
       
    60 #   have joining type T.
       
    61 # - All others not explicitly listed have type U.
       
    62 #
       
    63 # For an explicit listing of characters of joining type T, see
       
    64 # the derived property file DerivedJoiningType.txt.
       
    65 #
       
    66 # There are currently no characters of type L defined in Unicode.
       
    67 #
       
    68 # #############################################################
       
    69  
       
    70 # Unicode; Schematic Name; Joining Type; Joining Group
       
    71 
       
    72 # Arabic characters
       
    73 
       
    74 0600; ARABIC NUMBER SIGN; U; No_Joining_Group
       
    75 0601; ARABIC SIGN SANAH; U; No_Joining_Group
       
    76 0602; ARABIC FOOTNOTE MARKER; U; No_Joining_Group
       
    77 0603; ARABIC SIGN SAFHA; U; No_Joining_Group
       
    78 060B; AFGHANI SIGN; U; No_Joining_Group
       
    79 0621; HAMZA; U; No_Joining_Group
       
    80 0622; MADDA ON ALEF; R; ALEF
       
    81 0623; HAMZA ON ALEF; R; ALEF
       
    82 0624; HAMZA ON WAW; R; WAW
       
    83 0625; HAMZA UNDER ALEF; R; ALEF
       
    84 0626; HAMZA ON YEH; D; YEH
       
    85 0627; ALEF; R; ALEF
       
    86 0628; BEH; D; BEH
       
    87 0629; TEH MARBUTA; R; TEH MARBUTA
       
    88 062A; TEH; D; BEH
       
    89 062B; THEH; D; BEH
       
    90 062C; JEEM; D; HAH
       
    91 062D; HAH; D; HAH
       
    92 062E; KHAH; D; HAH
       
    93 062F; DAL; R; DAL
       
    94 0630; THAL; R; DAL
       
    95 0631; REH; R; REH
       
    96 0632; ZAIN; R; REH
       
    97 0633; SEEN; D; SEEN
       
    98 0634; SHEEN; D; SEEN
       
    99 0635; SAD; D; SAD
       
   100 0636; DAD; D; SAD
       
   101 0637; TAH; D; TAH
       
   102 0638; ZAH; D; TAH
       
   103 0639; AIN; D; AIN
       
   104 063A; GHAIN; D; AIN
       
   105 0640; TATWEEL; C; No_Joining_Group
       
   106 0641; FEH; D; FEH
       
   107 0642; QAF; D; QAF
       
   108 0643; KAF; D; KAF
       
   109 0644; LAM; D; LAM
       
   110 0645; MEEM; D; MEEM
       
   111 0646; NOON; D; NOON
       
   112 0647; HEH; D; HEH
       
   113 0648; WAW; R; WAW
       
   114 0649; ALEF MAKSURA; D; YEH
       
   115 064A; YEH; D; YEH
       
   116 066E; DOTLESS BEH; D; BEH
       
   117 066F; DOTLESS QAF; D; QAF
       
   118 0671; HAMZAT WASL ON ALEF; R; ALEF
       
   119 0672; WAVY HAMZA ON ALEF; R; ALEF
       
   120 0673; WAVY HAMZA UNDER ALEF; R; ALEF
       
   121 0674; HIGH HAMZA; U; No_Joining_Group
       
   122 0675; HIGH HAMZA ALEF; R; ALEF
       
   123 0676; HIGH HAMZA WAW; R; WAW
       
   124 0677; HIGH HAMZA WAW WITH DAMMA; R; WAW
       
   125 0678; HIGH HAMZA YEH; D; YEH
       
   126 0679; TEH WITH SMALL TAH; D; BEH
       
   127 067A; TEH WITH 2 DOTS VERTICAL ABOVE; D; BEH
       
   128 067B; BEH WITH 2 DOTS VERTICAL BELOW; D; BEH
       
   129 067C; TEH WITH RING; D; BEH
       
   130 067D; TEH WITH 3 DOTS ABOVE DOWNWARD; D; BEH
       
   131 067E; TEH WITH 3 DOTS BELOW; D; BEH
       
   132 067F; TEH WITH 4 DOTS ABOVE; D; BEH
       
   133 0680; BEH WITH 4 DOTS BELOW; D; BEH
       
   134 0681; HAMZA ON HAH; D; HAH
       
   135 0682; HAH WITH 2 DOTS VERTICAL ABOVE; D; HAH
       
   136 0683; HAH WITH MIDDLE 2 DOTS; D; HAH
       
   137 0684; HAH WITH MIDDLE 2 DOTS VERTICAL; D; HAH
       
   138 0685; HAH WITH 3 DOTS ABOVE; D; HAH
       
   139 0686; HAH WITH MIDDLE 3 DOTS DOWNWARD; D; HAH
       
   140 0687; HAH WITH MIDDLE 4 DOTS; D; HAH
       
   141 0688; DAL WITH SMALL TAH; R; DAL
       
   142 0689; DAL WITH RING; R; DAL
       
   143 068A; DAL WITH DOT BELOW; R; DAL
       
   144 068B; DAL WITH DOT BELOW AND SMALL TAH; R; DAL
       
   145 068C; DAL WITH 2 DOTS ABOVE; R; DAL
       
   146 068D; DAL WITH 2 DOTS BELOW; R; DAL
       
   147 068E; DAL WITH 3 DOTS ABOVE; R; DAL
       
   148 068F; DAL WITH 3 DOTS ABOVE DOWNWARD; R; DAL
       
   149 0690; DAL WITH 4 DOTS ABOVE; R; DAL
       
   150 0691; REH WITH SMALL TAH; R; REH
       
   151 0692; REH WITH SMALL V; R; REH
       
   152 0693; REH WITH RING; R; REH
       
   153 0694; REH WITH DOT BELOW; R; REH
       
   154 0695; REH WITH SMALL V BELOW; R; REH
       
   155 0696; REH WITH DOT BELOW AND DOT ABOVE; R; REH
       
   156 0697; REH WITH 2 DOTS ABOVE; R; REH
       
   157 0698; REH WITH 3 DOTS ABOVE; R; REH
       
   158 0699; REH WITH 4 DOTS ABOVE; R; REH
       
   159 069A; SEEN WITH DOT BELOW AND DOT ABOVE; D; SEEN
       
   160 069B; SEEN WITH 3 DOTS BELOW; D; SEEN
       
   161 069C; SEEN WITH 3 DOTS BELOW AND 3 DOTS ABOVE; D; SEEN
       
   162 069D; SAD WITH 2 DOTS BELOW; D; SAD
       
   163 069E; SAD WITH 3 DOTS ABOVE; D; SAD
       
   164 069F; TAH WITH 3 DOTS ABOVE; D; TAH
       
   165 06A0; AIN WITH 3 DOTS ABOVE; D; AIN
       
   166 06A1; DOTLESS FEH; D; FEH
       
   167 06A2; FEH WITH DOT MOVED BELOW; D; FEH
       
   168 06A3; FEH WITH DOT BELOW; D; FEH
       
   169 06A4; FEH WITH 3 DOTS ABOVE; D; FEH
       
   170 06A5; FEH WITH 3 DOTS BELOW; D; FEH
       
   171 06A6; FEH WITH 4 DOTS ABOVE; D; FEH
       
   172 06A7; QAF WITH DOT ABOVE; D; QAF
       
   173 06A8; QAF WITH 3 DOTS ABOVE; D; QAF
       
   174 06A9; KEHEH; D; GAF
       
   175 06AA; SWASH KAF; D; SWASH KAF
       
   176 06AB; KAF WITH RING; D; GAF
       
   177 06AC; KAF WITH DOT ABOVE; D; KAF
       
   178 06AD; KAF WITH 3 DOTS ABOVE; D; KAF
       
   179 06AE; KAF WITH 3 DOTS BELOW; D; KAF
       
   180 06AF; GAF; D; GAF
       
   181 06B0; GAF WITH RING; D; GAF
       
   182 06B1; GAF WITH 2 DOTS ABOVE; D; GAF
       
   183 06B2; GAF WITH 2 DOTS BELOW; D; GAF
       
   184 06B3; GAF WITH 2 DOTS VERTICAL BELOW; D; GAF
       
   185 06B4; GAF WITH 3 DOTS ABOVE; D; GAF
       
   186 06B5; LAM WITH SMALL V; D; LAM
       
   187 06B6; LAM WITH DOT ABOVE; D; LAM
       
   188 06B7; LAM WITH 3 DOTS ABOVE; D; LAM
       
   189 06B8; LAM WITH 3 DOTS BELOW; D; LAM
       
   190 06B9; NOON WITH DOT BELOW; D; NOON
       
   191 06BA; DOTLESS NOON; D; NOON
       
   192 06BB; DOTLESS NOON WITH SMALL TAH; D; NOON
       
   193 06BC; NOON WITH RING; D; NOON
       
   194 06BD; NOON WITH 3 DOTS ABOVE; D; NOON
       
   195 06BE; KNOTTED HEH; D; KNOTTED HEH
       
   196 06BF; HAH WITH MIDDLE 3 DOTS DOWNWARD AND DOT ABOVE; D; HAH
       
   197 06C0; HAMZA ON HEH; R; TEH MARBUTA
       
   198 06C1; HEH GOAL; D; HEH GOAL
       
   199 06C2; HAMZA ON HEH GOAL; D; HEH GOAL
       
   200 06C3; TEH MARBUTA GOAL; R; HAMZA ON HEH GOAL
       
   201 06C4; WAW WITH RING; R; WAW
       
   202 06C5; WAW WITH BAR; R; WAW
       
   203 06C6; WAW WITH SMALL V; R; WAW
       
   204 06C7; WAW WITH DAMMA; R; WAW
       
   205 06C8; WAW WITH ALEF ABOVE; R; WAW
       
   206 06C9; WAW WITH INVERTED SMALL V; R; WAW
       
   207 06CA; WAW WITH 2 DOTS ABOVE; R; WAW
       
   208 06CB; WAW WITH 3 DOTS ABOVE; R; WAW
       
   209 06CC; DOTLESS YEH; D; YEH
       
   210 06CD; YEH WITH TAIL; R; YEH WITH TAIL
       
   211 06CE; YEH WITH SMALL V; D; YEH
       
   212 06CF; WAW WITH DOT ABOVE; R; WAW
       
   213 06D0; YEH WITH 2 DOTS VERTICAL BELOW; D; YEH
       
   214 06D1; YEH WITH 3 DOTS BELOW; D; YEH
       
   215 06D2; YEH BARREE; R; YEH BARREE
       
   216 06D3; HAMZA ON YEH BARREE; R; YEH BARREE
       
   217 06D5; AE; R; TEH MARBUTA
       
   218 06DD; ARABIC END OF AYAH; U; No_Joining_Group
       
   219 06EE; DAL WITH INVERTED V; R; DAL
       
   220 06EF; REH WITH INVERTED V; R; REH
       
   221 06FA; SEEN WITH DOT BELOW AND 3 DOTS ABOVE; D; SEEN
       
   222 06FB; DAD WITH DOT BELOW; D; SAD
       
   223 06FC; GHAIN WITH DOT BELOW; D; AIN
       
   224 06FF; HEH WITH INVERTED V; D; KNOTTED HEH
       
   225 
       
   226 # Syriac characters
       
   227 
       
   228 0710; ALAPH; R; ALAPH
       
   229 0712; BETH; D; BETH
       
   230 0713; GAMAL; D; GAMAL
       
   231 0714; GAMAL GARSHUNI; D; GAMAL
       
   232 0715; DALATH; R; DALATH RISH
       
   233 0716; DOTLESS DALATH RISH; R; DALATH RISH
       
   234 0717; HE; R; HE
       
   235 0718; WAW; R; SYRIAC WAW
       
   236 0719; ZAIN; R; ZAIN
       
   237 071A; HETH; D; HETH
       
   238 071B; TETH; D; TETH
       
   239 071C; TETH GARSHUNI; D; TETH
       
   240 071D; YUDH; D; YUDH
       
   241 071E; YUDH HE; R; YUDH HE
       
   242 071F; KAPH; D; KAPH
       
   243 0720; LAMADH; D; LAMADH
       
   244 0721; MIM; D; MIM
       
   245 0722; NUN; D; NUN
       
   246 0723; SEMKATH; D; SEMKATH
       
   247 0724; FINAL SEMKATH; D; FINAL SEMKATH
       
   248 0725; E; D; E
       
   249 0726; PE; D; PE
       
   250 0727; REVERSED PE; D; REVERSED PE
       
   251 0728; SADHE; R; SADHE
       
   252 0729; QAPH; D; QAPH
       
   253 072A; RISH; R; DALATH RISH
       
   254 072B; SHIN; D; SHIN
       
   255 072C; TAW; R; TAW
       
   256 072D; PERSIAN BHETH; D; BETH
       
   257 072E; PERSIAN GHAMAL; D; GAMAL
       
   258 072F; PERSIAN DHALATH; R; DALATH RISH
       
   259 074D; SOGDIAN ZHAIN; R; ZHAIN
       
   260 074E; SOGDIAN KHAPH; D; KHAPH
       
   261 074F; SOGDIAN FE; D; FE
       
   262 
       
   263 # Arabic supplement characters
       
   264 
       
   265 0750; BEH WITH 3 DOTS HORIZONTALLY BELOW; D; BEH
       
   266 0751; BEH WITH DOT BELOW AND 3 DOTS ABOVE; D; BEH
       
   267 0752; BEH WITH 3 DOTS POINTING UPWARDS BELOW; D; BEH
       
   268 0753; BEH WITH 3 DOTS POINTING UPWARDS BELOW AND 2 DOTS ABOVE; D; BEH
       
   269 0754; BEH WITH 2 DOTS BELOW AND DOT ABOVE; D; BEH
       
   270 0755; BEH WITH INVERTED SMALL V BELOW; D; BEH
       
   271 0756; BEH WITH SMALL V; D; BEH
       
   272 0757; HAH WITH 2 DOTS ABOVE; D; HAH
       
   273 0758; HAH WITH 3 DOTS POINTING UPWARDS BELOW; D; HAH
       
   274 0759; DAL WITH 2 DOTS VERTICALLY BELOW AND SMALL TAH; R; DAL
       
   275 075A; DAL WITH INVERTED SMALL V BELOW; R; DAL
       
   276 075B; REH WITH STROKE; R; REH
       
   277 075C; SEEN WITH 4 DOTS ABOVE; D; SEEN
       
   278 075D; AIN WITH 2 DOTS ABOVE; D; AIN
       
   279 075E; AIN WITH 3 DOTS POINTING DOWNWARDS ABOVE; D; AIN
       
   280 075F; AIN WITH 2 DOTS VERTICALLY ABOVE; D; AIN
       
   281 0760; FEH WITH 2 DOTS BELOW; D; FEH
       
   282 0761; FEH WITH 3 DOTS POINTING UPWARDS BELOW; D; FEH
       
   283 0762; KEHEH WITH DOT ABOVE; D; GAF
       
   284 0763; KEHEH WITH 3 DOTS ABOVE; D; GAF
       
   285 0764; KEHEH WITH 3 DOTS POINTING UPWARDS BELOW; D; GAF
       
   286 0765; MEEM WITH DOT ABOVE; D; MEEM
       
   287 0766; MEEM WITH DOT BELOW; D; MEEM
       
   288 0767; NOON WITH 2 DOTS BELOW; D; NOON
       
   289 0768; NOON WITH SMALL TAH; D; NOON
       
   290 0769; NOON WITH SMALL V; D; NOON
       
   291 076A; LAM WITH BAR; D; LAM
       
   292 076B; REH WITH 2 DOTS VERTICALLY ABOVE; R; REH
       
   293 076C; REH WITH HAMZA ABOVE; R; REH
       
   294 076D; SEEN WITH 2 DOTS VERTICALLY ABOVE; D; SEEN
       
   295 
       
   296 # N'Ko Characters
       
   297 
       
   298 07CA; NKO A; D; No_Joining_Group
       
   299 07CB; NKO EE; D; No_Joining_Group
       
   300 07CC; NKO I; D; No_Joining_Group
       
   301 07CD; NKO E; D; No_Joining_Group
       
   302 07CE; NKO U; D; No_Joining_Group
       
   303 07CF; NKO OO; D; No_Joining_Group
       
   304 07D0; NKO O; D; No_Joining_Group
       
   305 07D1; NKO DAGBASINNA; D; No_Joining_Group
       
   306 07D2; NKO N; D; No_Joining_Group
       
   307 07D3; NKO BA; D; No_Joining_Group
       
   308 07D4; NKO PA; D; No_Joining_Group
       
   309 07D5; NKO TA; D; No_Joining_Group
       
   310 07D6; NKO JA; D; No_Joining_Group
       
   311 07D7; NKO CHA; D; No_Joining_Group
       
   312 07D8; NKO DA; D; No_Joining_Group
       
   313 07D9; NKO RA; D; No_Joining_Group
       
   314 07DA; NKO RRA; D; No_Joining_Group
       
   315 07DB; NKO SA; D; No_Joining_Group
       
   316 07DC; NKO GBA; D; No_Joining_Group
       
   317 07DD; NKO FA; D; No_Joining_Group
       
   318 07DE; NKO KA; D; No_Joining_Group
       
   319 07DF; NKO LA; D; No_Joining_Group
       
   320 07E0; NKO NA WOLOSO; D; No_Joining_Group
       
   321 07E1; NKO MA; D; No_Joining_Group
       
   322 07E2; NKO NYA; D; No_Joining_Group
       
   323 07E3; NKO NA; D; No_Joining_Group
       
   324 07E4; NKO HA; D; No_Joining_Group
       
   325 07E5; NKO WA; D; No_Joining_Group
       
   326 07E6; NKO YA; D; No_Joining_Group
       
   327 07E7; NKO NYA WOLOSO; D; No_Joining_Group
       
   328 07E8; NKO JONA JA; D; No_Joining_Group
       
   329 07E9; NKO JONA CHA; D; No_Joining_Group
       
   330 07EA; NKO JONA RA; D; No_Joining_Group
       
   331 07FA; NKO LAJANYALAN; C; No_Joining_Group
       
   332 
       
   333 # Other
       
   334 
       
   335 200D; ZERO WIDTH JOINER; C; No_Joining_Group
       
   336 200C; ZERO WIDTH NON-JOINER; U; No_Joining_Group
       
   337 
       
   338 # EOF