textinput/ptihangulcore/src/hangulctype.c
branchRCL_3
changeset 3 f5a1e66df979
equal deleted inserted replaced
0:eb1f2e154e89 3:f5a1e66df979
       
     1 /**
       
     2  * @file    hangulctype.c
       
     3  * @brief   hangulctype source file
       
     4  */
       
     5 
       
     6 /* libhangul
       
     7  * Copyright (c) 2005,2006 Choe Hwanjin
       
     8  * All rights reserved.
       
     9  * This library is free software; you can redistribute it and/or
       
    10  * modify it under the terms of the GNU Lesser General Public
       
    11  * License as published by the Free Software Foundation; either
       
    12  * version 2.1 of the License, or (at your option) any later version.
       
    13  *
       
    14  * This library is distributed in the hope that it will be useful,
       
    15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
       
    17  * Lesser General Public License for more details.
       
    18  *
       
    19  * You should have received a copy of the GNU Lesser General Public
       
    20  * License along with this library; if not, write to the Free Software
       
    21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
       
    22  */
       
    23 
       
    24 #ifdef HAVE_CONFIG_H
       
    25 #include <config.h>
       
    26 #endif
       
    27 
       
    28 #include <stdlib.h>
       
    29 
       
    30 #include "hangul.h"
       
    31 
       
/* First precomposed Hangul syllable code point (see hangul_is_syllable). */
static const ucschar syllable_base  = 0xac00;
/* First choseong (leading consonant) code point. */
static const ucschar choseong_base  = 0x1100;
/* First jungseong (vowel) code point. */
static const ucschar jungseong_base = 0x1161;
/* One below the first jongseong (0x11a8): offset 0 stands for "no jongseong". */
static const ucschar jongseong_base = 0x11a7;
/* Number of jungseong slots used by the syllable index arithmetic. */
static const int njungseong = 21;
/* Number of jongseong slots, counting the "no jongseong" slot at offset 0. */
static const int njongseong = 28;
       
    38 
       
    39 /**
       
    40  * @brief check for a choseong
       
    41  * @param c ucs4 code value
       
    42  * @return true if the character c falls into choseong class
       
    43  *
       
    44  * This function check whether c, which must have ucs4 value, falls into
       
    45  * choseong (leading consonants) class.
       
    46  */
       
    47 bool
       
    48 hangul_is_choseong(ucschar c)
       
    49 {
       
    50     return c >= 0x1100 && c <= 0x1159;
       
    51 }
       
    52 
       
    53 /**
       
    54  * @brief check for a jungseong
       
    55  * @param c ucs4 code value
       
    56  * @return true if the character c falls into jungseong class
       
    57  *
       
    58  * This function check whether c, which must have ucs4 value, falls into
       
    59  * jungseong (vowels) class.
       
    60  */
       
    61 bool
       
    62 hangul_is_jungseong(ucschar c)
       
    63 {
       
    64     return c >= 0x1161 && c <= 0x11a2;
       
    65 }
       
    66 
       
    67 /**
       
    68  * @brief check for a jongseong
       
    69  * @param c ucs4 code value
       
    70  * @return true if the character c falls into jongseong class
       
    71  *
       
    72  * This function check whether c, which must have ucs4 value, falls into
       
    73  * jongseong (trailing consonants) class.
       
    74  */
       
    75 bool
       
    76 hangul_is_jongseong(ucschar c)
       
    77 {
       
    78     return c >= 0x11a8 && c <= 0x11f9;
       
    79 }
       
    80 
       
    81 bool
       
    82 hangul_is_combining_mark(ucschar c)
       
    83 {
       
    84     return  c == 0x302e || c == 0x302f  ||
       
    85 	   (c >= 0x0300 && c <= 0x036F) ||
       
    86 	   (c >= 0x1dc0 && c <= 0x1dff) ||
       
    87 	   (c >= 0xfe20 && c <= 0xfe2f);
       
    88 }
       
    89 
       
    90 bool
       
    91 hangul_is_choseong_conjoinable(ucschar c)
       
    92 {
       
    93     return c >= 0x1100 && c <= 0x1112;
       
    94 }
       
    95 
       
    96 bool
       
    97 hangul_is_jungseong_conjoinable(ucschar c)
       
    98 {
       
    99     return c >= 0x1161 && c <= 0x1175;
       
   100 }
       
   101 
       
   102 bool
       
   103 hangul_is_jongseong_conjoinable(ucschar c)
       
   104 {
       
   105     return c >= 0x11a7 && c <= 0x11c2;
       
   106 }
       
   107 
       
   108 /**
       
   109  * @brief check for a syllable
       
   110  * @param c ucs4 code value
       
   111  * @return true if the character c falls into syllable class
       
   112  *
       
   113  * This function check whether c, which must have ucs4 value, falls into
       
   114  * syllable class; that is from U+AC00 to 0xD7A3.
       
   115  */
       
   116 bool
       
   117 hangul_is_syllable(ucschar c)
       
   118 {
       
   119     return c >= 0xac00 && c <= 0xd7a3;
       
   120 }
       
   121 
       
   122 /**
       
   123  * @brief check for a jaso
       
   124  * @param c ucs4 code value
       
   125  * @return true if the character c falls into jaso class
       
   126  *
       
   127  * This function check whether c, which must have ucs4 value, falls into
       
   128  * jaso class; that is choseong, jungseong or jongseong.
       
   129  */
       
   130 bool
       
   131 hangul_is_jaso(ucschar c)
       
   132 {
       
   133     return hangul_is_choseong(c) ||
       
   134 	   hangul_is_jungseong(c) ||
       
   135 	   hangul_is_jongseong(c);
       
   136 }
       
   137 
       
   138 /**
       
   139  * @brief check for a compatibility jamo
       
   140  * @param c ucs4 code value
       
   141  * @return true if the character c falls into compatibility class
       
   142  *
       
   143  * This function check whether c, which must have ucs4 value, falls into
       
   144  * compatibility jamo class.
       
   145  */
       
   146 bool
       
   147 hangul_is_jamo(ucschar c)
       
   148 {
       
   149     return c >= 0x3131 && c <= 0x318e;
       
   150 }
       
   151 
       
   152 /**
       
   153  * @brief convert a jaso to the compatibility jamo
       
   154  * @param c ucs4 code value
       
   155  * @return converted value, or c
       
   156  *
       
   157  * This function converts the jaso c, which must have ucs4 value, to
       
   158  * comaptibility jamo or c if the conversion is failed
       
   159  */
       
   160 ucschar
       
   161 hangul_jaso_to_jamo(ucschar c)
       
   162 {
       
   163     static
       
   164 #ifdef __SYMBIAN32__
       
   165     const 
       
   166 #endif
       
   167     ucschar choseong[] = {
       
   168 	0x3131,	    /* 0x1100 */
       
   169 	0x3132,	    /* 0x1101 */
       
   170 	0x3134,	    /* 0x1102 */
       
   171 	0x3137,	    /* 0x1103 */
       
   172 	0x3138,	    /* 0x1104 */
       
   173 	0x3139,	    /* 0x1105 */
       
   174 	0x3141,	    /* 0x1106 */
       
   175 	0x3142,	    /* 0x1107 */
       
   176 	0x3143,	    /* 0x1108 */
       
   177 	0x3145,	    /* 0x1109 */
       
   178 	0x3146,	    /* 0x110a */
       
   179 	0x3147,	    /* 0x110b */
       
   180 	0x3148,	    /* 0x110c */
       
   181 	0x3149,	    /* 0x110d */
       
   182 	0x314a,	    /* 0x110e */
       
   183 	0x314b,	    /* 0x110f */
       
   184 	0x314c,	    /* 0x1110 */
       
   185 	0x314d,	    /* 0x1111 */
       
   186 	0x314e,	    /* 0x1112 */
       
   187     };
       
   188 
       
   189     static 
       
   190 #ifdef __SYMBIAN32__
       
   191     const 
       
   192 #endif
       
   193     ucschar jungseong[] = {
       
   194 	0x314f,	    /* 0x1161 */
       
   195 	0x3150,	    /* 0x1162 */
       
   196 	0x3151,	    /* 0x1163 */
       
   197 	0x3152,	    /* 0x1164 */
       
   198 	0x3153,	    /* 0x1165 */
       
   199 	0x3154,	    /* 0x1166 */
       
   200 	0x3155,	    /* 0x1167 */
       
   201 	0x3156,	    /* 0x1168 */
       
   202 	0x3157,	    /* 0x1169 */
       
   203 	0x3158,	    /* 0x116a */
       
   204 	0x3159,	    /* 0x116b */
       
   205 	0x315a,	    /* 0x116c */
       
   206 	0x315b,	    /* 0x116d */
       
   207 	0x315c,	    /* 0x116e */
       
   208 	0x315d,	    /* 0x116f */
       
   209 	0x315e,	    /* 0x1170 */
       
   210 	0x315f,	    /* 0x1171 */
       
   211 	0x3160,	    /* 0x1172 */
       
   212 	0x3161,	    /* 0x1173 */
       
   213 	0x3162,	    /* 0x1174 */
       
   214 	0x3163	    /* 0x1175 */
       
   215     };
       
   216 
       
   217     static 
       
   218 #ifdef __SYMBIAN32__
       
   219     const 
       
   220 #endif
       
   221     ucschar jongseong[] = {
       
   222 	0x3131,	    /* 0x11a8 */
       
   223 	0x3132,	    /* 0x11a9 */
       
   224 	0x3133,	    /* 0x11aa */
       
   225 	0x3134,	    /* 0x11ab */
       
   226 	0x3135,	    /* 0x11ac */
       
   227 	0x3136,	    /* 0x11ad */
       
   228 	0x3137,	    /* 0x11ae */
       
   229 	0x3139,	    /* 0x11af */
       
   230 	0x313a,	    /* 0x11b0 */
       
   231 	0x313b,	    /* 0x11b1 */
       
   232 	0x313c,	    /* 0x11b2 */
       
   233 	0x313d,	    /* 0x11b3 */
       
   234 	0x313e,	    /* 0x11b4 */
       
   235 	0x313f,	    /* 0x11b5 */
       
   236 	0x3140,	    /* 0x11b6 */
       
   237 	0x3141,	    /* 0x11b7 */
       
   238 	0x3142,	    /* 0x11b8 */
       
   239 	0x3144,	    /* 0x11b9 */
       
   240 	0x3145,	    /* 0x11ba */
       
   241 	0x3146,	    /* 0x11bb */
       
   242 	0x3147,	    /* 0x11bc */
       
   243 	0x3148,	    /* 0x11bd */
       
   244 	0x314a,	    /* 0x11be */
       
   245 	0x314b,	    /* 0x11bf */
       
   246 	0x314c,	    /* 0x11c0 */
       
   247 	0x314d,	    /* 0x11c1 */
       
   248 	0x314e	    /* 0x11c2 */
       
   249     };
       
   250 
       
   251     if (c >= 0x1100 && c <= 0x1112) {
       
   252 	return choseong[c - 0x1100];
       
   253     } else if (c >= 0x1161 && c <= 0x1175) {
       
   254 	return jungseong[c - 0x1161];
       
   255     } else if (c >= 0x11a8 && c <= 0x11c2) {
       
   256 	return jongseong[c - 0x11a8];
       
   257     }
       
   258 
       
   259     return c;
       
   260 }
       
   261 
       
   262 ucschar
       
   263 hangul_choseong_to_jongseong(ucschar c)
       
   264 {
       
   265     static 
       
   266 #ifdef __SYMBIAN32__
       
   267     const 
       
   268 #endif
       
   269     ucschar table[] = {
       
   270 	0x11a8,  /* choseong kiyeok      -> jongseong kiyeok      */
       
   271 	0x11a9,  /* choseong ssangkiyeok -> jongseong ssangkiyeok */
       
   272 	0x11ab,  /* choseong nieun       -> jongseong nieun       */
       
   273 	0x11ae,  /* choseong tikeut      -> jongseong tikeut      */
       
   274 	0x0,     /* choseong ssangtikeut -> jongseong tikeut      */
       
   275 	0x11af,  /* choseong rieul       -> jongseong rieul       */
       
   276 	0x11b7,  /* choseong mieum       -> jongseong mieum       */
       
   277 	0x11b8,  /* choseong pieup       -> jongseong pieup       */
       
   278 	0x0,     /* choseong ssangpieup  -> jongseong pieup       */
       
   279 	0x11ba,  /* choseong sios        -> jongseong sios        */
       
   280 	0x11bb,  /* choseong ssangsios   -> jongseong ssangsios   */
       
   281 	0x11bc,  /* choseong ieung       -> jongseong ieung       */
       
   282 	0x11bd,  /* choseong cieuc       -> jongseong cieuc       */
       
   283 	0x0,     /* choseong ssangcieuc  -> jongseong cieuc       */
       
   284 	0x11be,  /* choseong chieuch     -> jongseong chieuch     */
       
   285 	0x11bf,  /* choseong khieukh     -> jongseong khieukh     */
       
   286 	0x11c0,  /* choseong thieuth     -> jongseong thieuth     */
       
   287 	0x11c1,  /* choseong phieuph     -> jongseong phieuph     */
       
   288 	0x11c2   /* choseong hieuh       -> jongseong hieuh       */
       
   289     };
       
   290     if (c < 0x1100 || c > 0x1112)
       
   291 	return 0;
       
   292     return table[c - 0x1100];
       
   293 }
       
   294 
       
   295 ucschar
       
   296 hangul_jongseong_to_choseong(ucschar c)
       
   297 {
       
   298     static 
       
   299 #ifdef __SYMBIAN32__
       
   300     const 
       
   301 #endif
       
   302     ucschar table[] = {
       
   303       0x1100,  /* jongseong kiyeok        -> choseong kiyeok       */
       
   304       0x1101,  /* jongseong ssangkiyeok   -> choseong ssangkiyeok  */
       
   305       0x1109,  /* jongseong kiyeok-sios   -> choseong sios         */
       
   306       0x1102,  /* jongseong nieun         -> choseong nieun        */
       
   307       0x110c,  /* jongseong nieun-cieuc   -> choseong cieuc        */
       
   308       0x1112,  /* jongseong nieun-hieuh   -> choseong hieuh        */
       
   309       0x1103,  /* jongseong tikeut        -> choseong tikeut       */
       
   310       0x1105,  /* jongseong rieul         -> choseong rieul        */
       
   311       0x1100,  /* jongseong rieul-kiyeok  -> choseong kiyeok       */
       
   312       0x1106,  /* jongseong rieul-mieum   -> choseong mieum        */
       
   313       0x1107,  /* jongseong rieul-pieup   -> choseong pieup        */
       
   314       0x1109,  /* jongseong rieul-sios    -> choseong sios         */
       
   315       0x1110,  /* jongseong rieul-thieuth -> choseong thieuth      */
       
   316       0x1111,  /* jongseong rieul-phieuph -> choseong phieuph      */
       
   317       0x1112,  /* jongseong rieul-hieuh   -> choseong hieuh        */
       
   318       0x1106,  /* jongseong mieum         -> choseong mieum        */
       
   319       0x1107,  /* jongseong pieup         -> choseong pieup        */
       
   320       0x1109,  /* jongseong pieup-sios    -> choseong sios         */
       
   321       0x1109,  /* jongseong sios          -> choseong sios         */
       
   322       0x110a,  /* jongseong ssangsios     -> choseong ssangsios    */
       
   323       0x110b,  /* jongseong ieung         -> choseong ieung        */
       
   324       0x110c,  /* jongseong cieuc         -> choseong cieuc        */
       
   325       0x110e,  /* jongseong chieuch       -> choseong chieuch      */
       
   326       0x110f,  /* jongseong khieukh       -> choseong khieukh      */
       
   327       0x1110,  /* jongseong thieuth       -> choseong thieuth      */
       
   328       0x1111,  /* jongseong phieuph       -> choseong phieuph      */
       
   329       0x1112   /* jongseong hieuh         -> choseong hieuh        */
       
   330     };
       
   331     if (c < 0x11a8 || c > 0x11c2)
       
   332 	return 0;
       
   333     return table[c - 0x11a8];
       
   334 }
       
   335 
       
   336 void
       
   337 hangul_jongseong_dicompose(ucschar c, ucschar* jong, ucschar* cho)
       
   338 {
       
   339     static 
       
   340 #ifdef __SYMBIAN32__
       
   341     const 
       
   342 #endif
       
   343     ucschar table[][2] = {
       
   344     { 0,      0x1100 }, /* jong kiyeok	      = cho  kiyeok               */
       
   345     { 0x11a8, 0x1100 }, /* jong ssangkiyeok   = jong kiyeok + cho kiyeok  */
       
   346     { 0x11a8, 0x1109 }, /* jong kiyeok-sios   = jong kiyeok + cho sios    */
       
   347     { 0,      0x1102 }, /* jong nieun	      = cho  nieun                */
       
   348     { 0x11ab, 0x110c }, /* jong nieun-cieuc   = jong nieun  + cho cieuc   */
       
   349     { 0x11ab, 0x1112 }, /* jong nieun-hieuh   = jong nieun  + cho hieuh   */
       
   350     { 0,      0x1103 }, /* jong tikeut	      = cho  tikeut               */
       
   351     { 0,      0x1105 }, /* jong rieul         = cho  rieul                */
       
   352     { 0x11af, 0x1100 }, /* jong rieul-kiyeok  = jong rieul  + cho kiyeok  */
       
   353     { 0x11af, 0x1106 }, /* jong rieul-mieum   = jong rieul  + cho mieum   */
       
   354     { 0x11af, 0x1107 }, /* jong rieul-pieup   = jong rieul  + cho pieup   */
       
   355     { 0x11af, 0x1109 }, /* jong rieul-sios    = jong rieul  + cho sios    */
       
   356     { 0x11af, 0x1110 }, /* jong rieul-thieuth = jong rieul  + cho thieuth */
       
   357     { 0x11af, 0x1111 }, /* jong rieul-phieuph = jong rieul  + cho phieuph */
       
   358     { 0x11af, 0x1112 }, /* jong rieul-hieuh   = jong rieul  + cho hieuh   */
       
   359     { 0,      0x1106 }, /* jong mieum         = cho  mieum                */
       
   360     { 0,      0x1107 }, /* jong pieup         = cho  pieup                */
       
   361     { 0x11b8, 0x1109 }, /* jong pieup-sios    = jong pieup  + cho sios    */
       
   362     { 0,      0x1109 }, /* jong sios          = cho  sios                 */
       
   363     { 0x11ba, 0x1109 }, /* jong ssangsios     = jong sios   + cho sios    */
       
   364     { 0,      0x110b }, /* jong ieung         = cho  ieung                */
       
   365     { 0,      0x110c }, /* jong cieuc         = cho  cieuc                */
       
   366     { 0,      0x110e }, /* jong chieuch       = cho  chieuch              */
       
   367     { 0,      0x110f }, /* jong khieukh       = cho  khieukh              */
       
   368     { 0,      0x1110 }, /* jong thieuth       = cho  thieuth              */
       
   369     { 0,      0x1111 }, /* jong phieuph       = cho  phieuph              */
       
   370     { 0,      0x1112 }  /* jong hieuh         = cho  hieuh                */
       
   371     };
       
   372 
       
   373     *jong = table[c - 0x11a8][0];
       
   374     *cho  = table[c - 0x11a8][1];
       
   375 }
       
   376 
       
   377 /**
       
   378  * @brief compose a hangul syllable
       
   379  * @param choseong UCS4 code value
       
   380  * @param jungseong UCS4 code value
       
   381  * @param jongseong UCS4 code value
       
   382  * @return syllable code compose from choseong, jungseong and jongseong
       
   383  *
       
   384  * This function compose hangul jaso choseong, jungseong and jongseong and
       
   385  * return the syllable code.
       
   386  */
       
   387 ucschar
       
   388 hangul_jaso_to_syllable(ucschar choseong, ucschar jungseong, ucschar jongseong)
       
   389 {
       
   390     ucschar c;
       
   391 
       
   392     /* we use 0x11a7 like a Jongseong filler */
       
   393     if (jongseong == 0)
       
   394 	jongseong = 0x11a7;         /* Jongseong filler */
       
   395 
       
   396     if (!hangul_is_choseong_conjoinable(choseong))
       
   397 	return 0;
       
   398     if (!hangul_is_jungseong_conjoinable(jungseong))
       
   399 	return 0;
       
   400     if (!hangul_is_jongseong_conjoinable(jongseong))
       
   401 	return 0;
       
   402 
       
   403     choseong  -= choseong_base;
       
   404     jungseong -= jungseong_base;
       
   405     jongseong -= jongseong_base;
       
   406 
       
   407     c = ((choseong * njungseong) + jungseong) * njongseong + jongseong
       
   408 	+ syllable_base;
       
   409     return c;
       
   410 }
       
   411 
       
   412 void
       
   413 hangul_syllable_to_jaso(ucschar syllable,
       
   414 			ucschar* choseong,
       
   415 			ucschar* jungseong,
       
   416 			ucschar* jongseong)
       
   417 {
       
   418     if (jongseong != NULL)
       
   419 	*jongseong = 0;
       
   420     if (jungseong != NULL)
       
   421 	*jungseong = 0;
       
   422     if (choseong != NULL)
       
   423 	*choseong = 0;
       
   424 
       
   425     if (!hangul_is_syllable(syllable))
       
   426 	return;
       
   427 
       
   428     syllable -= syllable_base;
       
   429     if (jongseong != NULL) {
       
   430 	if (syllable % njongseong != 0)
       
   431 	    *jongseong = jongseong_base + syllable % njongseong;
       
   432     }
       
   433     syllable /= njongseong;
       
   434 
       
   435     if (jungseong != NULL) {
       
   436 	*jungseong = jungseong_base + syllable % njungseong;
       
   437     }
       
   438     syllable /= njungseong;
       
   439 
       
   440     if (choseong != NULL) {
       
   441 	*choseong = choseong_base + syllable;
       
   442     }
       
   443 }
       
   444 
       
   445 static
       
   446 #ifndef __SYMBIAN32__
       
   447 inline
       
   448 #endif
       
   449 bool 
       
   450 is_syllable_boundary(ucschar prev, ucschar next)
       
   451 {
       
   452     if (hangul_is_choseong(prev)) {
       
   453 	if (hangul_is_choseong(next))
       
   454 	    return false;
       
   455 	if (hangul_is_jungseong(next))
       
   456 	    return false;
       
   457 	if (hangul_is_syllable(next))
       
   458 	    return false;
       
   459 	if (hangul_is_combining_mark(next))
       
   460 	    return false;
       
   461 	if (next == HANGUL_JUNGSEONG_FILLER)
       
   462 	    return false;
       
   463     } else if (prev == HANGUL_CHOSEONG_FILLER) {
       
   464 	if (hangul_is_jungseong(next))
       
   465 	    return false;
       
   466 	if (next == HANGUL_JUNGSEONG_FILLER)
       
   467 	    return false;
       
   468     } else if (hangul_is_jungseong(prev)) {
       
   469 	if (hangul_is_jungseong(next))
       
   470 	    return false;
       
   471 	if (hangul_is_jongseong(next))
       
   472 	    return false;
       
   473 	if (hangul_is_combining_mark(next))
       
   474 	    return false;
       
   475     } else if (prev == HANGUL_JUNGSEONG_FILLER) {
       
   476 	if (hangul_is_jongseong(next))
       
   477 	    return false;
       
   478     } else if (hangul_is_jongseong(prev)) {
       
   479 	if (hangul_is_jongseong(next))
       
   480 	    return false;
       
   481 	if (hangul_is_combining_mark(next))
       
   482 	    return false;
       
   483     } else if (hangul_is_syllable(prev)) {
       
   484 	if ((prev - syllable_base) % njongseong == 0) {
       
   485 	    // 醫낆꽦�씠 �뾾�뒗 �쓬�젅: LV
       
   486 	    if (hangul_is_jungseong(next))
       
   487 		return false;
       
   488 	    if (hangul_is_jongseong(next))
       
   489 		return false;
       
   490 	} else {
       
   491 	    // 醫낆꽦�씠 �엳�뒗 �쓬�젅: LVT
       
   492 	    if (hangul_is_jongseong(next))
       
   493 		return false;
       
   494 	}
       
   495 	if (hangul_is_combining_mark(next))
       
   496 	    return false;
       
   497     }
       
   498     
       
   499     return true;
       
   500 }
       
   501 
       
   502 static 
       
   503 #ifndef __SYMBIAN32__
       
   504 inline 
       
   505 #endif
       
   506 ucschar
       
   507 choseong_compress(ucschar a, ucschar b)
       
   508 {
       
   509     if (a == 0)
       
   510 	return b;
       
   511 
       
   512     if (a == 0x1100 && b == 0x1100)
       
   513 	return 0x1101;
       
   514     if (a == 0x1103 && b == 0x1103)
       
   515 	return 0x1104;
       
   516     if (a == 0x1107 && b == 0x1107)
       
   517 	return 0x1108;
       
   518     if (a == 0x1109 && b == 0x1109)
       
   519 	return 0x110A;
       
   520     if (a == 0x110c && b == 0x110c)
       
   521 	return 0x110d;
       
   522     return 0;
       
   523 }
       
   524 
       
   525 static 
       
   526 #ifndef __SYMBIAN32__
       
   527 inline 
       
   528 #endif
       
   529 ucschar
       
   530 jungseong_compress(ucschar a, ucschar b)
       
   531 {
       
   532     if (a == 0)
       
   533 	return b;
       
   534 
       
   535     if (a == 0x1169) {
       
   536 	if (b == 0x1161)
       
   537 	    return 0x116a;
       
   538 	if (b == 0x1162)
       
   539 	    return 0x116b;
       
   540 	if (b == 0x1175)
       
   541 	    return 0x116c;
       
   542     }
       
   543     if (a == 0x116e) {
       
   544 	if (b == 0x1165)
       
   545 	    return 0x116f;
       
   546 	if (b == 0x1166)
       
   547 	    return 0x1170;
       
   548 	if (b == 0x1175)
       
   549 	    return 0x1171;
       
   550     }
       
   551     if (b == 0x1175) {
       
   552 	if (a == 0x1173)
       
   553 	    return 0x1174;
       
   554 	if (a == 0x1161)
       
   555 	    return 0x1162;
       
   556 	if (a == 0x1163)
       
   557 	    return 0x1164;
       
   558 	if (a == 0x1165)
       
   559 	    return 0x1166;
       
   560 	if (a == 0x1167)
       
   561 	    return 0x1168;
       
   562     }
       
   563 
       
   564     return 0;
       
   565 }
       
   566 
       
   567 static 
       
   568 #ifndef __SYMBIAN32__
       
   569 inline 
       
   570 #endif
       
   571 ucschar
       
   572 jongseong_compress(ucschar a, ucschar b)
       
   573 {
       
   574     if (a == 0)
       
   575 	return b;
       
   576     
       
   577     if (a == 0x11a8) {
       
   578 	if (b == 0x11a8)
       
   579 	    return 0x11a9;
       
   580 	if (b == 0x11ba)
       
   581 	    return 0x11aa;
       
   582     }
       
   583     if (a == 0x11ab) {
       
   584 	if (b == 0x11b0)
       
   585 	    return 0x11ab;
       
   586 	if (b == 0x11c2)
       
   587 	    return 0x11ad;
       
   588     }
       
   589     if (a == 0x11af) {
       
   590 	if (b == 0x11a8)
       
   591 	    return 0x11b0;
       
   592 	if (b == 0x11b7)
       
   593 	    return 0x11b1;
       
   594 	if (b == 0x11b8)
       
   595 	    return 0x11b2;
       
   596 	if (b == 0x11ba)
       
   597 	    return 0x11b3;
       
   598 	if (b == 0x11c0)
       
   599 	    return 0x11b4;
       
   600 	if (b == 0x11c1)
       
   601 	    return 0x11b5;
       
   602 	if (b == 0x11c2)
       
   603 	    return 0x11b6;
       
   604     }
       
   605     if (a == 0x11b8 && b == 0x11ba)
       
   606 	return 0x11b9;
       
   607     if (a == 0x11ba && b == 0x11ba)
       
   608 	return 0x11bb;
       
   609 
       
   610     return 0;
       
   611 }
       
   612 
       
   613 static 
       
   614 #ifndef __SYMBIAN32__
       
   615 inline 
       
   616 #endif
       
   617 ucschar
       
   618 build_syllable(const ucschar* str, size_t len)
       
   619 {
       
   620     int i;
       
   621     ucschar cho = 0, jung = 0, jong = 0;
       
   622 
       
   623     i = 0;
       
   624     while (i < len && hangul_is_choseong_conjoinable(str[i])) {
       
   625 	cho = choseong_compress(cho, str[i]);
       
   626 	if (cho == 0)
       
   627 	    return 0;
       
   628 	i++;
       
   629     }
       
   630 
       
   631     while (i < len && hangul_is_jungseong_conjoinable(str[i])) {
       
   632 	jung = jungseong_compress(jung, str[i]);
       
   633 	if (jung == 0)
       
   634 	    return 0;
       
   635 	i++;
       
   636     }
       
   637 
       
   638     while (i < len && hangul_is_jongseong_conjoinable(str[i])) {
       
   639 	jong = jongseong_compress(jong, str[i]);
       
   640 	if (jong == 0)
       
   641 	    return 0;
       
   642 	i++;
       
   643     }
       
   644 
       
   645     if (i < len)
       
   646 	return 0;
       
   647 
       
   648     return hangul_jaso_to_syllable(cho, jung, jong);
       
   649 }
       
   650 
       
   651 /**
       
   652  * @brief �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔瑜� 援ы븳�떎
       
   653  * @param str �쓬�젅�쓽 湲몄씠瑜� 援ы븷 �뒪�듃留�
       
   654  * @param max_len @a str �뿉�꽌 �씫�쓣 湲몄씠�쓽 �젣�븳媛�
       
   655  * @return �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔
       
   656  *
       
   657  * �씠 �븿�닔�뒗 @a str �뿉�꽌 �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔瑜� 援ы븳�떎. 
       
   658  * �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔媛� @a max_len 蹂대떎 留롫떎硫� @a max_len �쓣 
       
   659  * 諛섑솚�븳�떎. �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+ �뙣�꽩�뿉 �뵲瑜몃떎. �씠 �뙣�꽩���
       
   660  * regular expression�쓽 而⑤깽�뀡�쓣 �뵲瑜� 寃껋쑝濡�, 1媛� �씠�긽�쓽 珥덉꽦怨� 以묒꽦, 0媛�
       
   661  * �씠�긽�쓽 醫낆꽦�씠 紐⑥씤 �옄紐� �뒪�듃留곸쓣 �븳 �쓬�젅濡� �씤�떇�븳�떎�뒗 �쑜�씠�떎. �삁瑜� �뱾硫�
       
   662  * �떎�쓬怨� 媛숈�� �옄紐� �뒪�듃留곷룄 �븳 �쓬�젅濡� �씤�떇�븳�떎.
       
   663  *
       
   664  *  �삁) "�뀆 �뀆 �뀥 �뀛 �꽮 �꽦" -> "���"
       
   665  * 
       
   666  * �뵲�씪�꽌 �쐞 寃쎌슦�뿉�뒗 6�쓣 諛섑솚�븯寃� �맂�떎. 
       
   667  *
       
   668  * �씪諛섏쟻�쑝濡쒕뒗 諛⑹젏(U+302E, U+302F)源뚯�� �븳 �쓬�젅濡� �씤�떇�븯寃좎��留�, �씠 �븿�닔�뒗
       
   669  * �쓬�젅怨� �옄紐④컙 蹂��솚�쓣 �렪由ы븯寃� �븯湲� �쐞�빐 援ы쁽�맂 寃껋쑝濡� 諛⑹젏��� �떎瑜� �쓬�젅濡� 
       
   670  * �씤�떇�븳�떎.
       
   671  *
       
   672  * @a str �씠 �옄紐� 肄붾뱶�뿉 �빐�떦�븯吏� �븡�뒗 寃쎌슦�뿉�뒗 1�쓣 諛섑솚�븳�떎.
       
   673  *
       
   674  * �씠 �븿�닔�뒗 �옄紐� �뒪�듃留곸뿉�꽌 珥� �쓬�젅�쓽 媛��닔瑜� 援ы븯�뒗 �븿�닔媛� �븘�떂�뿉 二쇱쓽�븳�떎.
       
   675  */
       
   676 int
       
   677 hangul_syllable_len(const ucschar* str, int max_len)
       
   678 {
       
   679     int i = 0;
       
   680 
       
   681     if (max_len == 0)
       
   682 	return 0;
       
   683 
       
   684     if (str[i] != 0) {
       
   685 	for (i = 1; i < max_len; i++) {
       
   686 	    if (str[i] == 0)
       
   687 		break;
       
   688 
       
   689 	    if (is_syllable_boundary(str[i - 1], str[i]))
       
   690 		break;
       
   691 	}
       
   692     }
       
   693 
       
   694     return i;
       
   695 }
       
   696 
       
   697 /**
       
   698  * @brief @a iter瑜� 湲곗���쑝濡� �씠�쟾 �쓬�젅�쓽 泥レ옄紐� 湲��옄�뿉 ����븳 �룷�씤�꽣瑜� 援ы븳�떎.
       
   699  * @param iter �쁽�옱 �쐞移�
       
   700  * @param begin �뒪�듃留곸쓽 �떆�옉�쐞移�, �룷�씤�꽣媛� �씠�룞�븷 �븳怨꾧컪
       
   701  * @return �씠�쟾 �쓬�젅�쓽 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣
       
   702  *
       
   703  * �씠 �븿�닔�뒗 @a iter濡� 二쇱뼱吏� �옄紐� �뒪�듃留곸쓽 �룷�씤�꽣瑜� 湲곗���쑝濡� �씠�쟾 �쓬�젅�쓽 
       
   704  * 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣瑜� 由ы꽩�븳�떎. �쓬�젅�쓣 李얘린�쐞�빐�꽌 begin蹂대떎 
       
   705  * �븵履쎌쑝濡� �씠�룞�븯吏� �븡�뒗�떎. 
       
   706  *
       
   707  * �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+M? �뙣�꽩�뿉 �뵲瑜몃떎.
       
   708  */
       
   709 const ucschar*
       
   710 hangul_syllable_iterator_prev(const ucschar* iter, const ucschar* begin)
       
   711 {
       
   712     if (iter > begin)
       
   713 	iter--;
       
   714 
       
   715     while (iter > begin) {
       
   716 	ucschar prev = iter[-1];
       
   717 	ucschar curr = iter[0];
       
   718 	if (is_syllable_boundary(prev, curr))
       
   719 	    break;
       
   720 	iter--;
       
   721     }
       
   722 
       
   723     return iter;
       
   724 }
       
   725 
       
   726 /**
       
   727  * @brief @a iter瑜� 湲곗���쑝濡� �떎�쓬 �쓬�젅�쓽 泥レ옄紐� 湲��옄�뿉 ����븳 �룷�씤�꽣瑜� 援ы븳�떎.
       
   728  * @param iter �쁽�옱 �쐞移�
       
   729  * @param end �뒪�듃留곸쓽 �걹�쐞移�, �룷�씤�꽣媛� �씠�룞�븷 �븳怨꾧컪
       
   730  * @return �떎�쓬 �쓬�젅�쓽 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣
       
   731  *
       
   732  * �씠 �븿�닔�뒗 @a iter濡� 二쇱뼱吏� �옄紐� �뒪�듃留곸쓽 �룷�씤�꽣瑜� 湲곗���쑝濡� �떎�쓬 �쓬�젅�쓽 
       
   733  * 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣瑜� 由ы꽩�븳�떎. �쓬�젅�쓣 李얘린�쐞�빐�꽌 end瑜� �꽆�뼱
       
   734  * �씠�룞�븯吏� �븡�뒗�떎. 
       
   735  *
       
   736  * �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+M? �뙣�꽩�뿉 �뵲瑜몃떎.
       
   737  */
       
   738 const ucschar*
       
   739 hangul_syllable_iterator_next(const ucschar* iter, const ucschar* end)
       
   740 {
       
   741     if (iter < end)
       
   742 	iter++;
       
   743 
       
   744     while (iter < end) {
       
   745 	ucschar prev = iter[-1];
       
   746 	ucschar curr = iter[0];
       
   747 	if (is_syllable_boundary(prev, curr))
       
   748 	    break;
       
   749 	iter++;
       
   750     }
       
   751 
       
   752     return iter;
       
   753 }
       
   754 
       
   755 /**
       
   756  * @brief �옄紐� �뒪�듃留곸쓣 �쓬�젅 �뒪�듃留곸쓣 蹂��솚�븳�떎
       
   757  * @param dest �쓬�젅�삎�쑝濡� 蹂��솚�맂 寃곌낵媛� ����옣�맆 踰꾪띁
       
   758  * @param destlen 寃곌낵瑜� ����옣�븷 踰꾪띁�쓽 湲몄씠(ucschar 肄붾뱶 �떒�쐞)
       
   759  * @param src 蹂��솚�븷 �옄紐� �뒪�듃留�
       
   760  * @param srclen 蹂��솚�븷 �옄紐� �뒪�듃留곸쓽 湲몄씠(ucschar 肄붾뱶 �떒�쐞)
       
   761  * @return @a destlen �뿉 ����옣�븳 肄붾뱶�쓽 媛��닔
       
   762  *
       
   763  * �씠 �븿�닔�뒗 L+V+T*M? �뙣�꽩�뿉 �뵲�씪 �옄紐� �뒪�듃留� 蹂��솚�쓣 �떆�룄�븳�떎. �븳 �쓬�젅�쓣 
       
   764  * �뙋�떒�븯�뒗 湲곗����� @ref hangul_syllable_len �쓣 李몄“�븳�떎.
       
   765  * 留뚯씪 @a src 媛� �쟻�젅�븳 �쓬�젅�삎�깭濡� 蹂��솚�씠 遺덇���뒫�븳 寃쎌슦�뿉�뒗 �옄紐� �뒪�듃留곸씠
       
   766  * 洹몃��濡� 蹂듭궗�맂�떎.
       
   767  *
       
   768  * �씠 �븿�닔�뒗 �옄紐� �뒪�듃留� @a src 瑜� �쓬�젅�삎�쑝濡� 蹂��솚�븯�뿬 @a dest �뿉 ����옣�븳�떎.
       
   769  * @a srclen �뿉 吏��젙�맂 媛��닔留뚰겮 �씫怨�, @a destlen �뿉 吏��젙�맂 湲몄씠 �씠�긽 �벐吏�
       
   770  * �븡�뒗�떎.  @a srclen �씠 -1�씠�씪硫� @a src �뒗 0�쑝濡� �걹�굹�뒗 �뒪�듃留곸쑝濡� 媛��젙�븯怨�
       
   771  * 0�쓣 �젣�쇅�븳 湲몄씠源뚯�� 蹂��솚�쓣 �떆�룄�븳�떎. �뵲�씪�꽌 蹂��솚�맂 寃곌낵 �뒪�듃留곸�� 0�쑝濡� 
       
   772  * �걹�굹吏� �븡�뒗�떎. 留뚯씪 0�쑝濡� �걹�굹�뒗 �뒪�듃留곸쓣 留뚮뱾怨� �떢�떎硫� �떎�쓬怨� 媛숈씠 �븳�떎.
       
   773  *
       
   774  * @code
       
   775  * int n = hangul_jamos_to_syllables(dest, destlen, src, srclen);
       
   776  * dest[n] = 0;
       
   777  * @endcode
       
   778  */
       
   779 int
       
   780 hangul_jamos_to_syllables(ucschar* dest, int destlen, const ucschar* src, int srclen)
       
   781 {
       
   782     ucschar* d;
       
   783     const ucschar* s;
       
   784 
       
   785     int inleft;
       
   786     int outleft;
       
   787     int n;
       
   788 
       
   789     if (srclen < 0) {
       
   790 	s = src;
       
   791 	while (*s != 0)
       
   792 	    s++;
       
   793 	srclen = s - src;
       
   794     }
       
   795 
       
   796     s = src;
       
   797     d = dest;
       
   798     inleft = srclen;
       
   799     outleft = destlen;
       
   800 
       
   801     n = hangul_syllable_len(s, inleft);
       
   802     while (n > 0 && inleft > 0 && outleft > 0) {
       
   803 	ucschar c = build_syllable(s, n);
       
   804 	if (c != 0) {
       
   805 	    *d = c;
       
   806 	    d++;
       
   807 	    outleft--;
       
   808 	} else {
       
   809 	    int i;
       
   810 	    for (i = 0; i < n && i < outleft; i++) {
       
   811 		d[i] = s[i];
       
   812 	    }
       
   813 	    d += i;
       
   814 	    outleft -= i;
       
   815 	}
       
   816 
       
   817 	s += n;
       
   818 	inleft -= n;
       
   819 	n = hangul_syllable_len(s, inleft);
       
   820     }
       
   821 
       
   822     return destlen - outleft;
       
   823 }