|
1 /** |
|
2 * @file hangulctype.c |
|
3 * @brief hangulctype source file |
|
4 */ |
|
5 |
|
6 /* libhangul |
|
7 * Copyright (c) 2005,2006 Choe Hwanjin |
|
8 * All rights reserved. |
|
9 * This library is free software; you can redistribute it and/or |
|
10 * modify it under the terms of the GNU Lesser General Public |
|
11 * License as published by the Free Software Foundation; either |
|
12 * version 2.1 of the License, or (at your option) any later version. |
|
13 * |
|
14 * This library is distributed in the hope that it will be useful, |
|
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
17 * Lesser General Public License for more details. |
|
18 * |
|
19 * You should have received a copy of the GNU Lesser General Public |
|
20 * License along with this library; if not, write to the Free Software |
|
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
|
22 */ |
|
23 |
|
24 #ifdef HAVE_CONFIG_H |
|
25 #include <config.h> |
|
26 #endif |
|
27 |
|
28 #include <stdlib.h> |
|
29 |
|
30 #include "hangul.h" |
|
31 |
|
32 static const ucschar syllable_base = 0xac00; |
|
33 static const ucschar choseong_base = 0x1100; |
|
34 static const ucschar jungseong_base = 0x1161; |
|
35 static const ucschar jongseong_base = 0x11a7; |
|
36 static const int njungseong = 21; |
|
37 static const int njongseong = 28; |
|
38 |
|
39 /** |
|
40 * @brief check for a choseong |
|
41 * @param c ucs4 code value |
|
42 * @return true if the character c falls into choseong class |
|
43 * |
|
44 * This function check whether c, which must have ucs4 value, falls into |
|
45 * choseong (leading consonants) class. |
|
46 */ |
|
47 bool |
|
48 hangul_is_choseong(ucschar c) |
|
49 { |
|
50 return c >= 0x1100 && c <= 0x1159; |
|
51 } |
|
52 |
|
53 /** |
|
54 * @brief check for a jungseong |
|
55 * @param c ucs4 code value |
|
56 * @return true if the character c falls into jungseong class |
|
57 * |
|
58 * This function check whether c, which must have ucs4 value, falls into |
|
59 * jungseong (vowels) class. |
|
60 */ |
|
61 bool |
|
62 hangul_is_jungseong(ucschar c) |
|
63 { |
|
64 return c >= 0x1161 && c <= 0x11a2; |
|
65 } |
|
66 |
|
67 /** |
|
68 * @brief check for a jongseong |
|
69 * @param c ucs4 code value |
|
70 * @return true if the character c falls into jongseong class |
|
71 * |
|
72 * This function check whether c, which must have ucs4 value, falls into |
|
73 * jongseong (trailing consonants) class. |
|
74 */ |
|
75 bool |
|
76 hangul_is_jongseong(ucschar c) |
|
77 { |
|
78 return c >= 0x11a8 && c <= 0x11f9; |
|
79 } |
|
80 |
|
81 bool |
|
82 hangul_is_combining_mark(ucschar c) |
|
83 { |
|
84 return c == 0x302e || c == 0x302f || |
|
85 (c >= 0x0300 && c <= 0x036F) || |
|
86 (c >= 0x1dc0 && c <= 0x1dff) || |
|
87 (c >= 0xfe20 && c <= 0xfe2f); |
|
88 } |
|
89 |
|
90 bool |
|
91 hangul_is_choseong_conjoinable(ucschar c) |
|
92 { |
|
93 return c >= 0x1100 && c <= 0x1112; |
|
94 } |
|
95 |
|
96 bool |
|
97 hangul_is_jungseong_conjoinable(ucschar c) |
|
98 { |
|
99 return c >= 0x1161 && c <= 0x1175; |
|
100 } |
|
101 |
|
102 bool |
|
103 hangul_is_jongseong_conjoinable(ucschar c) |
|
104 { |
|
105 return c >= 0x11a7 && c <= 0x11c2; |
|
106 } |
|
107 |
|
108 /** |
|
109 * @brief check for a syllable |
|
110 * @param c ucs4 code value |
|
111 * @return true if the character c falls into syllable class |
|
112 * |
|
113 * This function check whether c, which must have ucs4 value, falls into |
|
114 * syllable class; that is from U+AC00 to 0xD7A3. |
|
115 */ |
|
116 bool |
|
117 hangul_is_syllable(ucschar c) |
|
118 { |
|
119 return c >= 0xac00 && c <= 0xd7a3; |
|
120 } |
|
121 |
|
122 /** |
|
123 * @brief check for a jaso |
|
124 * @param c ucs4 code value |
|
125 * @return true if the character c falls into jaso class |
|
126 * |
|
127 * This function check whether c, which must have ucs4 value, falls into |
|
128 * jaso class; that is choseong, jungseong or jongseong. |
|
129 */ |
|
130 bool |
|
131 hangul_is_jaso(ucschar c) |
|
132 { |
|
133 return hangul_is_choseong(c) || |
|
134 hangul_is_jungseong(c) || |
|
135 hangul_is_jongseong(c); |
|
136 } |
|
137 |
|
138 /** |
|
139 * @brief check for a compatibility jamo |
|
140 * @param c ucs4 code value |
|
141 * @return true if the character c falls into compatibility class |
|
142 * |
|
143 * This function check whether c, which must have ucs4 value, falls into |
|
144 * compatibility jamo class. |
|
145 */ |
|
146 bool |
|
147 hangul_is_jamo(ucschar c) |
|
148 { |
|
149 return c >= 0x3131 && c <= 0x318e; |
|
150 } |
|
151 |
|
152 /** |
|
153 * @brief convert a jaso to the compatibility jamo |
|
154 * @param c ucs4 code value |
|
155 * @return converted value, or c |
|
156 * |
|
157 * This function converts the jaso c, which must have ucs4 value, to |
|
158 * comaptibility jamo or c if the conversion is failed |
|
159 */ |
|
160 ucschar |
|
161 hangul_jaso_to_jamo(ucschar c) |
|
162 { |
|
163 static |
|
164 #ifdef __SYMBIAN32__ |
|
165 const |
|
166 #endif |
|
167 ucschar choseong[] = { |
|
168 0x3131, /* 0x1100 */ |
|
169 0x3132, /* 0x1101 */ |
|
170 0x3134, /* 0x1102 */ |
|
171 0x3137, /* 0x1103 */ |
|
172 0x3138, /* 0x1104 */ |
|
173 0x3139, /* 0x1105 */ |
|
174 0x3141, /* 0x1106 */ |
|
175 0x3142, /* 0x1107 */ |
|
176 0x3143, /* 0x1108 */ |
|
177 0x3145, /* 0x1109 */ |
|
178 0x3146, /* 0x110a */ |
|
179 0x3147, /* 0x110b */ |
|
180 0x3148, /* 0x110c */ |
|
181 0x3149, /* 0x110d */ |
|
182 0x314a, /* 0x110e */ |
|
183 0x314b, /* 0x110f */ |
|
184 0x314c, /* 0x1110 */ |
|
185 0x314d, /* 0x1111 */ |
|
186 0x314e, /* 0x1112 */ |
|
187 }; |
|
188 |
|
189 static |
|
190 #ifdef __SYMBIAN32__ |
|
191 const |
|
192 #endif |
|
193 ucschar jungseong[] = { |
|
194 0x314f, /* 0x1161 */ |
|
195 0x3150, /* 0x1162 */ |
|
196 0x3151, /* 0x1163 */ |
|
197 0x3152, /* 0x1164 */ |
|
198 0x3153, /* 0x1165 */ |
|
199 0x3154, /* 0x1166 */ |
|
200 0x3155, /* 0x1167 */ |
|
201 0x3156, /* 0x1168 */ |
|
202 0x3157, /* 0x1169 */ |
|
203 0x3158, /* 0x116a */ |
|
204 0x3159, /* 0x116b */ |
|
205 0x315a, /* 0x116c */ |
|
206 0x315b, /* 0x116d */ |
|
207 0x315c, /* 0x116e */ |
|
208 0x315d, /* 0x116f */ |
|
209 0x315e, /* 0x1170 */ |
|
210 0x315f, /* 0x1171 */ |
|
211 0x3160, /* 0x1172 */ |
|
212 0x3161, /* 0x1173 */ |
|
213 0x3162, /* 0x1174 */ |
|
214 0x3163 /* 0x1175 */ |
|
215 }; |
|
216 |
|
217 static |
|
218 #ifdef __SYMBIAN32__ |
|
219 const |
|
220 #endif |
|
221 ucschar jongseong[] = { |
|
222 0x3131, /* 0x11a8 */ |
|
223 0x3132, /* 0x11a9 */ |
|
224 0x3133, /* 0x11aa */ |
|
225 0x3134, /* 0x11ab */ |
|
226 0x3135, /* 0x11ac */ |
|
227 0x3136, /* 0x11ad */ |
|
228 0x3137, /* 0x11ae */ |
|
229 0x3139, /* 0x11af */ |
|
230 0x313a, /* 0x11b0 */ |
|
231 0x313b, /* 0x11b1 */ |
|
232 0x313c, /* 0x11b2 */ |
|
233 0x313d, /* 0x11b3 */ |
|
234 0x313e, /* 0x11b4 */ |
|
235 0x313f, /* 0x11b5 */ |
|
236 0x3140, /* 0x11b6 */ |
|
237 0x3141, /* 0x11b7 */ |
|
238 0x3142, /* 0x11b8 */ |
|
239 0x3144, /* 0x11b9 */ |
|
240 0x3145, /* 0x11ba */ |
|
241 0x3146, /* 0x11bb */ |
|
242 0x3147, /* 0x11bc */ |
|
243 0x3148, /* 0x11bd */ |
|
244 0x314a, /* 0x11be */ |
|
245 0x314b, /* 0x11bf */ |
|
246 0x314c, /* 0x11c0 */ |
|
247 0x314d, /* 0x11c1 */ |
|
248 0x314e /* 0x11c2 */ |
|
249 }; |
|
250 |
|
251 if (c >= 0x1100 && c <= 0x1112) { |
|
252 return choseong[c - 0x1100]; |
|
253 } else if (c >= 0x1161 && c <= 0x1175) { |
|
254 return jungseong[c - 0x1161]; |
|
255 } else if (c >= 0x11a8 && c <= 0x11c2) { |
|
256 return jongseong[c - 0x11a8]; |
|
257 } |
|
258 |
|
259 return c; |
|
260 } |
|
261 |
|
262 ucschar |
|
263 hangul_choseong_to_jongseong(ucschar c) |
|
264 { |
|
265 static |
|
266 #ifdef __SYMBIAN32__ |
|
267 const |
|
268 #endif |
|
269 ucschar table[] = { |
|
270 0x11a8, /* choseong kiyeok -> jongseong kiyeok */ |
|
271 0x11a9, /* choseong ssangkiyeok -> jongseong ssangkiyeok */ |
|
272 0x11ab, /* choseong nieun -> jongseong nieun */ |
|
273 0x11ae, /* choseong tikeut -> jongseong tikeut */ |
|
274 0x0, /* choseong ssangtikeut -> jongseong tikeut */ |
|
275 0x11af, /* choseong rieul -> jongseong rieul */ |
|
276 0x11b7, /* choseong mieum -> jongseong mieum */ |
|
277 0x11b8, /* choseong pieup -> jongseong pieup */ |
|
278 0x0, /* choseong ssangpieup -> jongseong pieup */ |
|
279 0x11ba, /* choseong sios -> jongseong sios */ |
|
280 0x11bb, /* choseong ssangsios -> jongseong ssangsios */ |
|
281 0x11bc, /* choseong ieung -> jongseong ieung */ |
|
282 0x11bd, /* choseong cieuc -> jongseong cieuc */ |
|
283 0x0, /* choseong ssangcieuc -> jongseong cieuc */ |
|
284 0x11be, /* choseong chieuch -> jongseong chieuch */ |
|
285 0x11bf, /* choseong khieukh -> jongseong khieukh */ |
|
286 0x11c0, /* choseong thieuth -> jongseong thieuth */ |
|
287 0x11c1, /* choseong phieuph -> jongseong phieuph */ |
|
288 0x11c2 /* choseong hieuh -> jongseong hieuh */ |
|
289 }; |
|
290 if (c < 0x1100 || c > 0x1112) |
|
291 return 0; |
|
292 return table[c - 0x1100]; |
|
293 } |
|
294 |
|
295 ucschar |
|
296 hangul_jongseong_to_choseong(ucschar c) |
|
297 { |
|
298 static |
|
299 #ifdef __SYMBIAN32__ |
|
300 const |
|
301 #endif |
|
302 ucschar table[] = { |
|
303 0x1100, /* jongseong kiyeok -> choseong kiyeok */ |
|
304 0x1101, /* jongseong ssangkiyeok -> choseong ssangkiyeok */ |
|
305 0x1109, /* jongseong kiyeok-sios -> choseong sios */ |
|
306 0x1102, /* jongseong nieun -> choseong nieun */ |
|
307 0x110c, /* jongseong nieun-cieuc -> choseong cieuc */ |
|
308 0x1112, /* jongseong nieun-hieuh -> choseong hieuh */ |
|
309 0x1103, /* jongseong tikeut -> choseong tikeut */ |
|
310 0x1105, /* jongseong rieul -> choseong rieul */ |
|
311 0x1100, /* jongseong rieul-kiyeok -> choseong kiyeok */ |
|
312 0x1106, /* jongseong rieul-mieum -> choseong mieum */ |
|
313 0x1107, /* jongseong rieul-pieup -> choseong pieup */ |
|
314 0x1109, /* jongseong rieul-sios -> choseong sios */ |
|
315 0x1110, /* jongseong rieul-thieuth -> choseong thieuth */ |
|
316 0x1111, /* jongseong rieul-phieuph -> choseong phieuph */ |
|
317 0x1112, /* jongseong rieul-hieuh -> choseong hieuh */ |
|
318 0x1106, /* jongseong mieum -> choseong mieum */ |
|
319 0x1107, /* jongseong pieup -> choseong pieup */ |
|
320 0x1109, /* jongseong pieup-sios -> choseong sios */ |
|
321 0x1109, /* jongseong sios -> choseong sios */ |
|
322 0x110a, /* jongseong ssangsios -> choseong ssangsios */ |
|
323 0x110b, /* jongseong ieung -> choseong ieung */ |
|
324 0x110c, /* jongseong cieuc -> choseong cieuc */ |
|
325 0x110e, /* jongseong chieuch -> choseong chieuch */ |
|
326 0x110f, /* jongseong khieukh -> choseong khieukh */ |
|
327 0x1110, /* jongseong thieuth -> choseong thieuth */ |
|
328 0x1111, /* jongseong phieuph -> choseong phieuph */ |
|
329 0x1112 /* jongseong hieuh -> choseong hieuh */ |
|
330 }; |
|
331 if (c < 0x11a8 || c > 0x11c2) |
|
332 return 0; |
|
333 return table[c - 0x11a8]; |
|
334 } |
|
335 |
|
336 void |
|
337 hangul_jongseong_dicompose(ucschar c, ucschar* jong, ucschar* cho) |
|
338 { |
|
339 static |
|
340 #ifdef __SYMBIAN32__ |
|
341 const |
|
342 #endif |
|
343 ucschar table[][2] = { |
|
344 { 0, 0x1100 }, /* jong kiyeok = cho kiyeok */ |
|
345 { 0x11a8, 0x1100 }, /* jong ssangkiyeok = jong kiyeok + cho kiyeok */ |
|
346 { 0x11a8, 0x1109 }, /* jong kiyeok-sios = jong kiyeok + cho sios */ |
|
347 { 0, 0x1102 }, /* jong nieun = cho nieun */ |
|
348 { 0x11ab, 0x110c }, /* jong nieun-cieuc = jong nieun + cho cieuc */ |
|
349 { 0x11ab, 0x1112 }, /* jong nieun-hieuh = jong nieun + cho hieuh */ |
|
350 { 0, 0x1103 }, /* jong tikeut = cho tikeut */ |
|
351 { 0, 0x1105 }, /* jong rieul = cho rieul */ |
|
352 { 0x11af, 0x1100 }, /* jong rieul-kiyeok = jong rieul + cho kiyeok */ |
|
353 { 0x11af, 0x1106 }, /* jong rieul-mieum = jong rieul + cho mieum */ |
|
354 { 0x11af, 0x1107 }, /* jong rieul-pieup = jong rieul + cho pieup */ |
|
355 { 0x11af, 0x1109 }, /* jong rieul-sios = jong rieul + cho sios */ |
|
356 { 0x11af, 0x1110 }, /* jong rieul-thieuth = jong rieul + cho thieuth */ |
|
357 { 0x11af, 0x1111 }, /* jong rieul-phieuph = jong rieul + cho phieuph */ |
|
358 { 0x11af, 0x1112 }, /* jong rieul-hieuh = jong rieul + cho hieuh */ |
|
359 { 0, 0x1106 }, /* jong mieum = cho mieum */ |
|
360 { 0, 0x1107 }, /* jong pieup = cho pieup */ |
|
361 { 0x11b8, 0x1109 }, /* jong pieup-sios = jong pieup + cho sios */ |
|
362 { 0, 0x1109 }, /* jong sios = cho sios */ |
|
363 { 0x11ba, 0x1109 }, /* jong ssangsios = jong sios + cho sios */ |
|
364 { 0, 0x110b }, /* jong ieung = cho ieung */ |
|
365 { 0, 0x110c }, /* jong cieuc = cho cieuc */ |
|
366 { 0, 0x110e }, /* jong chieuch = cho chieuch */ |
|
367 { 0, 0x110f }, /* jong khieukh = cho khieukh */ |
|
368 { 0, 0x1110 }, /* jong thieuth = cho thieuth */ |
|
369 { 0, 0x1111 }, /* jong phieuph = cho phieuph */ |
|
370 { 0, 0x1112 } /* jong hieuh = cho hieuh */ |
|
371 }; |
|
372 |
|
373 *jong = table[c - 0x11a8][0]; |
|
374 *cho = table[c - 0x11a8][1]; |
|
375 } |
|
376 |
|
377 /** |
|
378 * @brief compose a hangul syllable |
|
379 * @param choseong UCS4 code value |
|
380 * @param jungseong UCS4 code value |
|
381 * @param jongseong UCS4 code value |
|
382 * @return syllable code compose from choseong, jungseong and jongseong |
|
383 * |
|
384 * This function compose hangul jaso choseong, jungseong and jongseong and |
|
385 * return the syllable code. |
|
386 */ |
|
387 ucschar |
|
388 hangul_jaso_to_syllable(ucschar choseong, ucschar jungseong, ucschar jongseong) |
|
389 { |
|
390 ucschar c; |
|
391 |
|
392 /* we use 0x11a7 like a Jongseong filler */ |
|
393 if (jongseong == 0) |
|
394 jongseong = 0x11a7; /* Jongseong filler */ |
|
395 |
|
396 if (!hangul_is_choseong_conjoinable(choseong)) |
|
397 return 0; |
|
398 if (!hangul_is_jungseong_conjoinable(jungseong)) |
|
399 return 0; |
|
400 if (!hangul_is_jongseong_conjoinable(jongseong)) |
|
401 return 0; |
|
402 |
|
403 choseong -= choseong_base; |
|
404 jungseong -= jungseong_base; |
|
405 jongseong -= jongseong_base; |
|
406 |
|
407 c = ((choseong * njungseong) + jungseong) * njongseong + jongseong |
|
408 + syllable_base; |
|
409 return c; |
|
410 } |
|
411 |
|
412 void |
|
413 hangul_syllable_to_jaso(ucschar syllable, |
|
414 ucschar* choseong, |
|
415 ucschar* jungseong, |
|
416 ucschar* jongseong) |
|
417 { |
|
418 if (jongseong != NULL) |
|
419 *jongseong = 0; |
|
420 if (jungseong != NULL) |
|
421 *jungseong = 0; |
|
422 if (choseong != NULL) |
|
423 *choseong = 0; |
|
424 |
|
425 if (!hangul_is_syllable(syllable)) |
|
426 return; |
|
427 |
|
428 syllable -= syllable_base; |
|
429 if (jongseong != NULL) { |
|
430 if (syllable % njongseong != 0) |
|
431 *jongseong = jongseong_base + syllable % njongseong; |
|
432 } |
|
433 syllable /= njongseong; |
|
434 |
|
435 if (jungseong != NULL) { |
|
436 *jungseong = jungseong_base + syllable % njungseong; |
|
437 } |
|
438 syllable /= njungseong; |
|
439 |
|
440 if (choseong != NULL) { |
|
441 *choseong = choseong_base + syllable; |
|
442 } |
|
443 } |
|
444 |
|
445 static |
|
446 #ifndef __SYMBIAN32__ |
|
447 inline |
|
448 #endif |
|
449 bool |
|
450 is_syllable_boundary(ucschar prev, ucschar next) |
|
451 { |
|
452 if (hangul_is_choseong(prev)) { |
|
453 if (hangul_is_choseong(next)) |
|
454 return false; |
|
455 if (hangul_is_jungseong(next)) |
|
456 return false; |
|
457 if (hangul_is_syllable(next)) |
|
458 return false; |
|
459 if (hangul_is_combining_mark(next)) |
|
460 return false; |
|
461 if (next == HANGUL_JUNGSEONG_FILLER) |
|
462 return false; |
|
463 } else if (prev == HANGUL_CHOSEONG_FILLER) { |
|
464 if (hangul_is_jungseong(next)) |
|
465 return false; |
|
466 if (next == HANGUL_JUNGSEONG_FILLER) |
|
467 return false; |
|
468 } else if (hangul_is_jungseong(prev)) { |
|
469 if (hangul_is_jungseong(next)) |
|
470 return false; |
|
471 if (hangul_is_jongseong(next)) |
|
472 return false; |
|
473 if (hangul_is_combining_mark(next)) |
|
474 return false; |
|
475 } else if (prev == HANGUL_JUNGSEONG_FILLER) { |
|
476 if (hangul_is_jongseong(next)) |
|
477 return false; |
|
478 } else if (hangul_is_jongseong(prev)) { |
|
479 if (hangul_is_jongseong(next)) |
|
480 return false; |
|
481 if (hangul_is_combining_mark(next)) |
|
482 return false; |
|
483 } else if (hangul_is_syllable(prev)) { |
|
484 if ((prev - syllable_base) % njongseong == 0) { |
|
485 // 醫낆꽦�씠 �뾾�뒗 �쓬�젅: LV |
|
486 if (hangul_is_jungseong(next)) |
|
487 return false; |
|
488 if (hangul_is_jongseong(next)) |
|
489 return false; |
|
490 } else { |
|
491 // 醫낆꽦�씠 �엳�뒗 �쓬�젅: LVT |
|
492 if (hangul_is_jongseong(next)) |
|
493 return false; |
|
494 } |
|
495 if (hangul_is_combining_mark(next)) |
|
496 return false; |
|
497 } |
|
498 |
|
499 return true; |
|
500 } |
|
501 |
|
502 static |
|
503 #ifndef __SYMBIAN32__ |
|
504 inline |
|
505 #endif |
|
506 ucschar |
|
507 choseong_compress(ucschar a, ucschar b) |
|
508 { |
|
509 if (a == 0) |
|
510 return b; |
|
511 |
|
512 if (a == 0x1100 && b == 0x1100) |
|
513 return 0x1101; |
|
514 if (a == 0x1103 && b == 0x1103) |
|
515 return 0x1104; |
|
516 if (a == 0x1107 && b == 0x1107) |
|
517 return 0x1108; |
|
518 if (a == 0x1109 && b == 0x1109) |
|
519 return 0x110A; |
|
520 if (a == 0x110c && b == 0x110c) |
|
521 return 0x110d; |
|
522 return 0; |
|
523 } |
|
524 |
|
525 static |
|
526 #ifndef __SYMBIAN32__ |
|
527 inline |
|
528 #endif |
|
529 ucschar |
|
530 jungseong_compress(ucschar a, ucschar b) |
|
531 { |
|
532 if (a == 0) |
|
533 return b; |
|
534 |
|
535 if (a == 0x1169) { |
|
536 if (b == 0x1161) |
|
537 return 0x116a; |
|
538 if (b == 0x1162) |
|
539 return 0x116b; |
|
540 if (b == 0x1175) |
|
541 return 0x116c; |
|
542 } |
|
543 if (a == 0x116e) { |
|
544 if (b == 0x1165) |
|
545 return 0x116f; |
|
546 if (b == 0x1166) |
|
547 return 0x1170; |
|
548 if (b == 0x1175) |
|
549 return 0x1171; |
|
550 } |
|
551 if (b == 0x1175) { |
|
552 if (a == 0x1173) |
|
553 return 0x1174; |
|
554 if (a == 0x1161) |
|
555 return 0x1162; |
|
556 if (a == 0x1163) |
|
557 return 0x1164; |
|
558 if (a == 0x1165) |
|
559 return 0x1166; |
|
560 if (a == 0x1167) |
|
561 return 0x1168; |
|
562 } |
|
563 |
|
564 return 0; |
|
565 } |
|
566 |
|
567 static |
|
568 #ifndef __SYMBIAN32__ |
|
569 inline |
|
570 #endif |
|
571 ucschar |
|
572 jongseong_compress(ucschar a, ucschar b) |
|
573 { |
|
574 if (a == 0) |
|
575 return b; |
|
576 |
|
577 if (a == 0x11a8) { |
|
578 if (b == 0x11a8) |
|
579 return 0x11a9; |
|
580 if (b == 0x11ba) |
|
581 return 0x11aa; |
|
582 } |
|
583 if (a == 0x11ab) { |
|
584 if (b == 0x11b0) |
|
585 return 0x11ab; |
|
586 if (b == 0x11c2) |
|
587 return 0x11ad; |
|
588 } |
|
589 if (a == 0x11af) { |
|
590 if (b == 0x11a8) |
|
591 return 0x11b0; |
|
592 if (b == 0x11b7) |
|
593 return 0x11b1; |
|
594 if (b == 0x11b8) |
|
595 return 0x11b2; |
|
596 if (b == 0x11ba) |
|
597 return 0x11b3; |
|
598 if (b == 0x11c0) |
|
599 return 0x11b4; |
|
600 if (b == 0x11c1) |
|
601 return 0x11b5; |
|
602 if (b == 0x11c2) |
|
603 return 0x11b6; |
|
604 } |
|
605 if (a == 0x11b8 && b == 0x11ba) |
|
606 return 0x11b9; |
|
607 if (a == 0x11ba && b == 0x11ba) |
|
608 return 0x11bb; |
|
609 |
|
610 return 0; |
|
611 } |
|
612 |
|
613 static |
|
614 #ifndef __SYMBIAN32__ |
|
615 inline |
|
616 #endif |
|
617 ucschar |
|
618 build_syllable(const ucschar* str, size_t len) |
|
619 { |
|
620 int i; |
|
621 ucschar cho = 0, jung = 0, jong = 0; |
|
622 |
|
623 i = 0; |
|
624 while (i < len && hangul_is_choseong_conjoinable(str[i])) { |
|
625 cho = choseong_compress(cho, str[i]); |
|
626 if (cho == 0) |
|
627 return 0; |
|
628 i++; |
|
629 } |
|
630 |
|
631 while (i < len && hangul_is_jungseong_conjoinable(str[i])) { |
|
632 jung = jungseong_compress(jung, str[i]); |
|
633 if (jung == 0) |
|
634 return 0; |
|
635 i++; |
|
636 } |
|
637 |
|
638 while (i < len && hangul_is_jongseong_conjoinable(str[i])) { |
|
639 jong = jongseong_compress(jong, str[i]); |
|
640 if (jong == 0) |
|
641 return 0; |
|
642 i++; |
|
643 } |
|
644 |
|
645 if (i < len) |
|
646 return 0; |
|
647 |
|
648 return hangul_jaso_to_syllable(cho, jung, jong); |
|
649 } |
|
650 |
|
651 /** |
|
652 * @brief �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔瑜� 援ы븳�떎 |
|
653 * @param str �쓬�젅�쓽 湲몄씠瑜� 援ы븷 �뒪�듃留� |
|
654 * @param max_len @a str �뿉�꽌 �씫�쓣 湲몄씠�쓽 �젣�븳媛� |
|
655 * @return �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔 |
|
656 * |
|
657 * �씠 �븿�닔�뒗 @a str �뿉�꽌 �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔瑜� 援ы븳�떎. |
|
658 * �븳 �쓬�젅�뿉 �빐�떦�븯�뒗 肄붾뱶�쓽 媛��닔媛� @a max_len 蹂대떎 留롫떎硫� @a max_len �쓣 |
|
659 * 諛섑솚�븳�떎. �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+ �뙣�꽩�뿉 �뵲瑜몃떎. �씠 �뙣�꽩��� |
|
660 * regular expression�쓽 而⑤깽�뀡�쓣 �뵲瑜� 寃껋쑝濡�, 1媛� �씠�긽�쓽 珥덉꽦怨� 以묒꽦, 0媛� |
|
661 * �씠�긽�쓽 醫낆꽦�씠 紐⑥씤 �옄紐� �뒪�듃留곸쓣 �븳 �쓬�젅濡� �씤�떇�븳�떎�뒗 �쑜�씠�떎. �삁瑜� �뱾硫� |
|
662 * �떎�쓬怨� 媛숈�� �옄紐� �뒪�듃留곷룄 �븳 �쓬�젅濡� �씤�떇�븳�떎. |
|
663 * |
|
664 * �삁) "�뀆 �뀆 �뀥 �뀛 �꽮 �꽦" -> "���" |
|
665 * |
|
666 * �뵲�씪�꽌 �쐞 寃쎌슦�뿉�뒗 6�쓣 諛섑솚�븯寃� �맂�떎. |
|
667 * |
|
668 * �씪諛섏쟻�쑝濡쒕뒗 諛⑹젏(U+302E, U+302F)源뚯�� �븳 �쓬�젅濡� �씤�떇�븯寃좎��留�, �씠 �븿�닔�뒗 |
|
669 * �쓬�젅怨� �옄紐④컙 蹂��솚�쓣 �렪由ы븯寃� �븯湲� �쐞�빐 援ы쁽�맂 寃껋쑝濡� 諛⑹젏��� �떎瑜� �쓬�젅濡� |
|
670 * �씤�떇�븳�떎. |
|
671 * |
|
672 * @a str �씠 �옄紐� 肄붾뱶�뿉 �빐�떦�븯吏� �븡�뒗 寃쎌슦�뿉�뒗 1�쓣 諛섑솚�븳�떎. |
|
673 * |
|
674 * �씠 �븿�닔�뒗 �옄紐� �뒪�듃留곸뿉�꽌 珥� �쓬�젅�쓽 媛��닔瑜� 援ы븯�뒗 �븿�닔媛� �븘�떂�뿉 二쇱쓽�븳�떎. |
|
675 */ |
|
676 int |
|
677 hangul_syllable_len(const ucschar* str, int max_len) |
|
678 { |
|
679 int i = 0; |
|
680 |
|
681 if (max_len == 0) |
|
682 return 0; |
|
683 |
|
684 if (str[i] != 0) { |
|
685 for (i = 1; i < max_len; i++) { |
|
686 if (str[i] == 0) |
|
687 break; |
|
688 |
|
689 if (is_syllable_boundary(str[i - 1], str[i])) |
|
690 break; |
|
691 } |
|
692 } |
|
693 |
|
694 return i; |
|
695 } |
|
696 |
|
697 /** |
|
698 * @brief @a iter瑜� 湲곗���쑝濡� �씠�쟾 �쓬�젅�쓽 泥レ옄紐� 湲��옄�뿉 ����븳 �룷�씤�꽣瑜� 援ы븳�떎. |
|
699 * @param iter �쁽�옱 �쐞移� |
|
700 * @param begin �뒪�듃留곸쓽 �떆�옉�쐞移�, �룷�씤�꽣媛� �씠�룞�븷 �븳怨꾧컪 |
|
701 * @return �씠�쟾 �쓬�젅�쓽 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣 |
|
702 * |
|
703 * �씠 �븿�닔�뒗 @a iter濡� 二쇱뼱吏� �옄紐� �뒪�듃留곸쓽 �룷�씤�꽣瑜� 湲곗���쑝濡� �씠�쟾 �쓬�젅�쓽 |
|
704 * 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣瑜� 由ы꽩�븳�떎. �쓬�젅�쓣 李얘린�쐞�빐�꽌 begin蹂대떎 |
|
705 * �븵履쎌쑝濡� �씠�룞�븯吏� �븡�뒗�떎. |
|
706 * |
|
707 * �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+M? �뙣�꽩�뿉 �뵲瑜몃떎. |
|
708 */ |
|
709 const ucschar* |
|
710 hangul_syllable_iterator_prev(const ucschar* iter, const ucschar* begin) |
|
711 { |
|
712 if (iter > begin) |
|
713 iter--; |
|
714 |
|
715 while (iter > begin) { |
|
716 ucschar prev = iter[-1]; |
|
717 ucschar curr = iter[0]; |
|
718 if (is_syllable_boundary(prev, curr)) |
|
719 break; |
|
720 iter--; |
|
721 } |
|
722 |
|
723 return iter; |
|
724 } |
|
725 |
|
726 /** |
|
727 * @brief @a iter瑜� 湲곗���쑝濡� �떎�쓬 �쓬�젅�쓽 泥レ옄紐� 湲��옄�뿉 ����븳 �룷�씤�꽣瑜� 援ы븳�떎. |
|
728 * @param iter �쁽�옱 �쐞移� |
|
729 * @param end �뒪�듃留곸쓽 �걹�쐞移�, �룷�씤�꽣媛� �씠�룞�븷 �븳怨꾧컪 |
|
730 * @return �떎�쓬 �쓬�젅�쓽 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣 |
|
731 * |
|
732 * �씠 �븿�닔�뒗 @a iter濡� 二쇱뼱吏� �옄紐� �뒪�듃留곸쓽 �룷�씤�꽣瑜� 湲곗���쑝濡� �떎�쓬 �쓬�젅�쓽 |
|
733 * 泥ル쾲吏� �옄紐⑥뿉 ����븳 �룷�씤�꽣瑜� 由ы꽩�븳�떎. �쓬�젅�쓣 李얘린�쐞�빐�꽌 end瑜� �꽆�뼱 |
|
734 * �씠�룞�븯吏� �븡�뒗�떎. |
|
735 * |
|
736 * �븳 �쓬�젅�씠�씪怨� �뙋�떒�븯�뒗 湲곗����� L*V*T+M? �뙣�꽩�뿉 �뵲瑜몃떎. |
|
737 */ |
|
738 const ucschar* |
|
739 hangul_syllable_iterator_next(const ucschar* iter, const ucschar* end) |
|
740 { |
|
741 if (iter < end) |
|
742 iter++; |
|
743 |
|
744 while (iter < end) { |
|
745 ucschar prev = iter[-1]; |
|
746 ucschar curr = iter[0]; |
|
747 if (is_syllable_boundary(prev, curr)) |
|
748 break; |
|
749 iter++; |
|
750 } |
|
751 |
|
752 return iter; |
|
753 } |
|
754 |
|
755 /** |
|
756 * @brief �옄紐� �뒪�듃留곸쓣 �쓬�젅 �뒪�듃留곸쓣 蹂��솚�븳�떎 |
|
757 * @param dest �쓬�젅�삎�쑝濡� 蹂��솚�맂 寃곌낵媛� ����옣�맆 踰꾪띁 |
|
758 * @param destlen 寃곌낵瑜� ����옣�븷 踰꾪띁�쓽 湲몄씠(ucschar 肄붾뱶 �떒�쐞) |
|
759 * @param src 蹂��솚�븷 �옄紐� �뒪�듃留� |
|
760 * @param srclen 蹂��솚�븷 �옄紐� �뒪�듃留곸쓽 湲몄씠(ucschar 肄붾뱶 �떒�쐞) |
|
761 * @return @a destlen �뿉 ����옣�븳 肄붾뱶�쓽 媛��닔 |
|
762 * |
|
763 * �씠 �븿�닔�뒗 L+V+T*M? �뙣�꽩�뿉 �뵲�씪 �옄紐� �뒪�듃留� 蹂��솚�쓣 �떆�룄�븳�떎. �븳 �쓬�젅�쓣 |
|
764 * �뙋�떒�븯�뒗 湲곗����� @ref hangul_syllable_len �쓣 李몄“�븳�떎. |
|
765 * 留뚯씪 @a src 媛� �쟻�젅�븳 �쓬�젅�삎�깭濡� 蹂��솚�씠 遺덇���뒫�븳 寃쎌슦�뿉�뒗 �옄紐� �뒪�듃留곸씠 |
|
766 * 洹몃��濡� 蹂듭궗�맂�떎. |
|
767 * |
|
768 * �씠 �븿�닔�뒗 �옄紐� �뒪�듃留� @a src 瑜� �쓬�젅�삎�쑝濡� 蹂��솚�븯�뿬 @a dest �뿉 ����옣�븳�떎. |
|
769 * @a srclen �뿉 吏��젙�맂 媛��닔留뚰겮 �씫怨�, @a destlen �뿉 吏��젙�맂 湲몄씠 �씠�긽 �벐吏� |
|
770 * �븡�뒗�떎. @a srclen �씠 -1�씠�씪硫� @a src �뒗 0�쑝濡� �걹�굹�뒗 �뒪�듃留곸쑝濡� 媛��젙�븯怨� |
|
771 * 0�쓣 �젣�쇅�븳 湲몄씠源뚯�� 蹂��솚�쓣 �떆�룄�븳�떎. �뵲�씪�꽌 蹂��솚�맂 寃곌낵 �뒪�듃留곸�� 0�쑝濡� |
|
772 * �걹�굹吏� �븡�뒗�떎. 留뚯씪 0�쑝濡� �걹�굹�뒗 �뒪�듃留곸쓣 留뚮뱾怨� �떢�떎硫� �떎�쓬怨� 媛숈씠 �븳�떎. |
|
773 * |
|
774 * @code |
|
775 * int n = hangul_jamos_to_syllables(dest, destlen, src, srclen); |
|
776 * dest[n] = 0; |
|
777 * @endcode |
|
778 */ |
|
779 int |
|
780 hangul_jamos_to_syllables(ucschar* dest, int destlen, const ucschar* src, int srclen) |
|
781 { |
|
782 ucschar* d; |
|
783 const ucschar* s; |
|
784 |
|
785 int inleft; |
|
786 int outleft; |
|
787 int n; |
|
788 |
|
789 if (srclen < 0) { |
|
790 s = src; |
|
791 while (*s != 0) |
|
792 s++; |
|
793 srclen = s - src; |
|
794 } |
|
795 |
|
796 s = src; |
|
797 d = dest; |
|
798 inleft = srclen; |
|
799 outleft = destlen; |
|
800 |
|
801 n = hangul_syllable_len(s, inleft); |
|
802 while (n > 0 && inleft > 0 && outleft > 0) { |
|
803 ucschar c = build_syllable(s, n); |
|
804 if (c != 0) { |
|
805 *d = c; |
|
806 d++; |
|
807 outleft--; |
|
808 } else { |
|
809 int i; |
|
810 for (i = 0; i < n && i < outleft; i++) { |
|
811 d[i] = s[i]; |
|
812 } |
|
813 d += i; |
|
814 outleft -= i; |
|
815 } |
|
816 |
|
817 s += n; |
|
818 inleft -= n; |
|
819 n = hangul_syllable_len(s, inleft); |
|
820 } |
|
821 |
|
822 return destlen - outleft; |
|
823 } |