1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
|
2 <html xmlns="http://www.w3.org/1999/xhtml"> |
|
3 <head> |
|
4 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"/> |
|
5 <title>TB9.2 Example Applications: examples/PIPS/antiword/src/utf8.c Source File</title> |
|
6 <link href="tabs.css" rel="stylesheet" type="text/css"/> |
|
7 <link href="doxygen.css" rel="stylesheet" type="text/css"/> |
|
8 </head> |
|
9 <body> |
|
10 <!-- Generated by Doxygen 1.6.2 --> |
|
11 <h1>examples/PIPS/antiword/src/utf8.c</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span> |
|
12 <a name="l00002"></a>00002 <span class="comment"> * utf8.c</span> |
|
13 <a name="l00003"></a>00003 <span class="comment"> * Copyright (C) 2001-2004 A.J. van Os; Released under GPL</span> |
|
14 <a name="l00004"></a>00004 <span class="comment"> *</span> |
|
15 <a name="l00005"></a>00005 <span class="comment"> *====================================================================</span> |
|
16 <a name="l00006"></a>00006 <span class="comment"> * This part of the software is based on:</span> |
|
17 <a name="l00007"></a>00007 <span class="comment"> * An implementation of wcwidth() as defined in</span> |
|
18 <a name="l00008"></a>00008 <span class="comment"> * "The Single UNIX Specification, Version 2, The Open Group, 1997"</span> |
|
19 <a name="l00009"></a>00009 <span class="comment"> * &lt;http://www.UNIX-systems.org/online.html&gt;</span> |
|
20 <a name="l00010"></a>00010 <span class="comment"> * Markus Kuhn -- 2001-01-12 -- public domain</span> |
|
21 <a name="l00011"></a>00011 <span class="comment"> *====================================================================</span> |
|
22 <a name="l00012"></a>00012 <span class="comment"> * The credit should go to him, but all the bugs are mine.</span> |
|
23 <a name="l00013"></a>00013 <span class="comment"> */</span> |
|
24 <a name="l00014"></a>00014 |
|
25 <a name="l00015"></a>00015 <span class="preprocessor">#include &lt;stdlib.h&gt;</span> |
|
26 <a name="l00016"></a>00016 <span class="preprocessor">#include &lt;string.h&gt;</span> |
|
27 <a name="l00017"></a>00017 <span class="preprocessor">#include "antiword.h"</span> |
|
28 <a name="l00018"></a>00018 |
|
29 <a name="l00019"></a>00019 <span class="keyword">struct </span>interval { |
|
30 <a name="l00020"></a>00020 USHORT first; |
|
31 <a name="l00021"></a>00021 USHORT last; |
|
32 <a name="l00022"></a>00022 }; |
|
33 <a name="l00023"></a>00023 <span class="comment">/* Sorted list of non-overlapping intervals of non-spacing characters */</span> |
|
34 <a name="l00024"></a>00024 <span class="keyword">static</span> <span class="keyword">const</span> <span class="keyword">struct </span>interval combining[] = { |
|
35 <a name="l00025"></a>00025 { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 }, |
|
36 <a name="l00026"></a>00026 { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 }, |
|
37 <a name="l00027"></a>00027 { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, |
|
38 <a name="l00028"></a>00028 { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 }, |
|
39 <a name="l00029"></a>00029 { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, |
|
40 <a name="l00030"></a>00030 { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A }, |
|
41 <a name="l00031"></a>00031 { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C }, |
|
42 <a name="l00032"></a>00032 { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 }, |
|
43 <a name="l00033"></a>00033 { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, |
|
44 <a name="l00034"></a>00034 { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, |
|
45 <a name="l00035"></a>00035 { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, |
|
46 <a name="l00036"></a>00036 { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 }, |
|
47 <a name="l00037"></a>00037 { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, |
|
48 <a name="l00038"></a>00038 { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 }, |
|
49 <a name="l00039"></a>00039 { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 }, |
|
50 <a name="l00040"></a>00040 { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 }, |
|
51 <a name="l00041"></a>00041 { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 }, |
|
52 <a name="l00042"></a>00042 { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 }, |
|
53 <a name="l00043"></a>00043 { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, |
|
54 <a name="l00044"></a>00044 { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA }, |
|
55 <a name="l00045"></a>00045 { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, |
|
56 <a name="l00046"></a>00046 { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, |
|
57 <a name="l00047"></a>00047 { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, |
|
58 <a name="l00048"></a>00048 { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, |
|
59 <a name="l00049"></a>00049 { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, |
|
60 <a name="l00050"></a>00050 { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC }, |
|
61 <a name="l00051"></a>00051 { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 }, |
|
62 <a name="l00052"></a>00052 { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 }, |
|
63 <a name="l00053"></a>00053 { 0x1160, 0x11FF }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 }, |
|
64 <a name="l00054"></a>00054 { 0x17C9, 0x17D3 }, { 0x180B, 0x180E }, { 0x18A9, 0x18A9 }, |
|
65 <a name="l00055"></a>00055 { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x206A, 0x206F }, |
|
66 <a name="l00056"></a>00056 { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A }, |
|
67 <a name="l00057"></a>00057 { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF }, |
|
68 <a name="l00058"></a>00058 { 0xFFF9, 0xFFFB } |
|
69 <a name="l00059"></a>00059 }; |
|
70 <a name="l00060"></a>00060 |
|
71 <a name="l00061"></a>00061 <span class="comment">/* Auxiliary function for binary search in interval table */</span> |
|
72 <a name="l00062"></a>00062 <span class="keyword">static</span> BOOL |
|
73 <a name="l00063"></a>00063 bIsZeroWidthChar(ULONG ucs) |
|
74 <a name="l00064"></a>00064 { |
|
75 <a name="l00065"></a>00065 <span class="keywordtype">int</span> low = 0; |
|
76 <a name="l00066"></a>00066 <span class="keywordtype">int</span> high = elementsof(combining) - 1; |
|
77 <a name="l00067"></a>00067 <span class="keywordtype">int</span> mid; |
|
78 <a name="l00068"></a>00068 |
|
79 <a name="l00069"></a>00069 <span class="keywordflow">if</span> (ucs &lt; (ULONG)combining[low].first || |
|
80 <a name="l00070"></a>00070 ucs > (ULONG)combining[high].last) { |
|
81 <a name="l00071"></a>00071 <span class="keywordflow">return</span> FALSE; |
|
82 <a name="l00072"></a>00072 } |
|
83 <a name="l00073"></a>00073 |
|
84 <a name="l00074"></a>00074 <span class="keywordflow">while</span> (high >= low) { |
|
85 <a name="l00075"></a>00075 mid = (low + high) / 2; |
|
86 <a name="l00076"></a>00076 <span class="keywordflow">if</span> (ucs > (ULONG)combining[mid].last) { |
|
87 <a name="l00077"></a>00077 low = mid + 1; |
|
88 <a name="l00078"></a>00078 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ucs &lt; (ULONG)combining[mid].first) { |
|
89 <a name="l00079"></a>00079 high = mid - 1; |
|
90 <a name="l00080"></a>00080 } <span class="keywordflow">else</span> { |
|
91 <a name="l00081"></a>00081 <span class="keywordflow">return</span> TRUE; |
|
92 <a name="l00082"></a>00082 } |
|
93 <a name="l00083"></a>00083 } |
|
94 <a name="l00084"></a>00084 <span class="keywordflow">return</span> FALSE; |
|
95 <a name="l00085"></a>00085 } <span class="comment">/* end of bIsZeroWidthChar */</span> |
|
96 <a name="l00086"></a>00086 |
|
97 <a name="l00087"></a>00087 <span class="comment">/* The following functions define the column width of an ISO 10646</span> |
|
98 <a name="l00088"></a>00088 <span class="comment"> * character as follows:</span> |
|
99 <a name="l00089"></a>00089 <span class="comment"> *</span> |
|
100 <a name="l00090"></a>00090 <span class="comment"> * - The null character (U+0000) has a column width of 0.</span> |
|
101 <a name="l00091"></a>00091 <span class="comment"> *</span> |
|
102 <a name="l00092"></a>00092 <span class="comment"> * - Other C0/C1 control characters and DEL will lead to a return</span> |
|
103 <a name="l00093"></a>00093 <span class="comment"> * value of -1.</span> |
|
104 <a name="l00094"></a>00094 <span class="comment"> *</span> |
|
105 <a name="l00095"></a>00095 <span class="comment"> * - Non-spacing and enclosing combining characters (general</span> |
|
106 <a name="l00096"></a>00096 <span class="comment"> * category code Mn or Me in the Unicode database) have a</span> |
|
107 <a name="l00097"></a>00097 <span class="comment"> * column width of 0.</span> |
|
108 <a name="l00098"></a>00098 <span class="comment"> *</span> |
|
109 <a name="l00099"></a>00099 <span class="comment"> * - Other format characters (general category code Cf in the Unicode</span> |
|
110 <a name="l00100"></a>00100 <span class="comment"> * database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.</span> |
|
111 <a name="l00101"></a>00101 <span class="comment"> *</span> |
|
112 <a name="l00102"></a>00102 <span class="comment"> * - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)</span> |
|
113 <a name="l00103"></a>00103 <span class="comment"> * have a column width of 0.</span> |
|
114 <a name="l00104"></a>00104 <span class="comment"> *</span> |
|
115 <a name="l00105"></a>00105 <span class="comment"> * - Spacing characters in the East Asian Wide (W) or East Asian</span> |
|
116 <a name="l00106"></a>00106 <span class="comment"> * FullWidth (F) category as defined in Unicode Technical</span> |
|
117 <a name="l00107"></a>00107 <span class="comment"> * Report #11 have a column width of 2.</span> |
|
118 <a name="l00108"></a>00108 <span class="comment"> *</span> |
|
119 <a name="l00109"></a>00109 <span class="comment"> * - All remaining characters (including all printable</span> |
|
120 <a name="l00110"></a>00110 <span class="comment"> * ISO 8859-1 and WGL4 characters, Unicode control characters,</span> |
|
121 <a name="l00111"></a>00111 <span class="comment"> * etc.) have a column width of 1.</span> |
|
122 <a name="l00112"></a>00112 <span class="comment"> *</span> |
|
123 <a name="l00113"></a>00113 <span class="comment"> * This implementation assumes that all characters are encoded</span> |
|
124 <a name="l00114"></a>00114 <span class="comment"> * in ISO 10646.</span> |
|
125 <a name="l00115"></a>00115 <span class="comment"> *</span> |
|
126 <a name="l00116"></a>00116 <span class="comment"> * This function is not named wcwidth() to prevent name clashes</span> |
|
127 <a name="l00117"></a>00117 <span class="comment"> */</span> |
|
128 <a name="l00118"></a>00118 <span class="keyword">static</span> <span class="keywordtype">int</span> |
|
129 <a name="l00119"></a>00119 iWcWidth(ULONG ucs) |
|
130 <a name="l00120"></a>00120 { |
|
131 <a name="l00121"></a>00121 <span class="comment">/* Test for 8-bit control characters */</span> |
|
132 <a name="l00122"></a>00122 <span class="keywordflow">if</span> (ucs == 0) { |
|
133 <a name="l00123"></a>00123 <span class="keywordflow">return</span> 0; |
|
134 <a name="l00124"></a>00124 } |
|
135 <a name="l00125"></a>00125 <span class="keywordflow">if</span> (ucs &lt; 0x20 || (ucs >= 0x7f &amp;&amp; ucs &lt; 0xa0)) { |
|
136 <a name="l00126"></a>00126 NO_DBG_HEX(ucs); |
|
137 <a name="l00127"></a>00127 <span class="keywordflow">return</span> -1; |
|
138 <a name="l00128"></a>00128 } |
|
139 <a name="l00129"></a>00129 |
|
140 <a name="l00130"></a>00130 <span class="comment">/* Binary search in table of non-spacing characters */</span> |
|
141 <a name="l00131"></a>00131 <span class="keywordflow">if</span> (bIsZeroWidthChar(ucs)) { |
|
142 <a name="l00132"></a>00132 <span class="keywordflow">return</span> 0; |
|
143 <a name="l00133"></a>00133 } |
|
144 <a name="l00134"></a>00134 |
|
145 <a name="l00135"></a>00135 <span class="comment">/* Ucs is not a combining or C0/C1 control character */</span> |
|
146 <a name="l00136"></a>00136 |
|
147 <a name="l00137"></a>00137 <span class="keywordflow">return</span> 1 + |
|
148 <a name="l00138"></a>00138 (ucs >= 0x1100 &amp;&amp; |
|
149 <a name="l00139"></a>00139 (ucs &lt;= 0x115f || <span class="comment">/* Hangul Jamo init. consonants */</span> |
|
150 <a name="l00140"></a>00140 (ucs >= 0x2e80 &amp;&amp; ucs &lt;= 0xa4cf &amp;&amp; (ucs &amp; ~0x0011) != 0x300a &amp;&amp; |
|
151 <a name="l00141"></a>00141 ucs != 0x303f) || <span class="comment">/* CJK ... Yi */</span> |
|
152 <a name="l00142"></a>00142 (ucs >= 0xac00 &amp;&amp; ucs &lt;= 0xd7a3) || <span class="comment">/* Hangul Syllables */</span> |
|
153 <a name="l00143"></a>00143 (ucs >= 0xf900 &amp;&amp; ucs &lt;= 0xfaff) || <span class="comment">/* CJK Compatibility Ideographs */</span> |
|
154 <a name="l00144"></a>00144 (ucs >= 0xfe30 &amp;&amp; ucs &lt;= 0xfe6f) || <span class="comment">/* CJK Compatibility Forms */</span> |
|
155 <a name="l00145"></a>00145 (ucs >= 0xff00 &amp;&amp; ucs &lt;= 0xff5f) || <span class="comment">/* Fullwidth Forms */</span> |
|
156 <a name="l00146"></a>00146 (ucs >= 0xffe0 &amp;&amp; ucs &lt;= 0xffe6) || |
|
157 <a name="l00147"></a>00147 (ucs >= 0x20000 &amp;&amp; ucs &lt;= 0x2ffff))); |
|
158 <a name="l00148"></a>00148 } <span class="comment">/* end of iWcWidth */</span> |
|
159 <a name="l00149"></a>00149 |
|
160 <a name="l00150"></a>00150 <span class="comment">/*</span> |
|
161 <a name="l00151"></a>00151 <span class="comment"> * utf8_to_ucs - convert from UTF-8 to UCS</span> |
|
162 <a name="l00152"></a>00152 <span class="comment"> *</span> |
|
163 <a name="l00153"></a>00153 <span class="comment"> * Returns the UCS character,</span> |
|
164 <a name="l00154"></a>00154 <span class="comment"> * Fills in the number of bytes in the UTF-8 character</span> |
|
165 <a name="l00155"></a>00155 <span class="comment"> */</span> |
|
166 <a name="l00156"></a>00156 <span class="keyword">static</span> ULONG |
|
167 <a name="l00157"></a>00157 utf8_to_ucs(<span class="keyword">const</span> <span class="keywordtype">char</span> *p, <span class="keywordtype">int</span> iStrLen, <span class="keywordtype">int</span> *piUtfLen) |
|
168 <a name="l00158"></a>00158 { |
|
169 <a name="l00159"></a>00159 ULONG ulUcs; |
|
170 <a name="l00160"></a>00160 <span class="keywordtype">int</span> iIndex, iCharLen; |
|
171 <a name="l00161"></a>00161 |
|
172 <a name="l00162"></a>00162 fail(p == NULL || piUtfLen == NULL); |
|
173 <a name="l00163"></a>00163 fail(iStrLen &lt; 1); |
|
174 <a name="l00164"></a>00164 |
|
175 <a name="l00165"></a>00165 ulUcs = (ULONG)(UCHAR)p[0]; |
|
176 <a name="l00166"></a>00166 |
|
177 <a name="l00167"></a>00167 <span class="keywordflow">if</span> (ulUcs &lt; 0x80) { |
|
178 <a name="l00168"></a>00168 *piUtfLen = 1; |
|
179 <a name="l00169"></a>00169 <span class="keywordflow">return</span> ulUcs; |
|
180 <a name="l00170"></a>00170 } |
|
181 <a name="l00171"></a>00171 |
|
182 <a name="l00172"></a>00172 <span class="keywordflow">if</span> (ulUcs &lt; 0xe0){ |
|
183 <a name="l00173"></a>00173 iCharLen = 2; |
|
184 <a name="l00174"></a>00174 ulUcs &amp;= 0x1f; |
|
185 <a name="l00175"></a>00175 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xf0){ |
|
186 <a name="l00176"></a>00176 iCharLen = 3; |
|
187 <a name="l00177"></a>00177 ulUcs &amp;= 0x0f; |
|
188 <a name="l00178"></a>00178 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xf8){ |
|
189 <a name="l00179"></a>00179 iCharLen = 4; |
|
190 <a name="l00180"></a>00180 ulUcs &amp;= 0x07; |
|
191 <a name="l00181"></a>00181 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xfc){ |
|
192 <a name="l00182"></a>00182 iCharLen = 5; |
|
193 <a name="l00183"></a>00183 ulUcs &amp;= 0x03; |
|
194 <a name="l00184"></a>00184 } <span class="keywordflow">else</span> { |
|
195 <a name="l00185"></a>00185 iCharLen = 6; |
|
196 <a name="l00186"></a>00186 ulUcs &amp;= 0x01; |
|
197 <a name="l00187"></a>00187 } |
|
198 <a name="l00188"></a>00188 <span class="keywordflow">for</span> (iIndex = 1; iIndex &lt; iCharLen; iIndex++) { |
|
199 <a name="l00189"></a>00189 ulUcs &lt;&lt;= 6; |
|
200 <a name="l00190"></a>00190 <span class="keywordflow">if</span> (iIndex &lt; iStrLen) { |
|
201 <a name="l00191"></a>00191 ulUcs |= (ULONG)(UCHAR)p[iIndex] &amp; 0x3f; |
|
202 <a name="l00192"></a>00192 } |
|
203 <a name="l00193"></a>00193 } |
|
204 <a name="l00194"></a>00194 *piUtfLen = iCharLen; |
|
205 <a name="l00195"></a>00195 <span class="keywordflow">return</span> ulUcs; |
|
206 <a name="l00196"></a>00196 } <span class="comment">/* end of utf8_to_ucs */</span> |
|
207 <a name="l00197"></a>00197 |
|
208 <a name="l00198"></a>00198 <span class="comment">/*</span> |
|
209 <a name="l00199"></a>00199 <span class="comment"> * utf8_strwidth - compute the string width of an UTF-8 string</span> |
|
210 <a name="l00200"></a>00200 <span class="comment"> *</span> |
|
211 <a name="l00201"></a>00201 <span class="comment"> * Returns the string width in columns</span> |
|
212 <a name="l00202"></a>00202 <span class="comment"> */</span> |
|
213 <a name="l00203"></a>00203 <span class="keywordtype">long</span> |
|
214 <a name="l00204"></a>00204 utf8_strwidth(<span class="keyword">const</span> <span class="keywordtype">char</span> *pcString, <span class="keywordtype">size_t</span> tNumchars) |
|
215 <a name="l00205"></a>00205 { |
|
216 <a name="l00206"></a>00206 ULONG ulUcs; |
|
217 <a name="l00207"></a>00207 <span class="keywordtype">long</span> lTotal; |
|
218 <a name="l00208"></a>00208 <span class="keywordtype">int</span> iToGo, iWidth, iUtflen; |
|
219 <a name="l00209"></a>00209 |
|
220 <a name="l00210"></a>00210 fail(pcString == NULL || tNumchars > (<span class="keywordtype">size_t</span>)INT_MAX); |
|
221 <a name="l00211"></a>00211 |
|
222 <a name="l00212"></a>00212 lTotal = 0; |
|
223 <a name="l00213"></a>00213 iToGo = (int)tNumchars; |
|
224 <a name="l00214"></a>00214 |
|
225 <a name="l00215"></a>00215 <span class="keywordflow">while</span> (iToGo > 0 &amp;&amp; *pcString != <span class="charliteral">'\0'</span>) { |
|
226 <a name="l00216"></a>00216 ulUcs = utf8_to_ucs(pcString, iToGo, &amp;iUtflen); |
|
227 <a name="l00217"></a>00217 iWidth = iWcWidth(ulUcs); |
|
228 <a name="l00218"></a>00218 <span class="keywordflow">if</span> (iWidth > 0) { |
|
229 <a name="l00219"></a>00219 lTotal += iWidth; |
|
230 <a name="l00220"></a>00220 } |
|
231 <a name="l00221"></a>00221 pcString += iUtflen; |
|
232 <a name="l00222"></a>00222 iToGo -= iUtflen; |
|
233 <a name="l00223"></a>00223 } |
|
234 <a name="l00224"></a>00224 NO_DBG_DEC(lTotal); |
|
235 <a name="l00225"></a>00225 <span class="keywordflow">return</span> lTotal; |
|
236 <a name="l00226"></a>00226 } <span class="comment">/* end of utf8_strwidth */</span> |
|
237 <a name="l00227"></a>00227 |
|
238 <a name="l00228"></a>00228 <span class="comment">/*</span> |
|
239 <a name="l00229"></a>00229 <span class="comment"> * utf8_chrlength - get the number of bytes in an UTF-8 character</span> |
|
240 <a name="l00230"></a>00230 <span class="comment"> *</span> |
|
241 <a name="l00231"></a>00231 <span class="comment"> * Returns the number of bytes</span> |
|
242 <a name="l00232"></a>00232 <span class="comment"> */</span> |
|
243 <a name="l00233"></a>00233 <span class="keywordtype">int</span> |
|
244 <a name="l00234"></a>00234 utf8_chrlength(<span class="keyword">const</span> <span class="keywordtype">char</span> *p) |
|
245 <a name="l00235"></a>00235 { |
|
246 <a name="l00236"></a>00236 <span class="keywordtype">int</span> iUtflen; |
|
247 <a name="l00237"></a>00237 |
|
248 <a name="l00238"></a>00238 fail(p == NULL); |
|
249 <a name="l00239"></a>00239 |
|
250 <a name="l00240"></a>00240 iUtflen = -1; <span class="comment">/* Just to make sure */</span> |
|
251 <a name="l00241"></a>00241 (void)utf8_to_ucs(p, INT_MAX, &amp;iUtflen); |
|
252 <a name="l00242"></a>00242 NO_DBG_DEC(iUtflen); |
|
253 <a name="l00243"></a>00243 <span class="keywordflow">return</span> iUtflen; |
|
254 <a name="l00244"></a>00244 } <span class="comment">/* end of utf8_chrlength */</span> |
|
255 <a name="l00245"></a>00245 |
|
256 <a name="l00246"></a>00246 <span class="comment">/*</span> |
|
257 <a name="l00247"></a>00247 <span class="comment"> * is_locale_utf8 - return TRUE if the locale is UTF-8</span> |
|
258 <a name="l00248"></a>00248 <span class="comment"> */</span> |
|
259 <a name="l00249"></a>00249 BOOL |
|
260 <a name="l00250"></a>00250 is_locale_utf8(<span class="keywordtype">void</span>) |
|
261 <a name="l00251"></a>00251 { |
|
262 <a name="l00252"></a>00252 <span class="keywordtype">char</span> szCodeset[20]; |
|
263 <a name="l00253"></a>00253 |
|
264 <a name="l00254"></a>00254 szCodeset[0] = <span class="charliteral">'\0'</span>; |
|
265 <a name="l00255"></a>00255 <span class="keywordflow">if</span> (!bGetNormalizedCodeset(szCodeset, <span class="keyword">sizeof</span>(szCodeset), NULL)) { |
|
266 <a name="l00256"></a>00256 <span class="keywordflow">return</span> FALSE; |
|
267 <a name="l00257"></a>00257 } |
|
268 <a name="l00258"></a>00258 DBG_MSG(szCodeset); |
|
269 <a name="l00259"></a>00259 <span class="keywordflow">return</span> STREQ(szCodeset, <span class="stringliteral">"utf8"</span>); |
|
270 <a name="l00260"></a>00260 } <span class="comment">/* end of is_locale_utf8 */</span> |
|
271 </pre></div> |
|
272 <hr size="1"/><address style="text-align: right;"><small>Generated by |
|
273 <a href="http://www.doxygen.org/index.html"> |
|
274 <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.6.2 </small></address> |
|
275 </body> |
|
276 </html> |
|