Symbian3/Examples/guid-6013a680-57f9-415b-8851-c4fa63356636/utf8_8c_source.html
changeset 10 d4524d6a4472
parent 9 59758314f811
child 11 5072524fcc79
equal deleted inserted replaced
9:59758314f811 10:d4524d6a4472
     1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
       
     2 <html xmlns="http://www.w3.org/1999/xhtml">
       
     3 <head>
       
     4 <meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
       
     5 <title>TB9.2 Example Applications: examples/PIPS/antiword/src/utf8.c Source File</title>
       
     6 <link href="tabs.css" rel="stylesheet" type="text/css"/>
       
     7 <link href="doxygen.css" rel="stylesheet" type="text/css"/>
       
     8 </head>
       
     9 <body>
       
    10 <!-- Generated by Doxygen 1.6.2 -->
       
    11 <h1>examples/PIPS/antiword/src/utf8.c</h1><div class="fragment"><pre class="fragment"><a name="l00001"></a>00001 <span class="comment">/*</span>
       
    12 <a name="l00002"></a>00002 <span class="comment"> * utf8.c</span>
       
    13 <a name="l00003"></a>00003 <span class="comment"> * Copyright (C) 2001-2004 A.J. van Os; Released under GPL</span>
       
    14 <a name="l00004"></a>00004 <span class="comment"> *</span>
       
    15 <a name="l00005"></a>00005 <span class="comment"> *====================================================================</span>
       
    16 <a name="l00006"></a>00006 <span class="comment"> * This part of the software is based on:</span>
       
    17 <a name="l00007"></a>00007 <span class="comment"> * An implementation of wcwidth() as defined in</span>
       
    18 <a name="l00008"></a>00008 <span class="comment"> * &quot;The Single UNIX Specification, Version 2, The Open Group, 1997&quot;</span>
       
    19 <a name="l00009"></a>00009 <span class="comment"> * &lt;http://www.UNIX-systems.org/online.html&gt;</span>
       
    20 <a name="l00010"></a>00010 <span class="comment"> * Markus Kuhn -- 2001-01-12 -- public domain</span>
       
    21 <a name="l00011"></a>00011 <span class="comment"> *====================================================================</span>
       
    22 <a name="l00012"></a>00012 <span class="comment"> * The credit should go to him, but all the bugs are mine.</span>
       
    23 <a name="l00013"></a>00013 <span class="comment"> */</span>
       
    24 <a name="l00014"></a>00014 
       
    25 <a name="l00015"></a>00015 <span class="preprocessor">#include &lt;stdlib.h&gt;</span>
       
    26 <a name="l00016"></a>00016 <span class="preprocessor">#include &lt;string.h&gt;</span>
       
    27 <a name="l00017"></a>00017 <span class="preprocessor">#include &quot;antiword.h&quot;</span>
       
    28 <a name="l00018"></a>00018 
       
    29 <a name="l00019"></a>00019 <span class="keyword">struct </span>interval {
       
    30 <a name="l00020"></a>00020         USHORT  first;
       
    31 <a name="l00021"></a>00021         USHORT  last;
       
    32 <a name="l00022"></a>00022 };
       
    33 <a name="l00023"></a>00023 <span class="comment">/* Sorted list of non-overlapping intervals of non-spacing characters */</span>
       
    34 <a name="l00024"></a>00024 <span class="keyword">static</span> <span class="keyword">const</span> <span class="keyword">struct </span>interval combining[] = {
       
    35 <a name="l00025"></a>00025         { 0x0300, 0x034E }, { 0x0360, 0x0362 }, { 0x0483, 0x0486 },
       
    36 <a name="l00026"></a>00026         { 0x0488, 0x0489 }, { 0x0591, 0x05A1 }, { 0x05A3, 0x05B9 },
       
    37 <a name="l00027"></a>00027         { 0x05BB, 0x05BD }, { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 },
       
    38 <a name="l00028"></a>00028         { 0x05C4, 0x05C4 }, { 0x064B, 0x0655 }, { 0x0670, 0x0670 },
       
    39 <a name="l00029"></a>00029         { 0x06D6, 0x06E4 }, { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED },
       
    40 <a name="l00030"></a>00030         { 0x070F, 0x070F }, { 0x0711, 0x0711 }, { 0x0730, 0x074A },
       
    41 <a name="l00031"></a>00031         { 0x07A6, 0x07B0 }, { 0x0901, 0x0902 }, { 0x093C, 0x093C },
       
    42 <a name="l00032"></a>00032         { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0954 },
       
    43 <a name="l00033"></a>00033         { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC },
       
    44 <a name="l00034"></a>00034         { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 },
       
    45 <a name="l00035"></a>00035         { 0x0A02, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 },
       
    46 <a name="l00036"></a>00036         { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A70, 0x0A71 },
       
    47 <a name="l00037"></a>00037         { 0x0A81, 0x0A82 }, { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 },
       
    48 <a name="l00038"></a>00038         { 0x0AC7, 0x0AC8 }, { 0x0ACD, 0x0ACD }, { 0x0B01, 0x0B01 },
       
    49 <a name="l00039"></a>00039         { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B43 },
       
    50 <a name="l00040"></a>00040         { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B82, 0x0B82 },
       
    51 <a name="l00041"></a>00041         { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, { 0x0C3E, 0x0C40 },
       
    52 <a name="l00042"></a>00042         { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, { 0x0C55, 0x0C56 },
       
    53 <a name="l00043"></a>00043         { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD },
       
    54 <a name="l00044"></a>00044         { 0x0D41, 0x0D43 }, { 0x0D4D, 0x0D4D }, { 0x0DCA, 0x0DCA },
       
    55 <a name="l00045"></a>00045         { 0x0DD2, 0x0DD4 }, { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 },
       
    56 <a name="l00046"></a>00046         { 0x0E34, 0x0E3A }, { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 },
       
    57 <a name="l00047"></a>00047         { 0x0EB4, 0x0EB9 }, { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD },
       
    58 <a name="l00048"></a>00048         { 0x0F18, 0x0F19 }, { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 },
       
    59 <a name="l00049"></a>00049         { 0x0F39, 0x0F39 }, { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 },
       
    60 <a name="l00050"></a>00050         { 0x0F86, 0x0F87 }, { 0x0F90, 0x0F97 }, { 0x0F99, 0x0FBC },
       
    61 <a name="l00051"></a>00051         { 0x0FC6, 0x0FC6 }, { 0x102D, 0x1030 }, { 0x1032, 0x1032 },
       
    62 <a name="l00052"></a>00052         { 0x1036, 0x1037 }, { 0x1039, 0x1039 }, { 0x1058, 0x1059 },
       
    63 <a name="l00053"></a>00053         { 0x1160, 0x11FF }, { 0x17B7, 0x17BD }, { 0x17C6, 0x17C6 },
       
    64 <a name="l00054"></a>00054         { 0x17C9, 0x17D3 }, { 0x180B, 0x180E }, { 0x18A9, 0x18A9 },
       
    65 <a name="l00055"></a>00055         { 0x200B, 0x200F }, { 0x202A, 0x202E }, { 0x206A, 0x206F },
       
    66 <a name="l00056"></a>00056         { 0x20D0, 0x20E3 }, { 0x302A, 0x302F }, { 0x3099, 0x309A },
       
    67 <a name="l00057"></a>00057         { 0xFB1E, 0xFB1E }, { 0xFE20, 0xFE23 }, { 0xFEFF, 0xFEFF },
       
    68 <a name="l00058"></a>00058         { 0xFFF9, 0xFFFB }
       
    69 <a name="l00059"></a>00059 };
       
    70 <a name="l00060"></a>00060 
       
    71 <a name="l00061"></a>00061 <span class="comment">/* Auxiliary function for binary search in interval table */</span>
       
    72 <a name="l00062"></a>00062 <span class="keyword">static</span> BOOL
       
    73 <a name="l00063"></a>00063 bIsZeroWidthChar(ULONG ucs)
       
    74 <a name="l00064"></a>00064 {
       
    75 <a name="l00065"></a>00065         <span class="keywordtype">int</span> low = 0;
       
    76 <a name="l00066"></a>00066         <span class="keywordtype">int</span> high = elementsof(combining) - 1;
       
    77 <a name="l00067"></a>00067         <span class="keywordtype">int</span> mid;
       
    78 <a name="l00068"></a>00068 
       
    79 <a name="l00069"></a>00069         <span class="keywordflow">if</span> (ucs &lt; (ULONG)combining[low].first ||
       
    80 <a name="l00070"></a>00070             ucs &gt; (ULONG)combining[high].last) {
       
    81 <a name="l00071"></a>00071                 <span class="keywordflow">return</span> FALSE;
       
    82 <a name="l00072"></a>00072         }
       
    83 <a name="l00073"></a>00073 
       
    84 <a name="l00074"></a>00074         <span class="keywordflow">while</span> (high &gt;= low) {
       
    85 <a name="l00075"></a>00075                 mid = (low + high) / 2;
       
    86 <a name="l00076"></a>00076                 <span class="keywordflow">if</span> (ucs &gt; (ULONG)combining[mid].last) {
       
    87 <a name="l00077"></a>00077                         low = mid + 1;
       
    88 <a name="l00078"></a>00078                 } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ucs &lt; (ULONG)combining[mid].first) {
       
    89 <a name="l00079"></a>00079                         high = mid - 1;
       
    90 <a name="l00080"></a>00080                 } <span class="keywordflow">else</span> {
       
    91 <a name="l00081"></a>00081                         <span class="keywordflow">return</span> TRUE;
       
    92 <a name="l00082"></a>00082                 }
       
    93 <a name="l00083"></a>00083         }
       
    94 <a name="l00084"></a>00084         <span class="keywordflow">return</span> FALSE;
       
    95 <a name="l00085"></a>00085 } <span class="comment">/* end of bIsZeroWidthChar */</span>
       
    96 <a name="l00086"></a>00086 
       
    97 <a name="l00087"></a>00087 <span class="comment">/* The following function defines the column width of an ISO 10646</span>
       
    98 <a name="l00088"></a>00088 <span class="comment"> * character as follows:</span>
       
    99 <a name="l00089"></a>00089 <span class="comment"> *</span>
       
   100 <a name="l00090"></a>00090 <span class="comment"> *    - The null character (U+0000) has a column width of 0.</span>
       
   101 <a name="l00091"></a>00091 <span class="comment"> *</span>
       
   102 <a name="l00092"></a>00092 <span class="comment"> *    - Other C0/C1 control characters and DEL will lead to a return</span>
       
   103 <a name="l00093"></a>00093 <span class="comment"> *      value of -1.</span>
       
   104 <a name="l00094"></a>00094 <span class="comment"> *</span>
       
   105 <a name="l00095"></a>00095 <span class="comment"> *    - Non-spacing and enclosing combining characters (general</span>
       
   106 <a name="l00096"></a>00096 <span class="comment"> *      category code Mn or Me in the Unicode database) have a</span>
       
   107 <a name="l00097"></a>00097 <span class="comment"> *      column width of 0.</span>
       
   108 <a name="l00098"></a>00098 <span class="comment"> *</span>
       
   109 <a name="l00099"></a>00099 <span class="comment"> *    - Other format characters (general category code Cf in the Unicode</span>
       
   110 <a name="l00100"></a>00100 <span class="comment"> *      database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.</span>
       
   111 <a name="l00101"></a>00101 <span class="comment"> *</span>
       
   112 <a name="l00102"></a>00102 <span class="comment"> *    - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)</span>
       
   113 <a name="l00103"></a>00103 <span class="comment"> *      have a column width of 0.</span>
       
   114 <a name="l00104"></a>00104 <span class="comment"> *</span>
       
   115 <a name="l00105"></a>00105 <span class="comment"> *    - Spacing characters in the East Asian Wide (W) or East Asian</span>
       
   116 <a name="l00106"></a>00106 <span class="comment"> *      FullWidth (F) category as defined in Unicode Technical</span>
       
   117 <a name="l00107"></a>00107 <span class="comment"> *      Report #11 have a column width of 2.</span>
       
   118 <a name="l00108"></a>00108 <span class="comment"> *</span>
       
   119 <a name="l00109"></a>00109 <span class="comment"> *    - All remaining characters (including all printable</span>
       
   120 <a name="l00110"></a>00110 <span class="comment"> *      ISO 8859-1 and WGL4 characters, Unicode control characters,</span>
       
   121 <a name="l00111"></a>00111 <span class="comment"> *      etc.) have a column width of 1.</span>
       
   122 <a name="l00112"></a>00112 <span class="comment"> *</span>
       
   123 <a name="l00113"></a>00113 <span class="comment"> * This implementation assumes that all characters are encoded</span>
       
   124 <a name="l00114"></a>00114 <span class="comment"> * in ISO 10646.</span>
       
   125 <a name="l00115"></a>00115 <span class="comment"> *</span>
       
   126 <a name="l00116"></a>00116 <span class="comment"> * This function is not named wcwidth() to prevent name clashes</span>
       
   127 <a name="l00117"></a>00117 <span class="comment"> */</span>
       
   128 <a name="l00118"></a>00118 <span class="keyword">static</span> <span class="keywordtype">int</span>
       
   129 <a name="l00119"></a>00119 iWcWidth(ULONG ucs)
       
   130 <a name="l00120"></a>00120 {
       
   131 <a name="l00121"></a>00121         <span class="comment">/* Test for 8-bit control characters */</span>
       
   132 <a name="l00122"></a>00122         <span class="keywordflow">if</span> (ucs == 0) {
       
   133 <a name="l00123"></a>00123                 <span class="keywordflow">return</span> 0;
       
   134 <a name="l00124"></a>00124         }
       
   135 <a name="l00125"></a>00125         <span class="keywordflow">if</span> (ucs &lt; 0x20 || (ucs &gt;= 0x7f &amp;&amp; ucs &lt; 0xa0)) {
       
   136 <a name="l00126"></a>00126                 NO_DBG_HEX(ucs);
       
   137 <a name="l00127"></a>00127                 <span class="keywordflow">return</span> -1;
       
   138 <a name="l00128"></a>00128         }
       
   139 <a name="l00129"></a>00129 
       
   140 <a name="l00130"></a>00130         <span class="comment">/* Binary search in table of non-spacing characters */</span>
       
   141 <a name="l00131"></a>00131         <span class="keywordflow">if</span> (bIsZeroWidthChar(ucs)) {
       
   142 <a name="l00132"></a>00132                 <span class="keywordflow">return</span> 0;
       
   143 <a name="l00133"></a>00133         }
       
   144 <a name="l00134"></a>00134 
       
   145 <a name="l00135"></a>00135         <span class="comment">/* Ucs is not a combining or C0/C1 control character */</span>
       
   146 <a name="l00136"></a>00136 
       
   147 <a name="l00137"></a>00137         <span class="keywordflow">return</span> 1 +
       
   148 <a name="l00138"></a>00138         (ucs &gt;= 0x1100 &amp;&amp;
       
   149 <a name="l00139"></a>00139          (ucs &lt;= 0x115f ||                    <span class="comment">/* Hangul Jamo init. consonants */</span>
       
   150 <a name="l00140"></a>00140           (ucs &gt;= 0x2e80 &amp;&amp; ucs &lt;= 0xa4cf &amp;&amp; (ucs &amp; ~0x0011) != 0x300a &amp;&amp;
       
   151 <a name="l00141"></a>00141            ucs != 0x303f) ||                  <span class="comment">/* CJK ... Yi */</span>
       
   152 <a name="l00142"></a>00142           (ucs &gt;= 0xac00 &amp;&amp; ucs &lt;= 0xd7a3) || <span class="comment">/* Hangul Syllables */</span>
       
   153 <a name="l00143"></a>00143           (ucs &gt;= 0xf900 &amp;&amp; ucs &lt;= 0xfaff) || <span class="comment">/* CJK Compatibility Ideographs */</span>
       
   154 <a name="l00144"></a>00144           (ucs &gt;= 0xfe30 &amp;&amp; ucs &lt;= 0xfe6f) || <span class="comment">/* CJK Compatibility Forms */</span>
       
   155 <a name="l00145"></a>00145           (ucs &gt;= 0xff00 &amp;&amp; ucs &lt;= 0xff5f) || <span class="comment">/* Fullwidth Forms */</span>
       
   156 <a name="l00146"></a>00146           (ucs &gt;= 0xffe0 &amp;&amp; ucs &lt;= 0xffe6) ||
       
   157 <a name="l00147"></a>00147           (ucs &gt;= 0x20000 &amp;&amp; ucs &lt;= 0x2ffff)));
       
   158 <a name="l00148"></a>00148 } <span class="comment">/* end of iWcWidth */</span>
       
   159 <a name="l00149"></a>00149 
       
   160 <a name="l00150"></a>00150 <span class="comment">/*</span>
       
   161 <a name="l00151"></a>00151 <span class="comment"> * utf8_to_ucs - convert from UTF-8 to UCS</span>
       
   162 <a name="l00152"></a>00152 <span class="comment"> *</span>
       
   163 <a name="l00153"></a>00153 <span class="comment"> * Returns the UCS character,</span>
       
   164 <a name="l00154"></a>00154 <span class="comment"> * Fills in the number of bytes in the UTF-8 character</span>
       
   165 <a name="l00155"></a>00155 <span class="comment"> */</span>
       
   166 <a name="l00156"></a>00156 <span class="keyword">static</span> ULONG
       
   167 <a name="l00157"></a>00157 utf8_to_ucs(<span class="keyword">const</span> <span class="keywordtype">char</span> *p, <span class="keywordtype">int</span> iStrLen, <span class="keywordtype">int</span> *piUtfLen)
       
   168 <a name="l00158"></a>00158 {
       
   169 <a name="l00159"></a>00159         ULONG   ulUcs;
       
   170 <a name="l00160"></a>00160         <span class="keywordtype">int</span>     iIndex, iCharLen;
       
   171 <a name="l00161"></a>00161 
       
   172 <a name="l00162"></a>00162         fail(p == NULL || piUtfLen == NULL);
       
   173 <a name="l00163"></a>00163         fail(iStrLen &lt; 1);
       
   174 <a name="l00164"></a>00164 
       
   175 <a name="l00165"></a>00165         ulUcs = (ULONG)(UCHAR)p[0];
       
   176 <a name="l00166"></a>00166 
       
   177 <a name="l00167"></a>00167         <span class="keywordflow">if</span> (ulUcs &lt; 0x80) {
       
   178 <a name="l00168"></a>00168                 *piUtfLen = 1;
       
   179 <a name="l00169"></a>00169                 <span class="keywordflow">return</span> ulUcs;
       
   180 <a name="l00170"></a>00170         }
       
   181 <a name="l00171"></a>00171 
       
   182 <a name="l00172"></a>00172         <span class="keywordflow">if</span> (ulUcs &lt; 0xe0){
       
   183 <a name="l00173"></a>00173                 iCharLen = 2;
       
   184 <a name="l00174"></a>00174                 ulUcs &amp;= 0x1f;
       
   185 <a name="l00175"></a>00175         } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xf0){
       
   186 <a name="l00176"></a>00176                 iCharLen = 3;
       
   187 <a name="l00177"></a>00177                 ulUcs &amp;= 0x0f;
       
   188 <a name="l00178"></a>00178         } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xf8){
       
   189 <a name="l00179"></a>00179                 iCharLen = 4;
       
   190 <a name="l00180"></a>00180                 ulUcs &amp;= 0x07;
       
   191 <a name="l00181"></a>00181         } <span class="keywordflow">else</span> <span class="keywordflow">if</span> (ulUcs &lt; 0xfc){
       
   192 <a name="l00182"></a>00182                 iCharLen = 5;
       
   193 <a name="l00183"></a>00183                 ulUcs &amp;= 0x03;
       
   194 <a name="l00184"></a>00184         } <span class="keywordflow">else</span> {
       
   195 <a name="l00185"></a>00185                 iCharLen = 6;
       
   196 <a name="l00186"></a>00186                 ulUcs &amp;= 0x01;
       
   197 <a name="l00187"></a>00187         }
       
   198 <a name="l00188"></a>00188         <span class="keywordflow">for</span> (iIndex = 1; iIndex &lt; iCharLen; iIndex++) {
       
   199 <a name="l00189"></a>00189                 ulUcs &lt;&lt;= 6;
       
   200 <a name="l00190"></a>00190                 <span class="keywordflow">if</span> (iIndex &lt; iStrLen) {
       
   201 <a name="l00191"></a>00191                         ulUcs |= (ULONG)(UCHAR)p[iIndex] &amp; 0x3f;
       
   202 <a name="l00192"></a>00192                 }
       
   203 <a name="l00193"></a>00193         }
       
   204 <a name="l00194"></a>00194         *piUtfLen = iCharLen;
       
   205 <a name="l00195"></a>00195         <span class="keywordflow">return</span> ulUcs;
       
   206 <a name="l00196"></a>00196 } <span class="comment">/* end of utf8_to_ucs */</span>
       
   207 <a name="l00197"></a>00197 
       
   208 <a name="l00198"></a>00198 <span class="comment">/*</span>
       
   209 <a name="l00199"></a>00199 <span class="comment"> * utf8_strwidth - compute the string width of a UTF-8 string</span>
       
   210 <a name="l00200"></a>00200 <span class="comment"> *</span>
       
   211 <a name="l00201"></a>00201 <span class="comment"> * Returns the string width in columns</span>
       
   212 <a name="l00202"></a>00202 <span class="comment"> */</span>
       
   213 <a name="l00203"></a>00203 <span class="keywordtype">long</span>
       
   214 <a name="l00204"></a>00204 utf8_strwidth(<span class="keyword">const</span> <span class="keywordtype">char</span> *pcString, <span class="keywordtype">size_t</span> tNumchars)
       
   215 <a name="l00205"></a>00205 {
       
   216 <a name="l00206"></a>00206         ULONG   ulUcs;
       
   217 <a name="l00207"></a>00207         <span class="keywordtype">long</span>    lTotal;
       
   218 <a name="l00208"></a>00208         <span class="keywordtype">int</span>     iToGo, iWidth, iUtflen;
       
   219 <a name="l00209"></a>00209 
       
   220 <a name="l00210"></a>00210         fail(pcString == NULL || tNumchars &gt; (<span class="keywordtype">size_t</span>)INT_MAX);
       
   221 <a name="l00211"></a>00211 
       
   222 <a name="l00212"></a>00212         lTotal = 0;
       
   223 <a name="l00213"></a>00213         iToGo = (int)tNumchars;
       
   224 <a name="l00214"></a>00214 
       
   225 <a name="l00215"></a>00215         <span class="keywordflow">while</span> (iToGo &gt; 0 &amp;&amp; *pcString != <span class="charliteral">&#39;\0&#39;</span>) {
       
   226 <a name="l00216"></a>00216                 ulUcs = utf8_to_ucs(pcString, iToGo, &amp;iUtflen);
       
   227 <a name="l00217"></a>00217                 iWidth = iWcWidth(ulUcs);
       
   228 <a name="l00218"></a>00218                 <span class="keywordflow">if</span> (iWidth &gt; 0) {
       
   229 <a name="l00219"></a>00219                         lTotal += iWidth;
       
   230 <a name="l00220"></a>00220                 }
       
   231 <a name="l00221"></a>00221                 pcString += iUtflen;
       
   232 <a name="l00222"></a>00222                 iToGo -= iUtflen;
       
   233 <a name="l00223"></a>00223         }
       
   234 <a name="l00224"></a>00224         NO_DBG_DEC(lTotal);
       
   235 <a name="l00225"></a>00225         <span class="keywordflow">return</span> lTotal;
       
   236 <a name="l00226"></a>00226 } <span class="comment">/* end of utf8_strwidth */</span>
       
   237 <a name="l00227"></a>00227 
       
   238 <a name="l00228"></a>00228 <span class="comment">/*</span>
       
   239 <a name="l00229"></a>00229 <span class="comment"> * utf8_chrlength - get the number of bytes in a UTF-8 character</span>
       
   240 <a name="l00230"></a>00230 <span class="comment"> *</span>
       
   241 <a name="l00231"></a>00231 <span class="comment"> * Returns the number of bytes</span>
       
   242 <a name="l00232"></a>00232 <span class="comment"> */</span>
       
   243 <a name="l00233"></a>00233 <span class="keywordtype">int</span>
       
   244 <a name="l00234"></a>00234 utf8_chrlength(<span class="keyword">const</span> <span class="keywordtype">char</span> *p)
       
   245 <a name="l00235"></a>00235 {
       
   246 <a name="l00236"></a>00236         <span class="keywordtype">int</span>     iUtflen;
       
   247 <a name="l00237"></a>00237 
       
   248 <a name="l00238"></a>00238         fail(p == NULL);
       
   249 <a name="l00239"></a>00239 
       
   250 <a name="l00240"></a>00240         iUtflen = -1;           <span class="comment">/* Just to make sure */</span>
       
   251 <a name="l00241"></a>00241         (void)utf8_to_ucs(p, INT_MAX, &amp;iUtflen);
       
   252 <a name="l00242"></a>00242         NO_DBG_DEC(iUtflen);
       
   253 <a name="l00243"></a>00243         <span class="keywordflow">return</span> iUtflen;
       
   254 <a name="l00244"></a>00244 } <span class="comment">/* end of utf8_chrlength */</span>
       
   255 <a name="l00245"></a>00245 
       
   256 <a name="l00246"></a>00246 <span class="comment">/*</span>
       
   257 <a name="l00247"></a>00247 <span class="comment"> * is_locale_utf8 - return TRUE if the locale is UTF-8</span>
       
   258 <a name="l00248"></a>00248 <span class="comment"> */</span>
       
   259 <a name="l00249"></a>00249 BOOL
       
   260 <a name="l00250"></a>00250 is_locale_utf8(<span class="keywordtype">void</span>)
       
   261 <a name="l00251"></a>00251 {
       
   262 <a name="l00252"></a>00252         <span class="keywordtype">char</span>    szCodeset[20];
       
   263 <a name="l00253"></a>00253 
       
   264 <a name="l00254"></a>00254         szCodeset[0] = <span class="charliteral">&#39;\0&#39;</span>;
       
   265 <a name="l00255"></a>00255         <span class="keywordflow">if</span> (!bGetNormalizedCodeset(szCodeset, <span class="keyword">sizeof</span>(szCodeset), NULL)) {
       
   266 <a name="l00256"></a>00256                 <span class="keywordflow">return</span> FALSE;
       
   267 <a name="l00257"></a>00257         }
       
   268 <a name="l00258"></a>00258         DBG_MSG(szCodeset);
       
   269 <a name="l00259"></a>00259         <span class="keywordflow">return</span> STREQ(szCodeset, <span class="stringliteral">&quot;utf8&quot;</span>);
       
   270 <a name="l00260"></a>00260 } <span class="comment">/* end of is_locale_utf8 */</span>
       
   271 </pre></div></div>
       
   272 <hr size="1"/><address style="text-align: right;"><small>Generated by&nbsp;
       
   273 <a href="http://www.doxygen.org/index.html">
       
   274 <img class="footer" src="doxygen.png" alt="doxygen"/></a> 1.6.2 </small></address>
       
   275 </body>
       
   276 </html>