|
1 /* |
|
2 * Copyright (c) 1995-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 |
|
18 #include <stdio.h> |
|
19 #include <stdlib.h> |
|
20 #include <string.h> |
|
21 #include <string> |
|
22 #include <new> |
|
23 |
|
24 #ifdef WIN32 |
|
25 #include <windows.h> |
|
26 #else |
|
27 #include <strings.h> |
|
28 #include <iconv.h> |
|
29 #endif |
|
30 |
|
31 #include "uniconv.hpp" |
|
32 |
|
33 |
|
34 int UniConv::DefaultCodePage2UTF8(const char* DCPStringRef, unsigned int DCPLength, char** UTF8StringRef, unsigned int* UTFLength) throw () |
|
35 { |
|
36 int reslen = -1; |
|
37 |
|
38 if(!UTF8StringRef || !UTFLength || !DCPStringRef) |
|
39 return (int)-1; |
|
40 |
|
41 #ifdef WIN32 |
|
42 //make Unicode string from its default code page |
|
43 reslen = MultiByteToWideChar(CP_ACP, 0, DCPStringRef, DCPLength, NULL, 0); |
|
44 if(0 == reslen) |
|
45 return (int)-1; |
|
46 WCHAR* unistr = new (std::nothrow) WCHAR[reslen+1]; |
|
47 if(!unistr) |
|
48 return (int)-1; |
|
49 |
|
50 reslen = MultiByteToWideChar(CP_ACP, 0, DCPStringRef, DCPLength, unistr, reslen); |
|
51 if(0 == reslen) |
|
52 { |
|
53 delete[] unistr; |
|
54 return (int)-1; |
|
55 } |
|
56 |
|
57 //make UTF-8 string from its Unicode encoding |
|
58 unsigned int realutflen = 0; |
|
59 realutflen = WideCharToMultiByte(CP_UTF8, 0, unistr, reslen, NULL, 0, NULL, NULL); |
|
60 if(0 == realutflen) |
|
61 { |
|
62 delete[] unistr; |
|
63 return (int)-1; |
|
64 } |
|
65 if(realutflen+1 > *UTFLength) |
|
66 { |
|
67 if(*UTF8StringRef) |
|
68 delete[] *UTF8StringRef; |
|
69 *UTF8StringRef = new (std::nothrow) char[realutflen+1]; |
|
70 if(!*UTF8StringRef) |
|
71 { |
|
72 delete[] unistr; |
|
73 *UTFLength = 0; |
|
74 return (int)-1; |
|
75 } |
|
76 } |
|
77 *UTFLength = realutflen; |
|
78 reslen = WideCharToMultiByte(CP_UTF8, 0, unistr, reslen, *UTF8StringRef, *UTFLength, NULL, NULL); |
|
79 (*UTF8StringRef)[realutflen] = 0; |
|
80 |
|
81 if(0 == reslen) |
|
82 reslen = (int)-1; |
|
83 |
|
84 //clean up temporarily allocated resources |
|
85 delete[] unistr; |
|
86 #else |
|
87 //character set format: language[_territory][.codeset][@modifier] |
|
88 char* dcp = getenv("LANG"); |
|
89 if(!dcp) |
|
90 return (int)-1; |
|
91 char* dot = strstr(dcp, "."); |
|
92 if(dot) |
|
93 dcp += ((dot-dcp) + 1); |
|
94 char* atmark = strstr(dcp, "@"); |
|
95 if(atmark) |
|
96 *(atmark) = 0; |
|
97 if(strcasecmp(dcp, "UTF-8") == 0) |
|
98 { |
|
99 strcpy(*UTF8StringRef, DCPStringRef); |
|
100 *UTFLength = DCPLength; |
|
101 return DCPLength; |
|
102 } |
|
103 iconv_t convhan = iconv_open("UTF-8", dcp); |
|
104 if((iconv_t)(-1) == convhan) |
|
105 return (int)-1; |
|
106 char* utf8str = new (std::nothrow) char[DCPLength*4]; |
|
107 if(!utf8str) |
|
108 { |
|
109 iconv_close(convhan); |
|
110 return (int)-1; |
|
111 } |
|
112 int realutflen = DCPLength*4; |
|
113 int origLen = realutflen; |
|
114 char* pout = utf8str; |
|
115 if(iconv(convhan, const_cast<char**>(&DCPStringRef), (size_t*)&DCPLength, &pout, (size_t*)&realutflen) < 0) |
|
116 { |
|
117 iconv_close(convhan); |
|
118 delete[] utf8str; |
|
119 return (int)-1; |
|
120 } |
|
121 realutflen = origLen - realutflen; |
|
122 if((unsigned int)(realutflen+1) > *UTFLength) |
|
123 { |
|
124 if(*UTF8StringRef) |
|
125 delete[] *UTF8StringRef; |
|
126 *UTF8StringRef = new (std::nothrow) char[realutflen+1]; |
|
127 if(!*UTF8StringRef) |
|
128 { |
|
129 delete[] utf8str; |
|
130 iconv_close(convhan); |
|
131 return (int)-1; |
|
132 } |
|
133 } |
|
134 strncpy(*UTF8StringRef, utf8str, realutflen); |
|
135 (*UTF8StringRef)[realutflen] = 0; |
|
136 *UTFLength = realutflen; |
|
137 reslen = realutflen; |
|
138 delete[] utf8str; |
|
139 iconv_close(convhan); |
|
140 #endif |
|
141 |
|
142 return reslen; |
|
143 } |
|
144 |
|
145 int UniConv::UTF82DefaultCodePage(const char* UTF8StringRef, unsigned int UTFLength, char** DCPStringRef, unsigned int* DCPLength) throw () |
|
146 { |
|
147 int reslen = -1; |
|
148 |
|
149 if(!DCPStringRef || !DCPLength || !UTF8StringRef) |
|
150 return (int)-1; |
|
151 |
|
152 #ifdef WIN32 |
|
153 //make Unicode string from its UTF-8 encoding |
|
154 reslen = MultiByteToWideChar(CP_UTF8, 0, UTF8StringRef, UTFLength, NULL, 0); |
|
155 if(0 == reslen) |
|
156 return (int)-1; |
|
157 WCHAR* unistr = new (std::nothrow) WCHAR[reslen+1]; |
|
158 if(!unistr) |
|
159 return (int)-1; |
|
160 |
|
161 reslen = MultiByteToWideChar(CP_UTF8, 0, UTF8StringRef, UTFLength, unistr, reslen); |
|
162 if(0 == reslen) |
|
163 { |
|
164 delete[] unistr; |
|
165 return (int)-1; |
|
166 } |
|
167 |
|
168 //make default code paged string from its Unicode encoding |
|
169 unsigned int realdcplen = 0; |
|
170 realdcplen = WideCharToMultiByte(CP_ACP, 0, unistr, reslen, NULL, 0, NULL, NULL); |
|
171 if(0 == realdcplen) |
|
172 { |
|
173 delete[] unistr; |
|
174 return (int)-1; |
|
175 } |
|
176 if(realdcplen+1 > *DCPLength) |
|
177 { |
|
178 if(*DCPStringRef) |
|
179 delete[] *DCPStringRef; |
|
180 *DCPStringRef = new (std::nothrow) char[realdcplen+1]; |
|
181 if(!*DCPStringRef) |
|
182 { |
|
183 delete[] unistr; |
|
184 *DCPLength = 0; |
|
185 return (int)-1; |
|
186 } |
|
187 } |
|
188 *DCPLength = realdcplen; |
|
189 reslen = WideCharToMultiByte(CP_ACP, 0, unistr, reslen, *DCPStringRef, *DCPLength, NULL, NULL); |
|
190 (*DCPStringRef)[realdcplen] = 0; |
|
191 |
|
192 if(0 == reslen) |
|
193 reslen = (int)-1; |
|
194 |
|
195 //clean up temporarily allocated resources |
|
196 delete[] unistr; |
|
197 #else |
|
198 //character set format: language[_territory][.codeset][@modifier] |
|
199 char* dcp = getenv("LANG"); |
|
200 if(!dcp) |
|
201 return (int)-1; |
|
202 |
|
203 char* dot = strstr(dcp, "."); |
|
204 if(dot) |
|
205 dcp += ((dot-dcp) + 1); |
|
206 char* atmark = strstr(dcp, "@"); |
|
207 if(atmark) |
|
208 *(atmark) = 0; |
|
209 iconv_t convhan = iconv_open(dcp, "UTF-8"); |
|
210 if((iconv_t)(-1) == convhan) |
|
211 return (int)-1; |
|
212 char* dcpstr = new (std::nothrow) char[UTFLength*4]; |
|
213 if(!dcpstr) |
|
214 { |
|
215 iconv_close(convhan); |
|
216 return (int)-1; |
|
217 } |
|
218 int realdcplen = UTFLength*4; |
|
219 int origLen = realdcplen; |
|
220 char* pout = dcpstr; |
|
221 if(iconv(convhan, const_cast<char**>(&UTF8StringRef), (size_t*)&UTFLength, &pout, (size_t*)&realdcplen) < 0) |
|
222 { |
|
223 iconv_close(convhan); |
|
224 delete[] dcpstr; |
|
225 return (int)-1; |
|
226 } |
|
227 realdcplen = origLen - realdcplen; |
|
228 if((unsigned int)(realdcplen+1) > *DCPLength) |
|
229 { |
|
230 if(*DCPStringRef) |
|
231 delete[] *DCPStringRef; |
|
232 *DCPStringRef = new (std::nothrow) char[realdcplen+1]; |
|
233 if(!*DCPStringRef) |
|
234 { |
|
235 delete[] dcpstr; |
|
236 iconv_close(convhan); |
|
237 return (int)-1; |
|
238 } |
|
239 } |
|
240 strncpy(*DCPStringRef, dcpstr, realdcplen); |
|
241 (*DCPStringRef)[realdcplen] = 0; |
|
242 *DCPLength = realdcplen; |
|
243 reslen = realdcplen; |
|
244 delete[] dcpstr; |
|
245 iconv_close(convhan); |
|
246 #endif |
|
247 |
|
248 return reslen; |
|
249 } |
|
250 |
|
251 |
|
252 bool UniConv::IsPureASCIITextStream(const char* StringRef) throw () |
|
253 { |
|
254 while (*StringRef && !(*StringRef++ & 0x80)) |
|
255 ; |
|
256 if (*StringRef) |
|
257 return false; |
|
258 else |
|
259 return true; |
|
260 } |