|
1 /* |
|
2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Note: This file may contain code to generate corrupt files for test purposes. |
|
16 * Such code is excluded from production builds by use of compiler defines; |
|
17 * it is recommended that such code should be removed if this code is ever published publicly. |
|
18 * INCLUDES |
|
19 * |
|
20 */ |
|
21 |
|
22 |
|
23 /** |
|
24 @file |
|
25 @internalComponent |
|
26 @released |
|
27 */ |
|
28 |
|
29 #include "utils.h" |
|
30 #include <wchar.h> |
|
31 #include "utf8.h" |
|
32 #include "utility_interface.h" |
|
33 |
|
34 #define TMP_FILE_STUB L"~si" |
|
35 |
|
36 // =========================================================================== |
|
37 // GLOBAL UTILS FUNCTIONS |
|
38 // =========================================================================== |
|
39 |
|
40 bool FileIsUnicode(LPCWSTR fileName, TEncodingScheme& encScheme) |
|
41 // check whether a text file is in UNICODE format & whether little/big-endian or utf8 |
|
42 { |
|
43 BYTE pBuf[3] = { 0,0,0 }; |
|
44 DWORD dwNumBytes; |
|
45 DWORD ok; |
|
46 const BYTE cUtf8[] = { 0xEF,0xBB, 0xBF }; |
|
47 const BYTE cUcs2LE[] = { 0xFF,0xFE }; |
|
48 const BYTE cUcs2BE[] = { 0xFE,0xFF }; |
|
49 |
|
50 HANDLE hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING); |
|
51 if(hFile == INVALID_HANDLE_VALUE) |
|
52 throw ErrCannotOpenFile; |
|
53 |
|
54 // Make sure we're at the beginning of the file |
|
55 ::SetFilePointer(hFile, 0L, NULL, FILE_BEGIN); |
|
56 |
|
57 ok=::ReadFile(hFile, (LPVOID)pBuf, sizeof(pBuf), &dwNumBytes, NULL); |
|
58 |
|
59 ::CloseHandle(hFile); |
|
60 |
|
61 if (!ok) throw ErrCannotReadFile; |
|
62 |
|
63 if (dwNumBytes>=2 && memcmp(pBuf,cUcs2LE,sizeof(cUcs2LE)) == 0) |
|
64 { |
|
65 encScheme=EUcs2LE; |
|
66 return true; |
|
67 } |
|
68 else if (dwNumBytes>=2 && memcmp(pBuf,cUcs2BE,sizeof(cUcs2BE)) == 0) |
|
69 { |
|
70 encScheme=EUcs2BE; |
|
71 return true; |
|
72 } |
|
73 else |
|
74 { |
|
75 if (dwNumBytes==sizeof(pBuf) && memcmp(pBuf,cUtf8,sizeof(pBuf)) == 0) |
|
76 encScheme=EUtf8; |
|
77 else |
|
78 encScheme=EAscii; |
|
79 return false; |
|
80 } |
|
81 } |
|
82 |
|
83 LPWSTR ConvertUCS2FileToUCS4(LPCWSTR fileName) |
|
84 // convert a UCS-2 file to UCS-4 format |
|
85 { |
|
86 LPWSTR pszTempSource; |
|
87 DWORD dwNumBytes; |
|
88 HANDLE hFile; |
|
89 DWORD fileSize; |
|
90 UTF16 *pBuf; |
|
91 BOOL ok; |
|
92 |
|
93 // open file & get file size |
|
94 hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING); |
|
95 if(hFile == INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
96 fileSize = ::GetFileSize(hFile, NULL); |
|
97 |
|
98 // read in whole file |
|
99 pBuf = new UTF16 [fileSize/2+1]; |
|
100 ok=::ReadFile(hFile, (LPVOID)pBuf, fileSize, &dwNumBytes, NULL); |
|
101 ::CloseHandle(hFile); |
|
102 if (!ok) throw ErrCannotReadFile; |
|
103 |
|
104 int targetLength = (fileSize/2)*sizeof(WCHAR); |
|
105 UCS4 *ptrUCS4 = new UCS4[targetLength + 1]; |
|
106 UTF16* sourceStart = reinterpret_cast<UTF16*>(pBuf); |
|
107 UTF16* sourceEnd = sourceStart + (fileSize/2)*(sizeof(WCHAR)/2); |
|
108 UCS4* targetStart = reinterpret_cast<UCS4*>(ptrUCS4); |
|
109 UCS4* targetEnd = targetStart + targetLength; |
|
110 ConvertUTF16toUCS4(&sourceStart, sourceEnd, &targetStart, targetEnd); |
|
111 int endOffset = (UCS4*)targetStart - ptrUCS4; |
|
112 targetStart = reinterpret_cast<UCS4*>(ptrUCS4) + endOffset; |
|
113 *targetStart = 0; |
|
114 |
|
115 |
|
116 // write to new temporary file |
|
117 pszTempSource=TempFileName(); |
|
118 hFile = ::MakeSISOpenFile(pszTempSource, GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS); |
|
119 if (hFile==INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
120 |
|
121 ok = ::WriteFile(hFile, (LPVOID)ptrUCS4, targetLength, &dwNumBytes, NULL); |
|
122 ::CloseHandle(hFile); |
|
123 |
|
124 delete [] pBuf; |
|
125 |
|
126 if (!ok) |
|
127 { |
|
128 _wunlink(pszTempSource); |
|
129 throw ErrCannotConvertFile; |
|
130 } |
|
131 return pszTempSource; |
|
132 } |
|
133 |
|
134 |
|
135 LPWSTR ConvertUCS2FileToLittleEndianUnicode(LPCWSTR fileName) |
|
136 // convert a UCS-2 big-endian UNICODE file to a little-endian UNICODE file |
|
137 { |
|
138 LPWSTR pszTempSource; |
|
139 DWORD dwNumBytes; |
|
140 HANDLE hFile; |
|
141 DWORD fileSize; |
|
142 LPWSTR pBuf; |
|
143 BOOL ok; |
|
144 |
|
145 // open file & get file size |
|
146 hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING); |
|
147 if(hFile == INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
148 fileSize = ::GetFileSize(hFile, NULL); |
|
149 |
|
150 // read in whole file |
|
151 pBuf = new WCHAR [fileSize/2+1]; |
|
152 ok=::ReadFile(hFile, (LPVOID)pBuf, fileSize, &dwNumBytes, NULL); |
|
153 ::CloseHandle(hFile); |
|
154 if (!ok) throw ErrCannotReadFile; |
|
155 |
|
156 // convert text to little endian unicode |
|
157 for (DWORD i=0; i<(fileSize/2); i++) |
|
158 pBuf[i]=(WCHAR)(((pBuf[i]&0xFF00)>>8) | ((pBuf[i]&0xFF)<<8)); |
|
159 |
|
160 // write to new temporary file |
|
161 pszTempSource=TempFileName(); |
|
162 hFile = ::MakeSISOpenFile(pszTempSource, GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS); |
|
163 if (hFile==INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
164 ok=::WriteFile(hFile, (LPVOID)pBuf, fileSize, &dwNumBytes, NULL); |
|
165 ::CloseHandle(hFile); |
|
166 delete [] pBuf; |
|
167 if (!ok) |
|
168 { |
|
169 _wunlink(pszTempSource); |
|
170 throw ErrCannotWriteFile; |
|
171 } |
|
172 return pszTempSource; |
|
173 } |
|
174 |
|
175 LPWSTR ConvertFileToUnicode(LPCWSTR fileName, TEncodingScheme encScheme) |
|
176 // convert text file to UNICODE |
|
177 { |
|
178 LPWSTR pszTempSource; |
|
179 DWORD dwNumBytes; |
|
180 HANDLE hFile; |
|
181 DWORD fileSize; |
|
182 LPSTR pNarrowBuf; |
|
183 LPWSTR pBufU; |
|
184 BOOL ok; |
|
185 |
|
186 // open file & get file size |
|
187 hFile = ::MakeSISOpenFile(fileName, GENERIC_READ, OPEN_EXISTING); |
|
188 if(hFile == INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
189 fileSize = ::GetFileSize(hFile, NULL); |
|
190 |
|
191 // read in whole file |
|
192 // Linux needs mbstowcs(NULL,src,0)+1 for buffer allocation so added +1. |
|
193 pNarrowBuf = new CHAR [fileSize+1]; |
|
194 ok=::ReadFile(hFile, (LPVOID)pNarrowBuf, fileSize, &dwNumBytes, NULL); |
|
195 ::CloseHandle(hFile); |
|
196 if (!ok) throw ErrCannotReadFile; |
|
197 |
|
198 if ( EAscii == encScheme ) |
|
199 { |
|
200 if ( !CorrectUTF8(pNarrowBuf,dwNumBytes) ) |
|
201 throw ErrCannotConvertFile; |
|
202 } |
|
203 DWORD dwConvCount = ConvertMultiByteToWideChar(pNarrowBuf,dwNumBytes,0,0); |
|
204 pBufU=new WCHAR [dwConvCount]; |
|
205 |
|
206 // make sure the buffer is zeroed |
|
207 memset(pBufU,0,sizeof(WCHAR)*dwConvCount); |
|
208 |
|
209 dwConvCount = ConvertMultiByteToWideChar(pNarrowBuf,dwNumBytes,pBufU,dwConvCount); |
|
210 if ( !dwConvCount ) throw ErrCannotConvertFile; |
|
211 |
|
212 // write to new temporary file |
|
213 pszTempSource=TempFileName(); |
|
214 hFile = ::MakeSISOpenFile(pszTempSource, GENERIC_WRITE|GENERIC_READ, CREATE_ALWAYS); |
|
215 if (hFile==INVALID_HANDLE_VALUE) throw ErrCannotOpenFile; |
|
216 |
|
217 ok = ::WriteFile(hFile, (LPVOID)pBufU, dwConvCount*sizeof(WCHAR), &dwNumBytes, NULL); |
|
218 ::CloseHandle(hFile); |
|
219 |
|
220 delete [] pNarrowBuf; |
|
221 delete [] pBufU; |
|
222 if (!ok) |
|
223 { |
|
224 _wunlink(pszTempSource); |
|
225 throw ErrCannotConvertFile; |
|
226 } |
|
227 |
|
228 return pszTempSource; |
|
229 } |
|
230 |
|
231 LPWSTR TempFileName() |
|
232 // generate a unique temporary filename |
|
233 // creates a sub-directory in TEMP and uses fileName as a guide for the |
|
234 // filename |
|
235 // returns name of temporary directory if fileName is NULL |
|
236 { |
|
237 static WCHAR tmpFileName[PATHMAX]={'\0'}; |
|
238 static WCHAR tmpPath[PATHMAX]={'\0'}; |
|
239 if (*tmpPath=='\0') |
|
240 { |
|
241 GetTempPathW(PATHMAX,tmpPath); |
|
242 tmpFileName [0] = 0; |
|
243 GetTempFileNameW (tmpPath, TMP_FILE_STUB, 0, tmpFileName); |
|
244 } |
|
245 return tmpFileName; |
|
246 } |
|
247 |
|
248 bool CorrectUTF8(LPSTR pNarrowBuf, DWORD dwNumBytes) |
|
249 { |
|
250 bool bUtf8Text = true; |
|
251 int nBadUtf = 0; |
|
252 wchar_t szDbg[255] = {L"0"}; |
|
253 for (DWORD i = 1; i < dwNumBytes; i++) |
|
254 { |
|
255 if ( (pNarrowBuf[i] & 0xC0) == 0x80 ) |
|
256 // if the uppermost bit in current byte is set... |
|
257 { |
|
258 if ( (pNarrowBuf[i-1] & 0x80) == 0x00 ) |
|
259 // but previous byte has it reset... |
|
260 { |
|
261 // if current byte is not 'No-Break Space' char... |
|
262 if ( pNarrowBuf[i] != (CHAR)0xA0 ) |
|
263 { |
|
264 nBadUtf ++; |
|
265 wprintf(L"Detecting illegal UTF8 character at position 0x%02X : 0x%02X \n",i,pNarrowBuf[i]); |
|
266 } |
|
267 } |
|
268 } |
|
269 else |
|
270 // Overlong encoding: lead-byte of a 2 byte sequence, but code point <= 127 |
|
271 { |
|
272 if ( (pNarrowBuf[i-1] & 0xC0) == 0xC0 ) |
|
273 { |
|
274 nBadUtf ++; |
|
275 wprintf(L"Detecting illegal UTF8 character at position 0x%02X: 0x%02X\n",i,pNarrowBuf[i]); |
|
276 } |
|
277 } |
|
278 } |
|
279 if ( nBadUtf ) |
|
280 bUtf8Text = false; |
|
281 |
|
282 return bUtf8Text; |
|
283 } |
|
284 |