|
1 /* |
|
2 * Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of the License "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * Unicode converter codes |
|
16 * |
|
17 */ |
|
18 |
|
19 |
|
20 /** |
|
21 @file |
|
22 @internalComponent |
|
23 */ |
|
24 |
|
25 #ifndef __UNICODE_CONVERTOR_H__ |
|
26 #define __UNICODE_CONVERTOR_H__ |
|
// Switch on the VC6 fstream workaround automatically, but only when really
// building with Visual C++ 6.0 or older (_MSC_VER <= 1200).
// An undefined _MSC_VER evaluates to 0 inside #if, so without the defined()
// test every non-Microsoft compiler would select the workaround as well.
// The !defined() test avoids redefining the macro when the including file has
// already defined it by hand.
#if defined(_MSC_VER) && (_MSC_VER <= 1200) && !defined(FIX_VC6_BROKEN_FSTREAM)
#define FIX_VC6_BROKEN_FSTREAM
#endif
|
30 |
|
// By default the C++ wide streams narrow every wchar_t to char before writing to disk!
// This file defeats the narrowing, whilst still mapping NL <-> CR/NL.

// This code is hard coded to use little endian UTF-16.

// You probably want to #define FIX_VC6_BROKEN_FSTREAM, then
// include this file, and do the following to create an output stream:
//
// std::wofstream out;
// locale loc = _ADDFAC(locale::classic(), new NullCodecvt);
//
// out.imbue(loc);
// out.open("c:\\test.txt", std::ios_base::out|std::ios_base::trunc|std::ios_base::binary);
// out << wchar_t(0xfeff); // UTF-16 BOM - written by NullCodecvt as ff,fe == little endian
//
// Input streams should work as well, but the user will have to discard the BOM (if present).
|
47 |
|
48 #include <iostream> |
|
49 #ifdef FIX_VC6_BROKEN_FSTREAM |
|
50 #define fwrite(a,b,c,d) fwrite_hack(a,b,c,d) |
|
51 #endif |
|
52 #include <fstream> |
|
53 #ifdef FIX_VC6_BROKEN_FSTREAM |
|
54 #undef fwrite |
|
static int fwrite_hack(const void *buffer, size_t size, size_t num, FILE *stream)
{
    // Stand-in for fwrite() while <fstream> is being compiled by VC6.
    //
    // The VC6 fstream issues two kinds of call:
    //   fwrite(p, N, 1, fs) == 1   for "noconv" data (via _Fputc), and
    //   fwrite(p, N, 1, fs) == N   for "ok"/"partial" data, which is wrong.
    // The codecvt always claims to have converted, so only the second form
    // is reached. Passing the size and count to the real fwrite() in swapped
    // order makes it return the value that form expects.
    const size_t itemsWritten = fwrite(buffer, num, size, stream);
    return static_cast<int>(itemsWritten);
}
|
64 #endif |
|
65 |
|
// Kept for backward compatibility: code that includes this header (see the
// usage notes at the top of the file) relies on unqualified std names.
using namespace std;

typedef codecvt<wchar_t, char, mbstate_t> NullCodecvtBase;

// Exception specification for the non-throwing overrides below.
// Replaces the library-internal _THROW0() macro, which only exists in some
// Microsoft/Dinkumware library versions.
#ifndef NULLCODECVT_NOTHROW
#if (defined(__cplusplus) && (__cplusplus >= 201103L)) || (defined(_MSC_VER) && (_MSC_VER >= 1900))
#define NULLCODECVT_NOTHROW noexcept
#else
#define NULLCODECVT_NOTHROW throw()
#endif
#endif

// CLASS NullCodecvt
//
// codecvt facet that stores each wchar_t as one little endian 16 bit code
// unit (two bytes) instead of narrowing it to a single char.
//
// Line ends are translated on the way through:
//   writing: CR is dropped, NL is written as CR NL (0d 00 0a 00)
//   reading: CR is dropped, so CR NL collapses to NL
//
// Only the low 16 bits of each wchar_t are written; no surrogate pairs are
// generated or combined.
//
// do_in() and do_out() report only 'ok' (all input consumed) or 'partial'
// (input left over because the output is full or a trailing odd byte was
// seen). 'noconv' is never returned: it tells the stream buffer to ignore
// the converted output, which is only valid when the internal and external
// character types are the same. Always claiming a conversion is also what
// the VC6 workaround (FIX_VC6_BROKEN_FSTREAM) requires.
class NullCodecvt : public NullCodecvtBase
{
public:
    // refs is passed straight to the base facet (facet reference counting).
    explicit NullCodecvt(size_t refs = 0)
        : NullCodecvtBase(refs) {}

protected:
    // External bytes -> wchar_t.
    // Consumes whole byte pairs from [fromStart, fromEnd) and stores the
    // decoded characters in [toStart, toLimit), dropping CR characters.
    // fromNext/toNext are left one past the last byte consumed/character stored.
    // Returns ok when every input byte was consumed, otherwise partial.
    virtual result do_in(mbstate_t& /*state*/,
        const char *fromStart, const char *fromEnd, const char *& fromNext,
        wchar_t *toStart, wchar_t *toLimit, wchar_t *& toNext) const
    {
        fromNext = fromStart;
        toNext = toStart;

        while(((fromEnd - fromNext) >= 2) && ((toLimit - toNext) >= 1))
        {
            const wchar_t wch = DecodeLE(fromNext);
            fromNext += 2;

            if(wch == L'\r')
            {
                // Drop CR characters
                continue;
            }

            *toNext++ = wch;
        }

        return (fromNext == fromEnd) ? ok : partial;
    }

    // wchar_t -> external bytes.
    // Writes each character of [fromStart, fromEnd) into [toStart, toLimit)
    // as two bytes, low byte first. CR is skipped and NL expands to CR NL.
    // fromNext/toNext are left one past the last character consumed/byte stored.
    // Returns ok when every input character was consumed, otherwise partial.
    virtual result do_out(mbstate_t& /*state*/,
        const wchar_t *fromStart, const wchar_t *fromEnd, const wchar_t *& fromNext,
        char *toStart, char *toLimit, char *& toNext) const
    {
        fromNext = fromStart;
        toNext = toStart;

        while(((fromEnd - fromNext) >= 1) && ((toLimit - toNext) >= 2))
        {
            const wchar_t wch = *fromNext;

            if(wch == L'\r')
            {
                // Do not expect CR internally, skip
                ++fromNext;
                continue;
            }

            if(wch == L'\n')
            {
                if((toLimit - toNext) < 4)
                {
                    // Not enough space for CR NL.
                    // Hopefully the caller comes back with at least
                    // do_max_length() bytes (ie. 4).
                    break;
                }
                *toNext++ = 0x0d;   // 16 bit CR (little endian)
                *toNext++ = 0x00;
                *toNext++ = 0x0a;   // 16 bit NL
                *toNext++ = 0x00;
                ++fromNext;
                continue;
            }

            const unsigned int unit = static_cast<unsigned int>(wch);
            *toNext++ = static_cast<char>(unit & 0xffu);          // low byte first
            *toNext++ = static_cast<char>((unit >> 8) & 0xffu);
            ++fromNext;
        }

        return (fromNext == fromEnd) ? ok : partial;
    }

    // There is no shift state, so nothing is ever written here.
    // toNext is still set so that callers never read an indeterminate pointer.
    virtual result do_unshift(mbstate_t& /*state*/,
        char *toStart, char * /*toLimit*/, char *& toNext) const
    {
        toNext = toStart;
        return noconv;
    }

    // Number of bytes of [from, fromEnd) that do_in() would consume to
    // produce at most maxInternal characters (dropped CRs consume bytes
    // but produce nothing).
    virtual int do_length(mbstate_t& /*state*/, const char *from,
        const char *fromEnd, size_t maxInternal) const NULLCODECVT_NOTHROW
    {
        size_t produced = 0;
        const char *fromNext = from;
        while(((fromEnd - fromNext) >= 2) && (produced < maxInternal))
        {
            const wchar_t wch = DecodeLE(fromNext);
            fromNext += 2;

            if(wch != L'\r')
            {
                ++produced;
            }
        }
        return static_cast<int>(fromNext - from);
    }

    virtual bool do_always_noconv() const NULLCODECVT_NOTHROW
    {
        // Sometimes we do CRNL -> NL conversion
        return false;
    }

    virtual int do_max_length() const NULLCODECVT_NOTHROW
    {
        // This is documented as the max number of external chars (bytes) that
        // could be consumed to create a single wchar_t.
        // We convert 1 to 1, except for dropping CRs. Typically that is at
        // worst 4 external bytes (16 bit CR, 16 bit NL) for a single NL,
        // although a run of CRs of any length could be consumed.
        // In the other direction a single wchar_t (NL) becomes at most
        // 4 external bytes.
        return 4;
    }

    virtual int do_encoding() const NULLCODECVT_NOTHROW
    {
        return 0; // Variable length encoding
    }

private:
    // Assemble one 16 bit little endian code unit from bytes[0..1].
    // The bytes go through unsigned char so that values >= 0x80 do not
    // sign-extend where plain char is signed.
    static wchar_t DecodeLE(const char *bytes)
    {
        const unsigned int lo = static_cast<unsigned char>(bytes[0]);
        const unsigned int hi = static_cast<unsigned char>(bytes[1]);
        return static_cast<wchar_t>(lo | (hi << 8));
    }
};
|
224 |
|
225 |
|
226 #endif |