/*
* Copyright (c) 2000-2005 Nokia Corporation and/or its subsidiary(-ies).
* All rights reserved.
* This component and the accompanying materials are made available
* under the terms of "Eclipse Public License v1.0"
* which accompanies this distribution, and is available
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
*
* Initial Contributors:
* Nokia Corporation - initial contribution.
*
* Contributors:
*
* Description:
*
*/
/* ================================================================ */
/*
File: ConvertUTF7.c
Author: David B. Goldsmith
Copyright (C) 1994, 1996 Taligent, Inc. All rights reserved.

This code is copyrighted. Under the copyright laws, this code may not
be copied, in whole or part, without prior written consent of Taligent.

Taligent grants the right to use this code as long as this ENTIRE
copyright notice is reproduced in the code. The code is provided
AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
LIMITATION MAY NOT APPLY TO YOU.

RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
government is subject to restrictions as set forth in subparagraph
(c)(l)(ii) of the Rights in Technical Data and Computer Software
clause at DFARS 252.227-7013 and FAR 52.227-19.

This code may be protected by one or more U.S. and International
Patents.

TRADEMARKS: Taligent and the Taligent Design Mark are registered
trademarks of Taligent, Inc.
*/

52 // #include "CVTUTF7.H" // commented out by DPB |
|
53 #include "UTF7.H" // added by DPB |
|
54 #pragma warning (disable: 4706) // added by DPB (warning disabled: "assignment within conditional expression") |
|
55 |
|
/* Base64 alphabet: base64[v] is the character that encodes the 6-bit value v. */
static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Characters that are always written to the UTF-7 stream as themselves. */
static const char direct[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";

/* Characters that are written as themselves only when the caller of the
   encoder passes a non-zero "optional" argument. */
static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}";

/* White space that is always written as itself. */
static const char spaces[] = " \011\015\012"; /* space, tab, return, line feed */

/* The inverse-base64 and "must shift" lookup tables are not file-scope
   variables: tabinit() fills arrays supplied by its caller instead. */

/* Character that opens a base64-encoded run. */
#define SHIFT_IN '+'
/* Character that explicitly closes a base64-encoded run. */
#define SHIFT_OUT '-'

/*
 * Local string-length routine (added by DPB).
 *
 * Returns the number of characters that precede the terminating '\0' of
 * aZeroTerminatedString.
 *
 * NOTE: this has the same name as the C library strlen() but returns int
 * rather than size_t, so this file must not also pull in the <string.h>
 * declaration. The pointer difference is narrowed to int explicitly.
 */
int strlen(const char* aZeroTerminatedString)
{
    const char* end = aZeroTerminatedString;

    while (*end != '\0')
    {
        ++end;
    }
    return (int)(end - aZeroTerminatedString);
}

81 static void |
|
82 // tabinit() // commented out by DPB |
|
83 tabinit(short invbase64[], char mustshiftsafe[], char mustshiftopt[]) // added by DPB |
|
84 { |
|
85 int i, limit; |
|
86 |
|
87 for (i = 0; i < 128; ++i) |
|
88 { |
|
89 mustshiftopt[i] = mustshiftsafe[i] = 1; |
|
90 invbase64[i] = -1; |
|
91 } |
|
92 limit = strlen(direct); |
|
93 for (i = 0; i < limit; ++i) |
|
94 mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0; |
|
95 limit = strlen(spaces); |
|
96 for (i = 0; i < limit; ++i) |
|
97 mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0; |
|
98 limit = strlen(optional); |
|
99 for (i = 0; i < limit; ++i) |
|
100 mustshiftopt[optional[i]] = 0; |
|
101 limit = strlen(base64); |
|
102 for (i = 0; i < limit; ++i) |
|
103 invbase64[base64[i]] = (short)i; // cast added by DPB |
|
104 |
|
105 // needtables = 0; // commented out by DPB |
|
106 } |
|
107 |
|
/*
 * Bit buffer shared by both converters.
 *
 * DECLARE_BIT_BUFFER declares three locals: BITbuffer (the pending bits),
 * buffertemp (scratch used by READ_N_BITS) and bufferbits (how many bits are
 * pending). The pending bits are kept left-justified within the low 32 bits
 * of BITbuffer: the oldest pending bit is bit 31.
 *
 * unsigned long may be wider than 32 bits, so every operation masks its
 * result back to 32 bits; otherwise bits that have already been read would
 * survive above bit 31 and leak into later reads. The n == 0 cases are
 * handled separately so that no shift count ever reaches 32.
 */
#define DECLARE_BIT_BUFFER unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0

/* Number of bits currently pending in the buffer. */
#define BITS_IN_BUFFER bufferbits

/* Append the low n bits of x behind the bits already pending.
   Requires 0 <= n and n + bufferbits <= 32. */
#define WRITE_N_BITS(x, n) \
    ((BITbuffer |= ((n) > 0 \
        ? ((((unsigned long)(x)) & ~(~0UL << (n))) << (32 - (n) - bufferbits)) \
        : 0UL)), \
     bufferbits += (n))

/* Remove the n oldest pending bits and yield them as an unsigned long.
   Requires 0 <= n <= 32. */
#define READ_N_BITS(n) \
    ((buffertemp = ((n) > 0 ? ((BITbuffer & 0xFFFFFFFFUL) >> (32 - (n))) : 0UL)), \
     (BITbuffer = ((n) < 32 ? ((BITbuffer << (n)) & 0xFFFFFFFFUL) : 0UL)), \
     (bufferbits -= (n)), \
     buffertemp)

/* Stop converting when the output is full. This must remain a plain braced
   block (not do { } while (0)): the break has to leave the loop that the
   macro is used in. */
#define TARGETCHECK {if (target >= targetEnd) {result = targetExhausted; break;}}

114 EXPORT_C // added by DPB |
|
115 ConversionResult ConvertUCS2toUTF7( |
|
116 UCS2** sourceStart, UCS2* sourceEnd, |
|
117 char** targetStart, char* targetEnd, |
|
118 int optional, int verbose) |
|
119 { |
|
120 ConversionResult result = ok; |
|
121 DECLARE_BIT_BUFFER; |
|
122 int shifted = 0, needshift = 0, done = 0; |
|
123 register UCS2 *source = *sourceStart; |
|
124 register char *target = *targetStart; |
|
125 char *mustshift; |
|
126 |
|
127 short invbase64[128]; // added by DPB |
|
128 char mustshiftsafe[128]; // added by DPB |
|
129 char mustshiftopt[128]; // added by DPB |
|
130 // if (needtables) // commented out by DPB |
|
131 // tabinit(); // commented out by DPB |
|
132 tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB |
|
133 |
|
134 if (optional) |
|
135 mustshift = mustshiftopt; |
|
136 else |
|
137 mustshift = mustshiftsafe; |
|
138 |
|
139 do |
|
140 { |
|
141 register UCS2 r=0; // initialised to 0 by DPB (to avoid GCC warning) |
|
142 |
|
143 done = source >= sourceEnd; |
|
144 if (!done) |
|
145 r = *source++; |
|
146 needshift = (!done && ((r > 0x7f) || mustshift[r])); |
|
147 |
|
148 if (needshift && !shifted) |
|
149 { |
|
150 TARGETCHECK; |
|
151 *target++ = SHIFT_IN; |
|
152 /* Special case handling of the SHIFT_IN character */ |
|
153 if (r == (UCS2)SHIFT_IN) { |
|
154 TARGETCHECK; |
|
155 *target++ = SHIFT_OUT; |
|
156 } |
|
157 else |
|
158 shifted = 1; |
|
159 } |
|
160 |
|
161 if (shifted) |
|
162 { |
|
163 /* Either write the character to the bit buffer, or pad |
|
164 the bit buffer out to a full base64 character. |
|
165 */ |
|
166 if (needshift) |
|
167 WRITE_N_BITS(r, 16); |
|
168 else |
|
169 WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6); |
|
170 |
|
171 /* Flush out as many full base64 characters as possible |
|
172 from the bit buffer. |
|
173 */ |
|
174 while ((target < targetEnd) && BITS_IN_BUFFER >= 6) |
|
175 { |
|
176 int temp=READ_N_BITS(6); // added by DPB |
|
177 // *target++ = base64[READ_N_BITS(6)]; // commented out by DPB |
|
178 *target++ = base64[temp]; // added by DPB |
|
179 } |
|
180 |
|
181 if (BITS_IN_BUFFER >= 6) |
|
182 TARGETCHECK; |
|
183 |
|
184 if (!needshift) |
|
185 { |
|
186 /* Write the explicit shift out character if |
|
187 1) The caller has requested we always do it, or |
|
188 2) The directly encoded character is in the |
|
189 base64 set, or |
|
190 3) The directly encoded character is SHIFT_OUT. |
|
191 */ |
|
192 if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT))) |
|
193 { |
|
194 TARGETCHECK; |
|
195 *target++ = SHIFT_OUT; |
|
196 } |
|
197 shifted = 0; |
|
198 } |
|
199 } |
|
200 |
|
201 /* The character can be directly encoded as ASCII. */ |
|
202 if (!needshift && !done) |
|
203 { |
|
204 TARGETCHECK; |
|
205 *target++ = (char) r; |
|
206 } |
|
207 |
|
208 } |
|
209 while (!done); |
|
210 |
|
211 *sourceStart = source; |
|
212 *targetStart = target; |
|
213 return result; |
|
214 } |
|
215 |
|
216 EXPORT_C // added by DPB |
|
217 ConversionResult ConvertUTF7toUCS2( |
|
218 char** sourceStart, char* sourceEnd, |
|
219 UCS2** targetStart, UCS2* targetEnd) |
|
220 { |
|
221 ConversionResult result = ok; |
|
222 DECLARE_BIT_BUFFER; |
|
223 int shifted = 0, first = 0, wroteone = 0, base64EOF=0, base64value=0, done=0; // "base64EOF", "base64value" and "done" initialised to 0 by DPB (to avoid GCC warning) |
|
224 unsigned int c=0, prevc; // "c" initialised to 0 by DPB (to avoid GCC warning) |
|
225 unsigned long junk; |
|
226 register char *source = *sourceStart; |
|
227 register UCS2 *target = *targetStart; |
|
228 |
|
229 short invbase64[128]; // added by DPB |
|
230 char mustshiftsafe[128]; // added by DPB |
|
231 char mustshiftopt[128]; // added by DPB |
|
232 // if (needtables) // commented out by DPB |
|
233 // tabinit(); // commented out by DPB |
|
234 tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB |
|
235 |
|
236 do |
|
237 { |
|
238 /* read an ASCII character c */ |
|
239 done = source >= sourceEnd; |
|
240 if (!done) |
|
241 c = *source++; |
|
242 if (shifted) |
|
243 { |
|
244 /* We're done with a base64 string if we hit EOF, it's not a valid |
|
245 ASCII character, or it's not in the base64 set. |
|
246 */ |
|
247 base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0; |
|
248 if (base64EOF) |
|
249 { |
|
250 shifted = 0; |
|
251 /* If the character causing us to drop out was SHIFT_IN or |
|
252 SHIFT_OUT, it may be a special escape for SHIFT_IN. The |
|
253 test for SHIFT_IN is not necessary, but allows an alternate |
|
254 form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This |
|
255 only works for some values of SHIFT_IN. |
|
256 */ |
|
257 if (!done && (c == SHIFT_IN || c == SHIFT_OUT)) |
|
258 { |
|
259 /* get another character c */ |
|
260 prevc = c; |
|
261 done = source >= sourceEnd; |
|
262 if (!done) |
|
263 c = *source++; |
|
264 /* If no base64 characters were encountered, and the |
|
265 character terminating the shift sequence was |
|
266 SHIFT_OUT, then it's a special escape for SHIFT_IN. |
|
267 */ |
|
268 if (first && prevc == SHIFT_OUT) |
|
269 { |
|
270 /* write SHIFT_IN unicode */ |
|
271 TARGETCHECK; |
|
272 *target++ = (UCS2)SHIFT_IN; |
|
273 } |
|
274 else if (!wroteone) |
|
275 { |
|
276 result = sourceCorrupt; |
|
277 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; |
|
278 } |
|
279 } |
|
280 else if (!wroteone) |
|
281 { |
|
282 result = sourceCorrupt; |
|
283 /* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */; |
|
284 } |
|
285 } |
|
286 else |
|
287 { |
|
288 /* Add another 6 bits of base64 to the bit buffer. */ |
|
289 WRITE_N_BITS(base64value, 6); |
|
290 first = 0; |
|
291 } |
|
292 |
|
293 /* Extract as many full 16 bit characters as possible from the |
|
294 bit buffer. |
|
295 */ |
|
296 while (BITS_IN_BUFFER >= 16 && (target < targetEnd)) |
|
297 { |
|
298 /* write a unicode */ |
|
299 *target++ = (UCS2)READ_N_BITS(16); // cast added by DPB |
|
300 wroteone = 1; |
|
301 } |
|
302 |
|
303 if (BITS_IN_BUFFER >= 16) |
|
304 TARGETCHECK; |
|
305 |
|
306 if (base64EOF) |
|
307 { |
|
308 junk = READ_N_BITS(BITS_IN_BUFFER); |
|
309 if (junk) |
|
310 { |
|
311 result = sourceCorrupt; |
|
312 /* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */; |
|
313 } |
|
314 } |
|
315 } |
|
316 |
|
317 if (!shifted && !done) |
|
318 { |
|
319 if (c == SHIFT_IN) |
|
320 { |
|
321 shifted = 1; |
|
322 first = 1; |
|
323 wroteone = 0; |
|
324 } |
|
325 else |
|
326 { |
|
327 /* It must be a directly encoded character. */ |
|
328 if (c > 0x7f) |
|
329 { |
|
330 result = sourceCorrupt; |
|
331 /* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */; |
|
332 } |
|
333 /* write a unicode */ |
|
334 TARGETCHECK; |
|
335 *target++ = (UCS2)c; // cast added by DPB |
|
336 } |
|
337 } |
|
338 } |
|
339 while (!done); |
|
340 |
|
341 *sourceStart = source; |
|
342 *targetStart = target; |
|
343 return result; |
|
344 } |
|