/* ================================================================ */
/*
File: ConvertUTF7.c
Author: David B. Goldsmith
Copyright (C) 1994, 1996 Taligent, Inc. All rights reserved.

This code is copyrighted. Under the copyright laws, this code may not
be copied, in whole or part, without prior written consent of Taligent.

Taligent grants the right to use this code as long as this ENTIRE
copyright notice is reproduced in the code. The code is provided
AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
LIMITATION MAY NOT APPLY TO YOU.

RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
government is subject to restrictions as set forth in subparagraph
(c)(l)(ii) of the Rights in Technical Data and Computer Software
clause at DFARS 252.227-7013 and FAR 52.227-19.

This code may be protected by one or more U.S. and International
Patents.

TRADEMARKS: Taligent and the Taligent Design Mark are registered
trademarks of Taligent, Inc.
*/
|
|
35 |
|
|
36 |
// #include "CVTUTF7.H" // commented out by DPB
|
|
37 |
#include "UTF7.H" // added by DPB
|
|
38 |
#pragma warning (disable: 4706) // added by DPB (warning disabled: "assignment within conditional expression")
|
|
39 |
|
|
40 |
/*
 * Character sets from which tabinit() builds its lookup tables.
 *
 * Porting notes (DPB): the "const" qualifiers were added, and the former
 * file-level tables invbase64[], mustshiftsafe[] and mustshiftopt[] together
 * with the needtables flag were removed; the conversion routines now keep
 * those tables as locals and fill them on every call.
 */

/* The 64 base64 digits; a digit's index in this string is its 6-bit value. */
static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Characters that tabinit() marks as never needing a shift, in both tables. */
static const char direct[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";

/* Characters that tabinit() exempts from shifting in the "optional" table only. */
static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}";

/* space, tab, return, line feed -- never need a shift, in both tables. */
static const char spaces[] = " \011\015\012";

/* Character that opens a base64 run in the encoded stream. */
#define SHIFT_IN '+'
/* Character that explicitly closes a base64 run in the encoded stream. */
#define SHIFT_OUT '-'
|
|
54 |
|
|
55 |
/*
 * Counts the characters that precede the terminating '\0' of
 * aZeroTerminatedString and returns that count.
 *
 * Added by DPB.  NOTE: this shares its name with the C library's strlen()
 * but returns int rather than size_t, so this file must not also pull in
 * the library declaration.  The argument must not be a null pointer; it is
 * dereferenced unconditionally.
 */
int strlen(const char* aZeroTerminatedString)
{
    const char* cursor = aZeroTerminatedString;

    while (*cursor != '\0')
    {
        ++cursor;
    }

    /* A pointer difference has type ptrdiff_t; narrow it explicitly to the
       declared return type instead of relying on an implicit conversion. */
    return (int)(cursor - aZeroTerminatedString);
}
|
|
64 |
|
|
65 |
static void
|
|
66 |
// tabinit() // commented out by DPB
|
|
67 |
tabinit(short invbase64[], char mustshiftsafe[], char mustshiftopt[]) // added by DPB
|
|
68 |
{
|
|
69 |
int i, limit;
|
|
70 |
|
|
71 |
for (i = 0; i < 128; ++i)
|
|
72 |
{
|
|
73 |
mustshiftopt[i] = mustshiftsafe[i] = 1;
|
|
74 |
invbase64[i] = -1;
|
|
75 |
}
|
|
76 |
limit = strlen(direct);
|
|
77 |
for (i = 0; i < limit; ++i)
|
|
78 |
mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0;
|
|
79 |
limit = strlen(spaces);
|
|
80 |
for (i = 0; i < limit; ++i)
|
|
81 |
mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0;
|
|
82 |
limit = strlen(optional);
|
|
83 |
for (i = 0; i < limit; ++i)
|
|
84 |
mustshiftopt[optional[i]] = 0;
|
|
85 |
limit = strlen(base64);
|
|
86 |
for (i = 0; i < limit; ++i)
|
|
87 |
invbase64[base64[i]] = (short)i; // cast added by DPB
|
|
88 |
|
|
89 |
// needtables = 0; // commented out by DPB
|
|
90 |
}
|
|
91 |
|
|
92 |
/*
 * Bit-buffer helpers shared by the two conversion routines.
 *
 * The buffer is a 32-bit window held in BITbuffer: pending bits are kept
 * left-justified (most significant pending bit at bit 31) and bufferbits
 * counts how many are pending.  unsigned long may be wider than 32 bits, so
 * every operation confines the buffer to the low 32 bits explicitly;
 * otherwise bits shifted out by READ_N_BITS would linger above bit 31 and
 * be returned by later reads.
 *
 * The macros expand to plain expressions over the locals introduced by
 * DECLARE_BIT_BUFFER.  Their `n` argument is evaluated more than once, so it
 * must be free of side effects.  READ_N_BITS(BITS_IN_BUFFER) is supported:
 * the update of bufferbits is the last step of the expansion.
 */

/* Declares the buffer state; use once at the top of a function body. */
#define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0

/* Number of bits currently pending in the buffer. */
#define BITS_IN_BUFFER bufferbits

/* Appends the low n bits of x after the pending bits (requires
   n + bufferbits <= 32).  n == 0 is a no-op and performs no shift. */
#define WRITE_N_BITS(x, n) ((BITbuffer |= ((n) > 0 ? (((unsigned long)(x) & ((1UL << (n)) - 1UL)) << (32 - (n) - bufferbits)) : 0UL)), bufferbits += (n))

/* Removes the n oldest pending bits and yields them as an unsigned long.
   n == 0 yields 0 and performs no shift. */
#define READ_N_BITS(n) ((buffertemp = ((n) > 0 ? ((BITbuffer & 0xFFFFFFFFUL) >> (32 - (n))) : 0UL)), (BITbuffer = ((n) < 32 ? ((BITbuffer << (n)) & 0xFFFFFFFFUL) : 0UL)), (bufferbits -= (n)), buffertemp)

/* Leaves the enclosing loop with result = targetExhausted when the output
   is full.  This must stay a bare block rather than do { } while (0): the
   `break` has to terminate the caller's conversion loop, not a wrapper. */
#define TARGETCHECK {if (target >= targetEnd) {result = targetExhausted; break;}}
|
|
97 |
|
|
98 |
EXPORT_C // added by DPB
|
|
99 |
ConversionResult ConvertUCS2toUTF7(
|
|
100 |
UCS2** sourceStart, UCS2* sourceEnd,
|
|
101 |
char** targetStart, char* targetEnd,
|
|
102 |
int optional, int verbose)
|
|
103 |
{
|
|
104 |
ConversionResult result = ok;
|
|
105 |
DECLARE_BIT_BUFFER;
|
|
106 |
int shifted = 0, needshift = 0, done = 0;
|
|
107 |
register UCS2 *source = *sourceStart;
|
|
108 |
register char *target = *targetStart;
|
|
109 |
char *mustshift;
|
|
110 |
|
|
111 |
short invbase64[128]; // added by DPB
|
|
112 |
char mustshiftsafe[128]; // added by DPB
|
|
113 |
char mustshiftopt[128]; // added by DPB
|
|
114 |
// if (needtables) // commented out by DPB
|
|
115 |
// tabinit(); // commented out by DPB
|
|
116 |
tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB
|
|
117 |
|
|
118 |
if (optional)
|
|
119 |
mustshift = mustshiftopt;
|
|
120 |
else
|
|
121 |
mustshift = mustshiftsafe;
|
|
122 |
|
|
123 |
do
|
|
124 |
{
|
|
125 |
register UCS2 r=0; // initialised to 0 by DPB (to avoid GCC warning)
|
|
126 |
|
|
127 |
done = source >= sourceEnd;
|
|
128 |
if (!done)
|
|
129 |
r = *source++;
|
|
130 |
needshift = (!done && ((r > 0x7f) || mustshift[r]));
|
|
131 |
|
|
132 |
if (needshift && !shifted)
|
|
133 |
{
|
|
134 |
TARGETCHECK;
|
|
135 |
*target++ = SHIFT_IN;
|
|
136 |
/* Special case handling of the SHIFT_IN character */
|
|
137 |
if (r == (UCS2)SHIFT_IN) {
|
|
138 |
TARGETCHECK;
|
|
139 |
*target++ = SHIFT_OUT;
|
|
140 |
}
|
|
141 |
else
|
|
142 |
shifted = 1;
|
|
143 |
}
|
|
144 |
|
|
145 |
if (shifted)
|
|
146 |
{
|
|
147 |
/* Either write the character to the bit buffer, or pad
|
|
148 |
the bit buffer out to a full base64 character.
|
|
149 |
*/
|
|
150 |
if (needshift)
|
|
151 |
WRITE_N_BITS(r, 16);
|
|
152 |
else
|
|
153 |
WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6);
|
|
154 |
|
|
155 |
/* Flush out as many full base64 characters as possible
|
|
156 |
from the bit buffer.
|
|
157 |
*/
|
|
158 |
while ((target < targetEnd) && BITS_IN_BUFFER >= 6)
|
|
159 |
{
|
|
160 |
int temp=READ_N_BITS(6); // added by DPB
|
|
161 |
// *target++ = base64[READ_N_BITS(6)]; // commented out by DPB
|
|
162 |
*target++ = base64[temp]; // added by DPB
|
|
163 |
}
|
|
164 |
|
|
165 |
if (BITS_IN_BUFFER >= 6)
|
|
166 |
TARGETCHECK;
|
|
167 |
|
|
168 |
if (!needshift)
|
|
169 |
{
|
|
170 |
/* Write the explicit shift out character if
|
|
171 |
1) The caller has requested we always do it, or
|
|
172 |
2) The directly encoded character is in the
|
|
173 |
base64 set, or
|
|
174 |
3) The directly encoded character is SHIFT_OUT.
|
|
175 |
*/
|
|
176 |
if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT)))
|
|
177 |
{
|
|
178 |
TARGETCHECK;
|
|
179 |
*target++ = SHIFT_OUT;
|
|
180 |
}
|
|
181 |
shifted = 0;
|
|
182 |
}
|
|
183 |
}
|
|
184 |
|
|
185 |
/* The character can be directly encoded as ASCII. */
|
|
186 |
if (!needshift && !done)
|
|
187 |
{
|
|
188 |
TARGETCHECK;
|
|
189 |
*target++ = (char) r;
|
|
190 |
}
|
|
191 |
|
|
192 |
}
|
|
193 |
while (!done);
|
|
194 |
|
|
195 |
*sourceStart = source;
|
|
196 |
*targetStart = target;
|
|
197 |
return result;
|
|
198 |
}
|
|
199 |
|
|
200 |
EXPORT_C // added by DPB
|
|
201 |
ConversionResult ConvertUTF7toUCS2(
|
|
202 |
char** sourceStart, char* sourceEnd,
|
|
203 |
UCS2** targetStart, UCS2* targetEnd)
|
|
204 |
{
|
|
205 |
ConversionResult result = ok;
|
|
206 |
DECLARE_BIT_BUFFER;
|
|
207 |
int shifted = 0, first = 0, wroteone = 0, base64EOF=0, base64value=0, done=0; // "base64EOF", "base64value" and "done" initialised to 0 by DPB (to avoid GCC warning)
|
|
208 |
unsigned int c=0, prevc; // "c" initialised to 0 by DPB (to avoid GCC warning)
|
|
209 |
unsigned long junk;
|
|
210 |
register char *source = *sourceStart;
|
|
211 |
register UCS2 *target = *targetStart;
|
|
212 |
|
|
213 |
short invbase64[128]; // added by DPB
|
|
214 |
char mustshiftsafe[128]; // added by DPB
|
|
215 |
char mustshiftopt[128]; // added by DPB
|
|
216 |
// if (needtables) // commented out by DPB
|
|
217 |
// tabinit(); // commented out by DPB
|
|
218 |
tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB
|
|
219 |
|
|
220 |
do
|
|
221 |
{
|
|
222 |
/* read an ASCII character c */
|
|
223 |
done = source >= sourceEnd;
|
|
224 |
if (!done)
|
|
225 |
c = *source++;
|
|
226 |
if (shifted)
|
|
227 |
{
|
|
228 |
/* We're done with a base64 string if we hit EOF, it's not a valid
|
|
229 |
ASCII character, or it's not in the base64 set.
|
|
230 |
*/
|
|
231 |
base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0;
|
|
232 |
if (base64EOF)
|
|
233 |
{
|
|
234 |
shifted = 0;
|
|
235 |
/* If the character causing us to drop out was SHIFT_IN or
|
|
236 |
SHIFT_OUT, it may be a special escape for SHIFT_IN. The
|
|
237 |
test for SHIFT_IN is not necessary, but allows an alternate
|
|
238 |
form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This
|
|
239 |
only works for some values of SHIFT_IN.
|
|
240 |
*/
|
|
241 |
if (!done && (c == SHIFT_IN || c == SHIFT_OUT))
|
|
242 |
{
|
|
243 |
/* get another character c */
|
|
244 |
prevc = c;
|
|
245 |
done = source >= sourceEnd;
|
|
246 |
if (!done)
|
|
247 |
c = *source++;
|
|
248 |
/* If no base64 characters were encountered, and the
|
|
249 |
character terminating the shift sequence was
|
|
250 |
SHIFT_OUT, then it's a special escape for SHIFT_IN.
|
|
251 |
*/
|
|
252 |
if (first && prevc == SHIFT_OUT)
|
|
253 |
{
|
|
254 |
/* write SHIFT_IN unicode */
|
|
255 |
TARGETCHECK;
|
|
256 |
*target++ = (UCS2)SHIFT_IN;
|
|
257 |
}
|
|
258 |
else if (!wroteone)
|
|
259 |
{
|
|
260 |
result = sourceCorrupt;
|
|
261 |
/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
|
|
262 |
}
|
|
263 |
}
|
|
264 |
else if (!wroteone)
|
|
265 |
{
|
|
266 |
result = sourceCorrupt;
|
|
267 |
/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
|
|
268 |
}
|
|
269 |
}
|
|
270 |
else
|
|
271 |
{
|
|
272 |
/* Add another 6 bits of base64 to the bit buffer. */
|
|
273 |
WRITE_N_BITS(base64value, 6);
|
|
274 |
first = 0;
|
|
275 |
}
|
|
276 |
|
|
277 |
/* Extract as many full 16 bit characters as possible from the
|
|
278 |
bit buffer.
|
|
279 |
*/
|
|
280 |
while (BITS_IN_BUFFER >= 16 && (target < targetEnd))
|
|
281 |
{
|
|
282 |
/* write a unicode */
|
|
283 |
*target++ = (UCS2)READ_N_BITS(16); // cast added by DPB
|
|
284 |
wroteone = 1;
|
|
285 |
}
|
|
286 |
|
|
287 |
if (BITS_IN_BUFFER >= 16)
|
|
288 |
TARGETCHECK;
|
|
289 |
|
|
290 |
if (base64EOF)
|
|
291 |
{
|
|
292 |
junk = READ_N_BITS(BITS_IN_BUFFER);
|
|
293 |
if (junk)
|
|
294 |
{
|
|
295 |
result = sourceCorrupt;
|
|
296 |
/* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */;
|
|
297 |
}
|
|
298 |
}
|
|
299 |
}
|
|
300 |
|
|
301 |
if (!shifted && !done)
|
|
302 |
{
|
|
303 |
if (c == SHIFT_IN)
|
|
304 |
{
|
|
305 |
shifted = 1;
|
|
306 |
first = 1;
|
|
307 |
wroteone = 0;
|
|
308 |
}
|
|
309 |
else
|
|
310 |
{
|
|
311 |
/* It must be a directly encoded character. */
|
|
312 |
if (c > 0x7f)
|
|
313 |
{
|
|
314 |
result = sourceCorrupt;
|
|
315 |
/* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */;
|
|
316 |
}
|
|
317 |
/* write a unicode */
|
|
318 |
TARGETCHECK;
|
|
319 |
*target++ = (UCS2)c; // cast added by DPB
|
|
320 |
}
|
|
321 |
}
|
|
322 |
}
|
|
323 |
while (!done);
|
|
324 |
|
|
325 |
*sourceStart = source;
|
|
326 |
*targetStart = target;
|
|
327 |
return result;
|
|
328 |
}
|