charconvfw/Charconv/ongoing/test/source/otherutf/UTF7.CPP
changeset 16 56cd22a7a1cb
parent 0 1fb32624e06b
child 18 67f6b0d39020
child 21 f2f7b3284356
equal deleted inserted replaced
0:1fb32624e06b 16:56cd22a7a1cb
     1 /*
       
     2 * Copyright (c) 2000-2005 Nokia Corporation and/or its subsidiary(-ies). 
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description:      
       
    15 *
       
    16 */
       
    17 /* ================================================================ */
       
    18 /*
       
    19 File:   ConvertUTF7.c
       
    20 Author: David B. Goldsmith
       
    21 Copyright (C) 1994, 1996 Taligent, Inc. All rights reserved.
       
    22 
       
    23 This code is copyrighted. Under the copyright laws, this code may not
       
    24 be copied, in whole or part, without prior written consent of Taligent. 
       
    25 
       
    26 Taligent grants the right to use this code as long as this ENTIRE
       
    27 copyright notice is reproduced in the code.  The code is provided
       
    28 AS-IS, AND TALIGENT DISCLAIMS ALL WARRANTIES, EITHER EXPRESS OR
       
    29 IMPLIED, INCLUDING, BUT NOT LIMITED TO IMPLIED WARRANTIES OF
       
    30 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT
       
    31 WILL TALIGENT BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING,
       
    32 WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, BUSINESS
       
    33 INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY
       
    34 LOSS) ARISING OUT OF THE USE OR INABILITY TO USE THIS CODE, EVEN
       
    35 IF TALIGENT HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
       
    36 BECAUSE SOME STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF
       
    37 LIABILITY FOR CONSEQUENTIAL OR INCIDENTAL DAMAGES, THE ABOVE
       
    38 LIMITATION MAY NOT APPLY TO YOU.
       
    39 
       
    40 RESTRICTED RIGHTS LEGEND: Use, duplication, or disclosure by the
       
    41 government is subject to restrictions as set forth in subparagraph
       
    42 (c)(l)(ii) of the Rights in Technical Data and Computer Software
       
    43 clause at DFARS 252.227-7013 and FAR 52.227-19.
       
    44 
       
    45 This code may be protected by one or more U.S. and International
       
    46 Patents.
       
    47 
       
    48 TRADEMARKS: Taligent and the Taligent Design Mark are registered
       
    49 trademarks of Taligent, Inc.
       
    50 */
       
    51 
       
    52 // #include "CVTUTF7.H" // commented out by DPB
       
    53 #include "UTF7.H" // added by DPB
       
    54 #pragma warning (disable: 4706) // added by DPB (warning disabled: "assignment within conditional expression")
       
    55 
       
    56 static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // "const" added by DPB
       
    57 // static short invbase64[128]; // commented out by DPB
       
    58 
       
    59 static const char direct[] = // "const" added by DPB
       
    60 	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?";
       
    61 static const char optional[] = "!\"#$%&*;<=>@[]^_`{|}"; // "const" added by DPB
       
    62 static const char spaces[] = " \011\015\012";		/* space, tab, return, line feed */ // "const" added by DPB
       
    63 // static char mustshiftsafe[128]; // commented out by DPB
       
    64 // static char mustshiftopt[128]; // commented out by DPB
       
    65 
       
    66 // static int needtables = 1; // commented out by DPB
       
    67 
       
    68 #define SHIFT_IN '+'
       
    69 #define SHIFT_OUT '-'
       
    70 
       
    71 int strlen(const char* aZeroTerminatedString) // added by DPB
       
    72 	{ // added by DPB
       
    73 	const char* character=aZeroTerminatedString; // added by DPB
       
    74 	while (*character!='\0') // added by DPB
       
    75 		{ // added by DPB
       
    76 		++character; // added by DPB
       
    77 		} // added by DPB
       
    78 	return character-aZeroTerminatedString; // added by DPB
       
    79 	} // added by DPB
       
    80 
       
    81 static void
       
    82 // tabinit() // commented out by DPB
       
    83 tabinit(short invbase64[], char mustshiftsafe[], char mustshiftopt[]) // added by DPB
       
    84 {
       
    85 	int i, limit;
       
    86 
       
    87 	for (i = 0; i < 128; ++i)
       
    88 	{
       
    89 		mustshiftopt[i] = mustshiftsafe[i] = 1;
       
    90 		invbase64[i] = -1;
       
    91 	}
       
    92 	limit = strlen(direct);
       
    93 	for (i = 0; i < limit; ++i)
       
    94 		mustshiftopt[direct[i]] = mustshiftsafe[direct[i]] = 0;
       
    95 	limit = strlen(spaces);
       
    96 	for (i = 0; i < limit; ++i)
       
    97 		mustshiftopt[spaces[i]] = mustshiftsafe[spaces[i]] = 0;
       
    98 	limit = strlen(optional);
       
    99 	for (i = 0; i < limit; ++i)
       
   100 		mustshiftopt[optional[i]] = 0;
       
   101 	limit = strlen(base64);
       
   102 	for (i = 0; i < limit; ++i)
       
   103 		invbase64[base64[i]] = (short)i; // cast added by DPB
       
   104 
       
   105 //	needtables = 0; // commented out by DPB
       
   106 }
       
   107 
       
   108 #define DECLARE_BIT_BUFFER register unsigned long BITbuffer = 0, buffertemp = 0; int bufferbits = 0
       
   109 #define BITS_IN_BUFFER bufferbits
       
   110 #define WRITE_N_BITS(x, n) ((BITbuffer |= ( ((x) & ~(-1L<<(n))) << (32-(n)-bufferbits) ) ), bufferbits += (n) )
       
   111 #define READ_N_BITS(n) ((buffertemp = (BITbuffer >> (32-(n)))), (BITbuffer <<= (n)), (bufferbits -= (n)), buffertemp)
       
   112 #define TARGETCHECK  {if (target >= targetEnd) {result = targetExhausted; break;}}
       
   113 
       
   114 EXPORT_C // added by DPB
       
   115 ConversionResult ConvertUCS2toUTF7(
       
   116                 UCS2** sourceStart, UCS2* sourceEnd, 
       
   117                 char** targetStart, char* targetEnd,
       
   118                 int optional, int verbose)
       
   119 {
       
   120 	ConversionResult result = ok;
       
   121 	DECLARE_BIT_BUFFER;
       
   122 	int shifted = 0, needshift = 0, done = 0;
       
   123 	register UCS2 *source = *sourceStart;
       
   124 	register char *target = *targetStart;
       
   125 	char *mustshift;
       
   126 
       
   127 	short invbase64[128]; // added by DPB
       
   128 	char mustshiftsafe[128]; // added by DPB
       
   129 	char mustshiftopt[128]; // added by DPB
       
   130 //	if (needtables) // commented out by DPB
       
   131 //		tabinit(); // commented out by DPB
       
   132 	tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB
       
   133 
       
   134 	if (optional)
       
   135 		mustshift = mustshiftopt;
       
   136 	else
       
   137 		mustshift = mustshiftsafe;
       
   138 
       
   139 	do
       
   140 	{
       
   141 		register UCS2 r=0; // initialised to 0 by DPB (to avoid GCC warning)
       
   142 
       
   143 		done = source >= sourceEnd;
       
   144 		if (!done)
       
   145 			r = *source++;
       
   146 		needshift = (!done && ((r > 0x7f) || mustshift[r]));
       
   147 
       
   148 		if (needshift && !shifted)
       
   149 		{
       
   150 			TARGETCHECK;
       
   151 			*target++ = SHIFT_IN;
       
   152 			/* Special case handling of the SHIFT_IN character */
       
   153 			if (r == (UCS2)SHIFT_IN) {
       
   154 				TARGETCHECK;
       
   155 				*target++ = SHIFT_OUT;
       
   156 			}
       
   157 			else
       
   158 				shifted = 1;
       
   159 		}
       
   160 
       
   161 		if (shifted)
       
   162 		{
       
   163 			/* Either write the character to the bit buffer, or pad
       
   164 			   the bit buffer out to a full base64 character.
       
   165 			 */
       
   166 			if (needshift)
       
   167 				WRITE_N_BITS(r, 16);
       
   168 			else
       
   169 				WRITE_N_BITS(0, (6 - (BITS_IN_BUFFER % 6))%6);
       
   170 
       
   171 			/* Flush out as many full base64 characters as possible
       
   172 			   from the bit buffer.
       
   173 			 */
       
   174 			while ((target < targetEnd) && BITS_IN_BUFFER >= 6)
       
   175 			{
       
   176 				int temp=READ_N_BITS(6); // added by DPB
       
   177 //				*target++ = base64[READ_N_BITS(6)]; // commented out by DPB
       
   178 				*target++ = base64[temp]; // added by DPB
       
   179 			}
       
   180 
       
   181 			if (BITS_IN_BUFFER >= 6)
       
   182 				TARGETCHECK;
       
   183 
       
   184 			if (!needshift)
       
   185 			{
       
   186 				/* Write the explicit shift out character if
       
   187 				   1) The caller has requested we always do it, or
       
   188 				   2) The directly encoded character is in the
       
   189 				   base64 set, or
       
   190 				   3) The directly encoded character is SHIFT_OUT.
       
   191 				 */
       
   192 				if (verbose || ((!done) && (invbase64[r] >=0 || r == SHIFT_OUT)))
       
   193 				{
       
   194 					TARGETCHECK;
       
   195 					*target++ = SHIFT_OUT;
       
   196 				}
       
   197 				shifted = 0;
       
   198 			}
       
   199 		}
       
   200 
       
   201 		/* The character can be directly encoded as ASCII. */
       
   202 		if (!needshift && !done)
       
   203 		{
       
   204 			TARGETCHECK;
       
   205 			*target++ = (char) r;
       
   206 		}
       
   207 
       
   208 	}
       
   209 	while (!done);
       
   210 	
       
   211     *sourceStart = source;
       
   212     *targetStart = target;
       
   213     return result;
       
   214 }
       
   215 
       
   216 EXPORT_C // added by DPB
       
   217 ConversionResult ConvertUTF7toUCS2(
       
   218                 char** sourceStart, char* sourceEnd, 
       
   219                 UCS2** targetStart, UCS2* targetEnd)
       
   220 {
       
   221 	ConversionResult result = ok;
       
   222 	DECLARE_BIT_BUFFER;
       
   223 	int shifted = 0, first = 0, wroteone = 0, base64EOF=0, base64value=0, done=0; // "base64EOF", "base64value" and "done" initialised to 0 by DPB (to avoid GCC warning)
       
   224 	unsigned int c=0, prevc; // "c" initialised to 0 by DPB (to avoid GCC warning)
       
   225 	unsigned long junk;
       
   226 	register char *source = *sourceStart;
       
   227 	register UCS2 *target = *targetStart;
       
   228 
       
   229 	short invbase64[128]; // added by DPB
       
   230 	char mustshiftsafe[128]; // added by DPB
       
   231 	char mustshiftopt[128]; // added by DPB
       
   232 //	if (needtables) // commented out by DPB
       
   233 //		tabinit(); // commented out by DPB
       
   234 	tabinit(invbase64, mustshiftsafe, mustshiftopt); // added by DPB
       
   235 
       
   236 	do
       
   237 	{
       
   238 		/* read an ASCII character c */
       
   239 		done = source >= sourceEnd;
       
   240 		if (!done)
       
   241 			c = *source++;
       
   242 		if (shifted)
       
   243 		{
       
   244 			/* We're done with a base64 string if we hit EOF, it's not a valid
       
   245 			   ASCII character, or it's not in the base64 set.
       
   246 			 */
       
   247 			base64EOF = done || (c > 0x7f) || (base64value = invbase64[c]) < 0;
       
   248 			if (base64EOF)
       
   249 			{
       
   250 				shifted = 0;
       
   251 				/* If the character causing us to drop out was SHIFT_IN or
       
   252 				   SHIFT_OUT, it may be a special escape for SHIFT_IN. The
       
   253 				   test for SHIFT_IN is not necessary, but allows an alternate
       
   254 				   form of UTF-7 where SHIFT_IN is escaped by SHIFT_IN. This
       
   255 				   only works for some values of SHIFT_IN.
       
   256 				 */
       
   257 				if (!done && (c == SHIFT_IN || c == SHIFT_OUT))
       
   258 				{
       
   259 					/* get another character c */
       
   260 					prevc = c;
       
   261 					done = source >= sourceEnd;
       
   262 					if (!done)
       
   263 						c = *source++;
       
   264 					/* If no base64 characters were encountered, and the
       
   265 					   character terminating the shift sequence was
       
   266 					   SHIFT_OUT, then it's a special escape for SHIFT_IN.
       
   267 					 */
       
   268 					if (first && prevc == SHIFT_OUT)
       
   269 					{
       
   270 						/* write SHIFT_IN unicode */
       
   271 						TARGETCHECK;
       
   272 						*target++ = (UCS2)SHIFT_IN;
       
   273 					}
       
   274 					else if (!wroteone)
       
   275 					{
       
   276 						result = sourceCorrupt;
       
   277 						/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
       
   278 					}
       
   279 				}
       
   280 				else if (!wroteone)
       
   281 				{
       
   282 					result = sourceCorrupt;
       
   283 					/* fprintf(stderr, "UTF7: empty sequence near byte %ld in input\n", source-sourceStart) */;
       
   284 				}
       
   285 			}
       
   286 			else
       
   287 			{
       
   288 				/* Add another 6 bits of base64 to the bit buffer. */
       
   289 				WRITE_N_BITS(base64value, 6);
       
   290 				first = 0;
       
   291 			}
       
   292 
       
   293 			/* Extract as many full 16 bit characters as possible from the
       
   294 			   bit buffer.
       
   295 			 */
       
   296 			while (BITS_IN_BUFFER >= 16 && (target < targetEnd))
       
   297 			{
       
   298 				/* write a unicode */
       
   299 				*target++ = (UCS2)READ_N_BITS(16); // cast added by DPB
       
   300 				wroteone = 1;
       
   301 			}
       
   302 
       
   303 			if (BITS_IN_BUFFER >= 16)
       
   304 				TARGETCHECK;
       
   305 
       
   306 			if (base64EOF)
       
   307 			{
       
   308 				junk = READ_N_BITS(BITS_IN_BUFFER);
       
   309 				if (junk)
       
   310 				{
       
   311 					result = sourceCorrupt;
       
   312 					/* fprintf(stderr, "UTF7: non-zero pad bits near byte %ld in input\n", source-sourceStart) */;
       
   313 				}
       
   314 			}
       
   315 		}
       
   316 
       
   317 		if (!shifted && !done)
       
   318 		{
       
   319 			if (c == SHIFT_IN)
       
   320 			{
       
   321 				shifted = 1;
       
   322 				first = 1;
       
   323 				wroteone = 0;
       
   324 			}
       
   325 			else
       
   326 			{
       
   327 				/* It must be a directly encoded character. */
       
   328 				if (c > 0x7f)
       
   329 				{
       
   330 					result = sourceCorrupt;
       
   331 					/* fprintf(stderr, "UTF7: non-ASCII character near byte %ld in input\n", source-sourceStart) */;
       
   332 				}
       
   333 				/* write a unicode */
       
   334 				TARGETCHECK;
       
   335 				*target++ = (UCS2)c; // cast added by DPB
       
   336 			}
       
   337 		}
       
   338 	}
       
   339 	while (!done);
       
   340 
       
   341     *sourceStart = source;
       
   342     *targetStart = target;
       
   343     return result;
       
   344 }