secureswitools/swisistools/source/dumpsis/unicode_converter.h
changeset 4 32704c33136d
equal deleted inserted replaced
-1:000000000000 4:32704c33136d
       
     1 /*
       
     2 * Copyright (c) 2007-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 * Unicode converter codes
       
    16 *
       
    17 */
       
    18 
       
    19 
       
    20 /**
       
    21  @file 
       
    22  @internalComponent 
       
    23 */
       
    24  
       
    25 #ifndef __UNICODE_CONVERTOR_H__
       
    26 #define __UNICODE_CONVERTOR_H__
       
    27 #if _MSC_VER <= 1200
       
    28 #define FIX_VC6_BROKEN_FSTREAM
       
    29 #endif
       
    30 
       
    31 // The default C++ wide streams narrow all wchar_t to char before writing to disk!!!!
       
    32 // This file defeats the narrowing, whilst still mapping NL <-> CR/NL.
       
    33 
       
    34 // This code is hard coded to use little endian UTF-16.
       
    35 
       
    36 // You probably want to #define FIX_VC6_BROKEN_FSTREAM then
       
    37 // include this file, and do the following to create an output stream:-
       
    38 //
       
    39 //	std::wofstream out;
       
    40 //	locale loc = _ADDFAC(locale::classic(), new NullCodecvt);
       
    41 //
       
    42 //	out.imbue(loc); 
       
    43 //	out.open("c:\\test.txt", std::ios_base::out|std::ios_base::trunc|std::ios_base::binary);
       
//	out << wchar_t(0xfeff); // UTF-16 BOM - written by NullCodecvt as bytes ff,fe == little endian
       
    45 //
       
    46 // Input streams should work as well, but the user will have to discard the BOM (if present).
       
    47 
       
    48 #include <iostream>
       
    49 #ifdef FIX_VC6_BROKEN_FSTREAM
       
    50 #define fwrite(a,b,c,d) fwrite_hack(a,b,c,d)
       
    51 #endif
       
    52 #include <fstream>
       
    53 #ifdef FIX_VC6_BROKEN_FSTREAM
       
    54 #undef fwrite
       
    55 static int fwrite_hack(const void *buffer, size_t size, size_t num, FILE *stream)
       
    56 {
       
    57 	// fstream _Fputc does a call of the form fwrite(p, N, 1, fs) == 1 for noconv data
       
    58 	// fstream uses call of the form fwrite(p, N, 1, fs) == N for partial or ok data.
       
    59 	// The second call is incorrect!
       
    60 	// As a hack for VC6 we always say we converted (so _FPutc is not used),
       
    61 	// and swap the arguments to fwrite...
       
    62 	return fwrite(buffer, num, size, stream); // write num 1 byte elements
       
    63 }
       
    64 #endif
       
    65 
       
    66 using namespace std;
       
    67 
       
    68 typedef codecvt<wchar_t, char, mbstate_t> NullCodecvtBase;
       
    69 
       
    70 // CLASS NullCodecvt
       
    71 class NullCodecvt : public NullCodecvtBase
       
    72 	{
       
    73 public:
       
    74 	explicit NullCodecvt(size_t _R = 0)
       
    75 	: NullCodecvtBase(_R) {}
       
    76 
       
    77 protected:
       
    78 	virtual result do_in(mbstate_t& _State,
       
    79 						 const char *fromStart, const char *fromEnd, const char *& fromNext,
       
    80 						 wchar_t *toStart, wchar_t *toLimit, wchar_t *& toNext) const
       
    81 
       
    82 	{		
       
    83 		fromNext = fromStart;
       
    84 		toNext = toStart;
       
    85 
       
    86 		result r = noconv;
       
    87 
       
    88 		int fromBytes = fromEnd - fromStart;
       
    89 		int toWChars = toLimit - toStart;
       
    90 		while((fromBytes >= 2) && (toWChars >= 1))
       
    91 		{
       
    92 			wchar_t wch = (fromNext[0] << 8) | fromNext[1];
       
    93 			fromNext += 2;
       
    94 			fromBytes -= 2;
       
    95 
       
    96 			if(wch == '\r')
       
    97 			{
       
    98 				// Drop CR characters, so did at least partial conversion
       
    99 				r = partial;
       
   100 				continue;
       
   101 			}
       
   102 
       
   103 			*toNext++ = wch;
       
   104 			--toWChars;
       
   105 		}
       
   106 
       
   107 		if(r == partial)
       
   108 		{
       
   109 			// Dropped some CR characters
       
   110 			if(fromBytes == 0)
       
   111 			{
       
   112 				// and converted all input
       
   113 				r = ok;
       
   114 			}
       
   115 		}
       
   116 		return r;	
       
   117 	}
       
   118 
       
   119 	virtual result do_out(mbstate_t& _State,
       
   120 						  const wchar_t *fromStart, const wchar_t *fromEnd, const wchar_t *& fromNext,
       
   121 						  char *toStart, char *toLimit, char *& toNext) const
       
   122 	{
       
   123 		fromNext = fromStart;
       
   124 		toNext = toStart;
       
   125 
       
   126 		result r = noconv;
       
   127 #ifdef FIX_VC6_BROKEN_FSTREAM
       
   128 		r = partial;
       
   129 #endif
       
   130 
       
   131 		int fromWChars = fromEnd - fromStart;
       
   132 		int toBytes = toLimit - toStart;
       
   133 		while((fromWChars >= 1) && (toBytes >= 2))
       
   134 		{
       
   135 			if(*fromNext == '\r')
       
   136 			{
       
   137 				r = partial;
       
   138 				// Do not expect CR internally, skip
       
   139 				++fromNext;
       
   140 				--fromWChars;
       
   141 				continue;
       
   142 			}
       
   143 
       
   144 			if(*fromNext == '\n')
       
   145 			{
       
   146 				r = partial;
       
   147 				if(toBytes < 4)
       
   148 				{
       
   149 					// Not enough space!!!!
       
   150 					// Hopefully they will call us again with at least do_max_length() bytes (ie. 4)
       
   151 					break;
       
   152 				}
       
   153 				*toNext++ = 0x0d; --toBytes; // 16 bit CR (little endian)
       
   154 				*toNext++ = 0x00; --toBytes;
       
   155 				*toNext++ = 0x0a; --toBytes; // 16 bit NL
       
   156 				*toNext++ = 0x00; --toBytes;
       
   157 				++fromNext;
       
   158 				--fromWChars;
       
   159 				continue;
       
   160 			}
       
   161 	
       
   162 			*toNext++ = (*fromNext & 0x00ff); --toBytes; // little endian
       
   163 			*toNext++ = (*fromNext & 0xff00) >> 8; --toBytes;
       
   164 			++fromNext;
       
   165 			--fromWChars;
       
   166 			}
       
   167 		
       
   168 		if( (r==partial) && (fromWChars == 0))
       
   169 		{
       
   170 			r = ok;
       
   171 		}
       
   172 		return r;	
       
   173 	}
       
   174 
       
   175 	virtual result do_unshift(mbstate_t& _State,
       
   176 							  char *_F2, char *_L2, char *& _Mid2) const
       
   177 
       
   178 	{		return noconv;	}
       
   179 
       
   180 	virtual int do_length(mbstate_t& _State, const char *from,
       
   181 						  const char *fromEnd, size_t maxInternal) const _THROW0()
       
   182 	{
       
   183 		int conWChars = 0;
       
   184 		const char *fromNext = from;
       
   185 		while(((fromEnd-fromNext) >= 2) && (conWChars < maxInternal))
       
   186 		{
       
   187 			wchar_t wch = (fromNext[0] << 8) | fromNext[1];
       
   188 			fromNext += 2;
       
   189 		
       
   190 			if(wch == '\r')
       
   191 			{
       
   192 				// Drop CR characters, so did at least partial conversion
       
   193 				continue;
       
   194 			}
       
   195 
       
   196 			++conWChars;
       
   197 		}
       
   198 		return fromNext - from;
       
   199 	}
       
   200 
       
   201 	virtual bool do_always_noconv() const _THROW0()
       
   202 	{	
       
   203 		// Sometimes we do CRNL -> NL conversion
       
   204 		return false;	
       
   205 	}
       
   206 
       
   207 	virtual int do_max_length() const _THROW0()
       
   208 	{	
       
   209 		// This is documented as the max number of external chars (bytes) that could
       
   210 		// be consumed to create a single wchar_t.....
       
   211 		// We convert 1 to 1, except for dropping CRs. This probably only, at worst,
       
   212 		// converts 4 external bytes (16bit CR, 16bit NL) to a single 16 bit NL,
       
   213 		// but we could consume infinite chars....
       
   214 		// We will, at worst, convert a single wchar_t to 4 external chars (bytes)
       
   215 		return 4;
       
   216 	}
       
   217 
       
   218 	virtual int do_encoding() const _THROW0()
       
   219 	{		
       
   220 		return 0; // Variable length encodings
       
   221 	}
       
   222 
       
   223 	}; 
       
   224 
       
   225 
       
   226 #endif