charconvfw/charconv_fw/tools/convtool/utf.cpp
changeset 0 1fb32624e06b
child 64 f66674566702
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 /*
       
     2 * Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
     14 * Description: UTF-8 <-> UTF-16 conversion routines used by convtool.
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include <STDLIB.H>
       
    20 
       
    21 const int KErrorIllFormedInput=-1;
       
    22 
       
    23 int Utf8ToUnicode(wchar_t* aUnicode, const char* aUtf8)
       
    24 // must '\0'-terminate the output
       
    25 	{
       
    26 	wchar_t* startOfUnicode=aUnicode;
       
    27 	for (;;)
       
    28 		{
       
    29 		unsigned int currentUtf8Byte=*aUtf8;
       
    30 		if (currentUtf8Byte=='\0')
       
    31 			{
       
    32 			break;
       
    33 			}
       
    34 		if ((currentUtf8Byte&0x80)==0x00)
       
    35 			{
       
    36 			if (startOfUnicode!=NULL)
       
    37 				{
       
    38 				*aUnicode=(wchar_t)currentUtf8Byte;
       
    39 				}
       
    40 			}
       
    41 		else if ((currentUtf8Byte&0xe0)==0xc0)
       
    42 			{
       
    43 			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x1f)<<6);
       
    44 			++aUtf8;
       
    45 			currentUtf8Byte=*aUtf8;
       
    46 			if ((currentUtf8Byte&0xc0)!=0x80)
       
    47 				{
       
    48 				return KErrorIllFormedInput;
       
    49 				}
       
    50 			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
       
    51 			if (startOfUnicode!=NULL)
       
    52 				{
       
    53 				*aUnicode=(wchar_t)currentUnicodeCharacter;
       
    54 				}
       
    55 			}
       
    56 		else if ((currentUtf8Byte&0xf0)==0xe0)
       
    57 			{
       
    58 			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<12);
       
    59 			++aUtf8;
       
    60 			currentUtf8Byte=*aUtf8;
       
    61 			if ((currentUtf8Byte&0xc0)!=0x80)
       
    62 				{
       
    63 				return KErrorIllFormedInput;
       
    64 				}
       
    65 			currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<6);
       
    66 			++aUtf8;
       
    67 			currentUtf8Byte=*aUtf8;
       
    68 			if ((currentUtf8Byte&0xc0)!=0x80)
       
    69 				{
       
    70 				return KErrorIllFormedInput;
       
    71 				}
       
    72 			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
       
    73 			if (startOfUnicode!=NULL)
       
    74 				{
       
    75 				*aUnicode=(wchar_t)currentUnicodeCharacter;
       
    76 				}
       
    77 			}
       
    78 		else if ((currentUtf8Byte&0xf8)==0xf0)
       
    79 			{
       
    80 			unsigned int currentUnicodeCharacter=((currentUtf8Byte&0x07)<<8);
       
    81 			++aUtf8;
       
    82 			currentUtf8Byte=*aUtf8;
       
    83 			if ((currentUtf8Byte&0xc0)!=0x80)
       
    84 				{
       
    85 				return KErrorIllFormedInput;
       
    86 				}
       
    87 			currentUnicodeCharacter|=((currentUtf8Byte&0x3f)<<2);
       
    88 			if (currentUnicodeCharacter<0x0040)
       
    89 				{
       
    90 				return KErrorIllFormedInput;
       
    91 				}
       
    92 			currentUnicodeCharacter-=0x0040;
       
    93 			if (currentUnicodeCharacter>=0x0400)
       
    94 				{
       
    95 				return KErrorIllFormedInput;
       
    96 				}
       
    97 			++aUtf8;
       
    98 			currentUtf8Byte=*aUtf8;
       
    99 			if ((currentUtf8Byte&0xc0)!=0x80)
       
   100 				{
       
   101 				return KErrorIllFormedInput;
       
   102 				}
       
   103 			currentUnicodeCharacter|=((currentUtf8Byte&0x30)>>4);
       
   104 			if (startOfUnicode!=NULL)
       
   105 				{
       
   106 				*aUnicode=(wchar_t)(0xd800|currentUnicodeCharacter);
       
   107 				}
       
   108 			currentUnicodeCharacter=((currentUtf8Byte&0x0f)<<6);
       
   109 			++aUtf8;
       
   110 			currentUtf8Byte=*aUtf8;
       
   111 			if ((currentUtf8Byte&0xc0)!=0x80)
       
   112 				{
       
   113 				return KErrorIllFormedInput;
       
   114 				}
       
   115 			currentUnicodeCharacter|=(currentUtf8Byte&0x3f);
       
   116 			++aUnicode;
       
   117 			if (startOfUnicode!=NULL)
       
   118 				{
       
   119 				*aUnicode=(wchar_t)(0xdc00|currentUnicodeCharacter);
       
   120 				}
       
   121 			}
       
   122 		else
       
   123 			{
       
   124 			return KErrorIllFormedInput;
       
   125 			}
       
   126 		++aUnicode;
       
   127 		++aUtf8;
       
   128 		}
       
   129 	if (startOfUnicode!=NULL)
       
   130 		{
       
   131 		*aUnicode='\0';
       
   132 		}
       
   133 	return aUnicode-startOfUnicode;
       
   134 	}
       
   135 #include <STDIO.H>
       
   136 int UnicodeToUtf8(char* aUtf8, const wchar_t* aUnicode)
       
   137 // must '\0'-terminate the output
       
   138 	{
       
   139 	char* startOfUtf8=aUtf8;
       
   140 	for (;;)
       
   141 		{
       
   142 		unsigned int currentUnicodeCharacter=*aUnicode;
       
   143 		if (currentUnicodeCharacter=='\0')
       
   144 			{
       
   145 			break;
       
   146 			}
       
   147 		if ((currentUnicodeCharacter&0xff80)==0x0000)
       
   148 			{
       
   149 			if (startOfUtf8!=NULL)
       
   150 				{
       
   151 				*aUtf8=(char)currentUnicodeCharacter;
       
   152 				}
       
   153 			}
       
   154 		else if ((currentUnicodeCharacter&0xf800)==0x0000)
       
   155 			{
       
   156 			if (startOfUtf8!=NULL)
       
   157 				{
       
   158 				*aUtf8=(char)(0xc0|(currentUnicodeCharacter>>6));
       
   159 				}
       
   160 			++aUtf8;
       
   161 			if (startOfUtf8!=NULL)
       
   162 				{
       
   163 				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
       
   164 				}
       
   165 			}
       
   166 		else if ((currentUnicodeCharacter&0xfc00)==0xd800)
       
   167 			{
       
   168 			currentUnicodeCharacter+=0x0040;
       
   169 			if (startOfUtf8!=NULL)
       
   170 				{
       
   171 				*aUtf8=(char)(0xf0|((currentUnicodeCharacter>>8)&0x07));
       
   172 				}
       
   173 			++aUtf8;
       
   174 			if (startOfUtf8!=NULL)
       
   175 				{
       
   176 				*aUtf8=(char)(0x80|((currentUnicodeCharacter>>2)&0x3f));
       
   177 				}
       
   178 			{
       
   179 			unsigned int currentUtf8Byte=(0x80|((currentUnicodeCharacter&0x03)<<4));
       
   180 			++aUnicode;
       
   181 			currentUnicodeCharacter=*aUnicode;
       
   182 			if ((currentUnicodeCharacter&0xfc00)!=0xdc00)
       
   183 				{
       
   184 				return KErrorIllFormedInput;
       
   185 				}
       
   186 			currentUtf8Byte|=((currentUnicodeCharacter>>6)&0x0f);
       
   187 			++aUtf8;
       
   188 			if (startOfUtf8!=NULL)
       
   189 				{
       
   190 				*aUtf8=(char)currentUtf8Byte;
       
   191 				}
       
   192 			}
       
   193 			++aUtf8;
       
   194 			if (startOfUtf8!=NULL)
       
   195 				{
       
   196 				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
       
   197 				}
       
   198 			}
       
   199 		else
       
   200 			{
       
   201 			if (startOfUtf8!=NULL)
       
   202 				{
       
   203 				*aUtf8=(char)(0xe0|(currentUnicodeCharacter>>12));
       
   204 				}
       
   205 			++aUtf8;
       
   206 			if (startOfUtf8!=NULL)
       
   207 				{
       
   208 				*aUtf8=(char)(0x80|((currentUnicodeCharacter>>6)&0x3f));
       
   209 				}
       
   210 			++aUtf8;
       
   211 			if (startOfUtf8!=NULL)
       
   212 				{
       
   213 				*aUtf8=(char)(0x80|(currentUnicodeCharacter&0x3f));
       
   214 				}
       
   215 			}
       
   216 		++aUtf8;
       
   217 		++aUnicode;
       
   218 		}
       
   219 	if (startOfUtf8!=NULL)
       
   220 		{
       
   221 		*aUtf8='\0';
       
   222 		}
       
   223 	return aUtf8-startOfUtf8;
       
   224 	}
       
   225