// NOTE: Trying to figure out how to implement a WINC-like compatibility layer. Going the emulation route is probably not so smart: we should not use the kernel, but rather hook native functions in the Exec calls.
// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//
// There are 2 reasons why the existing unicodeconv.cpp is not used:
// 1) "unicode->foreign" in the existing unicodeconv.cpp is quite slow, especially
// for huge code pages (e.g., Asian code pages). See INC127598.
//
// 2) GB18030 has 32-bit codes that the existing unicodeconv.cpp cannot handle.
//
// The algorithm of this special version unicodeconv.cpp is straightforward:
// 1) foreign->unicode:
// 1.1) 1 byte/2 byte->unicode bmp: use existing mechanism; mapping table in
// "cp54936_2byte_tounicode.cpp", which is generated with command
// "perl -w ..\group\FatConversionTable.pl cp54936_2byte.txt".
//
// 1.2) 4 byte->unicode bmp: convert the 4-byte code to a 16-bit index, then
// search into the mapping table in "cp54936_4byte_tounicode.cpp",
// which is generated with command
// "perl -w ..\group\cp54936_4byte_tounicode.pl cp54936_4byte.txt".
//
// 1.3) 4 byte->unicode non-bmp: calculate with formula in this file.
//
// 2) unicode->foreign:
// 2.1) unicode bmp->1/2/4 byte: the huge table in "cp54936_allbmp_fromunicode.cpp"
// can map directly, which is generated with command
// "perl -w ..\group\cp54936_allbmp_fromunicode.pl cp54936_2byte.txt cp54936_4byte.txt".
//
// 2.2) unicode non-bmp->4 byte: calculate with formula in this file.
//
// The function cp54936_2byte_tounicode.cpp::TConvDataStruct::
// ConvertSingleUnicode() is not used anymore. It is kept only so that the
// tool FatConversionTable.pl does not have to be changed.
//
// About the mapping tables "cp54936_2byte.txt" and "cp54936_4byte.txt":
// 1) All Private Use Area (PUA) code points are reserved.
// 2) All GB18030 code points that map to undefined Unicode are reserved.
//
//
// About the formula for non-bmp calculation:
// 1) All code points from 0x10000 to 0x10FFFF are supported.
// 2) The mappings for code points in 0x10000-0x1FFFF and 0x30000-0x10FFFF are
// inferred from the GB18030 standard, since the standard does not define the
// mapping for code points outside 0x20000-0x2FFFF.
#include <e32std.h>
#include <e32def.h>
#include <e32des8.h>
#include "unicodeconv.h"
#include "cp54936.h"
// Panic numbers passed to Panic() under the "FatCharsetConv" category.
// The numeric values are part of the panic report, so they are spelled out.
enum TFccPanic
{
    // A BMP table entry that is not a 4-byte code has a non-zero second byte.
    EBadForeignCode = 0,
    // An index computed from a 4-byte code lies outside the 4-byte BMP table.
    E4ByteIndexOutOfRange = 1,
    // Not referenced in this file.
    EPanicBadIndices1 = 2,
    // A computed code point lies outside 0x10000-0x10FFFF.
    // (The misspelling is kept because the identifier is referenced by name.)
    EInavlidUnicodeValue = 3
};
// Panics the current thread in the "FatCharsetConv" category, using aPanic
// as the panic number. Used by the debug-only assertions in this file.
void Panic(TFccPanic aPanic)
{
User::Panic(_L("FatCharsetConv"),aPanic);
}
// Replacement byte written to the foreign output when a Unicode character
// cannot be converted (0x5F is ASCII '_').
const TUint8 KForeignReplacement = 0x5F;
// The four bytes of the GB18030 code that corresponds to U+10000
// (0x90 0x30 0x81 0x30). The non-BMP formulas in this file compute
// supplementary characters as offsets from this base code.
const TUint8 KU10000Byte1 = 0x90;
const TUint8 KU10000Byte2 = 0x30;
const TUint8 KU10000Byte3 = 0x81;
const TUint8 KU10000Byte4 = 0x30;
inline TBool IsSupplementary(TUint aChar)
/**
Tests whether a code point lies outside the Basic Multilingual Plane.
@param aChar The 32-bit code point value of a Unicode character.
@return True, if aChar is above U+FFFF (a supplementary character);
		false, otherwise.
*/
{
    return (aChar >= 0x10000);
}
inline TBool IsSurrogate(TText16 aInt16)
/**
Tests whether a 16-bit code unit lies in the surrogate range 0xD800-0xDFFF.
@param aInt16 A single UTF-16 code unit.
@return True, if aInt16 is a high surrogate or a low surrogate; false, otherwise.
*/
{
    return (aInt16 >= 0xD800 && aInt16 <= 0xDFFF);
}
inline TBool IsHighSurrogate(TText16 aInt16)
/**
Tests whether a 16-bit code unit lies in the high-surrogate range 0xD800-0xDBFF.
@param aInt16 A single UTF-16 code unit.
@return True, if aInt16 is a high surrogate; false, otherwise.
*/
{
    return (aInt16 >= 0xD800 && aInt16 <= 0xDBFF);
}
inline TBool IsLowSurrogate(TText16 aInt16)
/**
Tests whether a 16-bit code unit lies in the low-surrogate range 0xDC00-0xDFFF.
@param aInt16 A single UTF-16 code unit.
@return True, if aInt16 is a low surrogate; false, otherwise.
*/
{
    return (aInt16 >= 0xDC00 && aInt16 <= 0xDFFF);
}
inline TUint JoinSurrogate(TText16 aHighSurrogate, TText16 aLowSurrogate)
/**
Combine a high surrogate and a low surrogate into a supplementary character.
The arguments are not validated; callers are expected to pass a genuine
high/low surrogate pair.
@param aHighSurrogate The leading code unit of the pair.
@param aLowSurrogate The trailing code unit of the pair.
@return The 32-bit code point value of the generated Unicode supplementary
		character.
*/
{
    // 0xD7F7 is 0xD800 - 9: the extra 9 << 10 (0x2400) equals 0x10000 - 0xDC00,
    // which folds both the plane offset and the low-surrogate base into a
    // single subtraction.
    const TUint shiftedHigh = (aHighSurrogate - 0xD7F7) << 10;
    return shiftedHigh + aLowSurrogate;
}
inline TText16 GetHighSurrogate(TUint aChar)
/**
Retrieve the high surrogate of a supplementary character.
No range check is made: for a non-supplementary aChar the result is
0xD7C0 + (aChar >> 10), which is not a surrogate value.
@param aChar The 32-bit code point value of a Unicode supplementary character.
@return High surrogate of aChar.
*/
{
    // 0xD7C0 is 0xD800 - (0x10000 >> 10), so U+10000 maps to 0xD800.
    const TUint topBits = aChar >> 10;
    return STATIC_CAST(TText16, 0xD7C0 + topBits);
}
inline TText16 GetLowSurrogate(TUint aChar)
/**
Retrieve the low surrogate of a supplementary character.
No range check is made: the result is always 0xDC00 combined with the low
10 bits of aChar, whether or not aChar is a supplementary character.
@param aChar The 32-bit code point value of a Unicode supplementary character.
@return Low surrogate of aChar.
*/
{
    const TUint bottomBits = aChar & 0x3FF;
    return STATIC_CAST(TText16, 0xDC00 | bottomBits);
}
// Converts the Unicode (UTF-16) characters in aUnicode to foreign (CP54936)
// characters and stores them in the descriptor aForeign.
// Delegates to the three-argument overload with leaveWhenOverflow set to
// ETrue, so running out of space in aForeign is reported by leaving with
// KErrOverflow; the overload's return value is not used.
EXPORT_C void UnicodeConv::ConvertFromUnicodeL(TDes8& aForeign, const TDesC16& aUnicode)
{
UnicodeConv::ConvertFromUnicodeL(aForeign, aUnicode, ETrue);
}
// Converts the Unicode (UTF-16) characters in aUnicode to foreign (CP54936 /
// GB18030) characters and stores them in aForeign, starting at the beginning
// of the descriptor.
//
// - BMP characters are looked up in KMappingTableUnicodeBmp2CP54936 and
//   produce 1, 2 or 4 bytes.
// - Well-formed surrogate pairs are combined and encoded as 4 bytes by formula.
// - An unpaired high surrogate or a stray low surrogate produces a single
//   KForeignReplacement byte.
//
// @param aForeign          Receives the converted bytes; its length is set to
//                          the number of bytes written, also on overflow.
// @param aUnicode          The UTF-16 text to convert.
// @param leaveWhenOverflow If true, leave with KErrOverflow when aForeign is
//                          too small; otherwise return KErrOverflow.
// @return KErrNone on success, KErrOverflow if aForeign is too small and
//         leaveWhenOverflow is false.
EXPORT_C TInt UnicodeConv::ConvertFromUnicodeL(TDes8& aForeign, const TDesC16& aUnicode, TBool leaveWhenOverflow)
{
    const TUint16* unicode = aUnicode.Ptr();
    const TUint16* const guard = unicode + aUnicode.Length();
    TUint8* foreign = const_cast<TUint8*>(aForeign.Ptr());
    TUint8* const foreignguard = foreign + aForeign.MaxLength();

    //loop going through the characters of the unicode descriptor
    while (unicode < guard)
    {
        const TUint16 codeUnit = *unicode++;
        TUint32 unicodeChar = codeUnit;

        // Surrogate classification is done on the 16-bit code unit only.
        // It must never be applied to the joined 32-bit code point: the
        // surrogate helpers take a 16-bit argument, so a valid supplementary
        // character such as U+1DC00 would be truncated to 0xDC00 and be
        // mistaken for a stray low surrogate.
        const TBool surrogateUnit = IsHighSurrogate(codeUnit) || IsLowSurrogate(codeUnit);
        const TBool startsPair = IsHighSurrogate(codeUnit) && unicode < guard && IsLowSurrogate(*unicode);
        if (startsPair)
        {
            unicodeChar = JoinSurrogate(codeUnit, *unicode++);
        }

        TUint8 b1 = 0;                      // byte 1,2,3,4 of result GB18030 code.
        TUint8 b2 = 0;
        TUint8 b3 = 0;
        TUint8 b4 = KForeignReplacement;
        TInt count = 1;                     // byte count of result; can be 1, 2 or 4.

        if (surrogateUnit && !startsPair)
        {
            // Unpaired high surrogate or stray low surrogate: emit the single
            // replacement byte already placed in b4.
        }
        else if (IsSupplementary(unicodeChar))
        {
            // Non-BMP: mixed-radix (10, 126, 10) offset from the code of U+10000.
            TUint32 offset = unicodeChar - 0x10000;
            b4 = static_cast<TUint8>(offset % 10 + KU10000Byte4);
            offset /= 10;
            b3 = static_cast<TUint8>(offset % 126 + KU10000Byte3);
            offset /= 126;
            b2 = static_cast<TUint8>(offset % 10 + KU10000Byte2);
            b1 = static_cast<TUint8>(offset / 10 + KU10000Byte1);
            count = 4;
        }
        else
        {
            // BMP: each table entry packs the GB18030 code into 32 bits,
            // most significant byte first, zero-padded for shorter codes.
            const TUint32 foreignChar = KMappingTableUnicodeBmp2CP54936[unicodeChar];
            b1 = static_cast<TUint8>((foreignChar >> 24) & 0xFF);
            b2 = static_cast<TUint8>((foreignChar >> 16) & 0xFF);
            b3 = static_cast<TUint8>((foreignChar >> 8) & 0xFF);
            b4 = static_cast<TUint8>(foreignChar & 0xFF);
            if (b1)
            {
                count = 4;
            }
            else
            {
                __ASSERT_DEBUG(b2==0, Panic(EBadForeignCode));
                count = b3 ? 2 : 1;
            }
        }

        // Compare remaining capacity instead of forming a pointer that may
        // lie beyond the end of the buffer.
        if (foreignguard - foreign < count)
        {
            aForeign.SetLength(foreign-aForeign.Ptr());
            if (leaveWhenOverflow)
                User::Leave(KErrOverflow);
            return KErrOverflow;
        }

        if (count == 4)
        {
            *foreign++ = b1;
            *foreign++ = b2;
        }
        if (count >= 2)
        {
            *foreign++ = b3;
        }
        *foreign++ = b4;
    }

    aForeign.SetLength(foreign-aForeign.Ptr());
    return KErrNone;
}
// Converts the foreign (CP54936) characters in aForeign to Unicode (UTF-16)
// and stores them in the descriptor aUnicode.
// Delegates to the three-argument overload with leaveWhenOverflow set to
// ETrue, so running out of space in aUnicode is reported by leaving with
// KErrOverflow; the overload's return value is not used.
EXPORT_C void UnicodeConv::ConvertToUnicodeL(TDes16& aUnicode, const TDesC8& aForeign)
{
UnicodeConv::ConvertToUnicodeL(aUnicode, aForeign, ETrue);
}
// Converts the foreign (CP54936 / GB18030) bytes in aForeign to Unicode
// (UTF-16) and stores the result in aUnicode, starting at the beginning of
// the descriptor.
//
// Each foreign code is first classified as 1-byte, 2-byte, 4-byte BMP,
// 4-byte supplementary or malformed, and then converted:
// - 1-byte codes map to themselves;
// - 2-byte codes go through the TConvDataStruct tables;
// - 4-byte BMP codes are turned into an index into KMappingTable4ByteBmp2Unicode;
// - 4-byte supplementary codes are computed by formula and written as a
//   surrogate pair;
// - malformed, truncated or reserved codes produce U+FFFD.
//
// @param aUnicode          Receives the converted text; its length is set to
//                          the number of code units written, also on overflow.
// @param aForeign          The foreign bytes to convert.
// @param leaveWhenOverflow If true, leave with KErrOverflow when aUnicode is
//                          too small; otherwise return KErrOverflow.
// @return KErrNone on success, KErrOverflow if aUnicode is too small and
//         leaveWhenOverflow is false.
EXPORT_C TInt UnicodeConv::ConvertToUnicodeL(TDes16& aUnicode, const TDesC8& aForeign, TBool leaveWhenOverflow)
{
    // Exclusive upper bound for indices into KMappingTable4ByteBmp2Unicode.
    // NOTE(review): taken from the range the original debug assertion
    // enforced; confirm it matches the generated table size.
    const TUint K4ByteBmpIndexLimit = 39420;
    const TUint32 KMaxCodePoint = 0x10FFFF;
    const TUint32 KUnicodeReplacement = 0xFFFD;

    const TUint8* foreign = aForeign.Ptr();
    const TUint8* const guard = foreign + aForeign.Length();
    TUint16* unicode = const_cast<TUint16*>(aUnicode.Ptr());
    TUint16* const unicodeguard = unicode + aUnicode.MaxLength();

    enum TCodeType
    {
        E1Byte = 0,
        E2Byte,
        E4ByteBmp,
        E4ByteSupplementary,
        EError
    };

    //loop going through the characters of the foreign descriptor
    while (foreign < guard)
    {
        // roughly, detect which area the foreign code belongs to
        const TUint8 b1 = *foreign++;
        TUint8 b2 = 0;
        TUint8 b3 = 0;
        TUint8 b4 = 0;
        TCodeType codetype;

        if (b1 <= 0x7F)
            codetype = E1Byte;
        else if (b1 == 0x80 || b1 > 0xFE)
            codetype = EError;
        else if (foreign >= guard)
            codetype = EError;              // lead byte with nothing after it
        else
        {
            b2 = *foreign++;
            if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)
                codetype = E2Byte;
            else if (b2 < 0x30 || b2 > 0x39)
                codetype = EError;
            else if (guard - foreign < 2)
                codetype = EError;          // 4-byte code cut short
            else
            {
                b3 = *foreign++;
                if (b3 < 0x81 || b3 > 0xFE)
                    codetype = EError;
                else
                {
                    b4 = *foreign++;
                    if (b4 < 0x30 || b4 > 0x39)
                        codetype = EError;
                    else if (b1 >= 0x81 && b1 <= 0x84)  // 0x81308130-0x8439FE39
                        codetype = E4ByteBmp;
                    else if (b1 >= 0x90 && b1 <= 0xE3)  // 0x90308130-0xE339FE39
                        codetype = E4ByteSupplementary;
                    else
                        codetype = EError;  // others are reserved
                }
            }
        }

        // cp54936 to unicode; anything not converted below stays U+FFFD
        TUint32 unicodeChar = KUnicodeReplacement;
        switch (codetype)
        {
        case E1Byte:
            unicodeChar = b1;
            break;

        case E2Byte:
            {
            // conventional algorithm used in FatCharsetConv
            const TLeadOrSingle* structPtr = TConvDataStruct::KFirstByteConversions + (b1-0x80);
            if (structPtr->iUnicodeIfSingle)
                unicodeChar = structPtr->iUnicodeIfSingle;
            else if (TConvDataStruct::KMinTrailByte <= b2 && b2 <= TConvDataStruct::KMaxTrailByte)
                unicodeChar = TConvDataStruct::KDoubleByteConversions[structPtr->iDoubleByteIndex + (b2 - TConvDataStruct::KMinTrailByte)];
            break;
            }

        case E4ByteBmp:
            {
            // The byte ranges accepted above allow indices up to 50399, but
            // only indices below K4ByteBmpIndexLimit are valid. The rest are
            // reserved codes and must not be used to read the table.
            const TUint index = (b1-0x81)*12600 + (b2-0x30)*1260 + (b3-0x81)*10 + (b4-0x30);
            if (index < K4ByteBmpIndexLimit)
                unicodeChar = KMappingTable4ByteBmp2Unicode[index];
            break;
            }

        case E4ByteSupplementary:
            {
            // The byte ranges accepted above reach past U+10FFFF; such codes
            // are reserved and cannot be expressed as a surrogate pair.
            const TUint32 codePoint = 0x10000 + (b1 - KU10000Byte1) * 12600 +
                                                (b2 - KU10000Byte2) * 1260 +
                                                (b3 - KU10000Byte3) * 10 +
                                                (b4 - KU10000Byte4);
            if (codePoint <= KMaxCodePoint)
                unicodeChar = codePoint;
            break;
            }

        default:
            break;
        }

        // append to output buffer
        const TInt unitsNeeded = IsSupplementary(unicodeChar) ? 2 : 1;
        if (unicodeguard - unicode < unitsNeeded)
        {
            aUnicode.SetLength(unicode-aUnicode.Ptr());
            if (leaveWhenOverflow)
                User::Leave(KErrOverflow);
            return KErrOverflow;
        }
        if (unitsNeeded == 2)
        {
            *unicode++ = GetHighSurrogate(unicodeChar);
            *unicode++ = GetLowSurrogate(unicodeChar);
        }
        else
        {
            *unicode++ = static_cast<TUint16>(unicodeChar);
        }
    }

    aUnicode.SetLength(unicode-aUnicode.Ptr());
    return KErrNone;
}
// Tells whether aCharacter may appear in a short (DOS 8.3) file name.
//
// @param aCharacter A Unicode code point (not a UTF-16 code unit).
// @return ETrue for letters, digits, '.', the remaining printable ASCII
//         characters not excluded below, and every non-ASCII Unicode scalar
//         value up to U+10FFFF; EFalse for control characters, space, the
//         characters the FAT specification forbids, surrogate code points and
//         values above U+10FFFF.
EXPORT_C TBool UnicodeConv::IsLegalShortNameCharacter (TUint aCharacter)
{
    //1. aCharacter >= 0x0080
    if (aCharacter>=0x0080)
    {
        // Since all Unicode characters can be mapped to GB18030, so no need to
        // test the converting.
        // The surrogate test is done on the full code point value. Passing
        // aCharacter through a 16-bit parameter would truncate it, so that a
        // valid supplementary character such as U+1D800 would be mistaken
        // for the surrogate 0xD800.
        const TBool surrogateCodePoint = (aCharacter >= 0xD800 && aCharacter <= 0xDFFF);
        if (aCharacter <= 0x10FFFF && !surrogateCodePoint)
            return ETrue;
        else
            return EFalse;
    }

    // For most common cases:
    // Note: lower case characters are considered legal DOS char here.
    if ((aCharacter>='a' && aCharacter<='z') ||
        (aCharacter>='A' && aCharacter<='Z') ||
        (aCharacter>='0' && aCharacter<='9'))
    {
        return ETrue;
    }

    // Checking for illegal chars:
    // 2. aCharacter < 0x20 (control characters)
    // Note: leading 0x05 byte should be guarded by callers of this function
    //    as the information of the position of the character is required.
    if (aCharacter < 0x20)
        return EFalse;

    // Space (' ') is not considered as a legal DOS char here.
    if (aCharacter == 0x20)
        return EFalse;

    // 3. 0x20 < aCharacter < 0x80
    // According to FAT Spec, "following characters are not legal in any bytes of DIR_Name":
    switch (aCharacter)
    {
        case 0x22:  // '"'
        case 0x2A:  // '*'
        case 0x2B:  // '+'
        case 0x2C:  // ','
        //case 0x2E:  // '.'   // Although '.' is not allowed in any bytes of DIR_Name, it
                               // is a valid character in short file names.
        case 0x2F:  // '/'
        case 0x3A:  // ':'
        case 0x3B:  // ';'
        case 0x3C:  // '<'
        case 0x3D:  // '='
        case 0x3E:  // '>'
        case 0x3F:  // '?'
        case 0x5B:  // '['
        case 0x5C:  // '\'
        case 0x5D:  // ']'
        case 0x7C:  // '|'
            return EFalse;
        default:
            return ETrue;
    }
}