--- a/userlibandfileserver/fatfilenameconversionplugins/src/cp54936_unicodeconv.cpp Mon Dec 21 16:14:42 2009 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,461 +0,0 @@
-/*
-* Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
-* All rights reserved.
-* This component and the accompanying materials are made available
-* under the terms of "Eclipse Public License v1.0"
-* which accompanies this distribution, and is available
-* at the URL "http://www.eclipse.org/legal/epl-v10.html".
-*
-* Initial Contributors:
-* Nokia Corporation - initial contribution.
-*
-* Contributors:
-*
-* Description:
-*
-*/
-// There are 2 reasons why not use existing unicodeconv.cpp:
-// 1) "unicode->foreign" in existing unicodeconv.cpp is quite slow, especially
-// for huge code pages (e.g, Asia code pages). See INC127598.
-//
-// 2) GB18030 has 32-bit code that existing unicodeconv.cpp cannot handle.
-//
-// The algorithm of this special version unicodeconv.cpp is straightforward:
-// 1) foreign->unicode:
-// 1.1) 1 byte/2 byte->unicode bmp: use existing mechanism; mapping table in
-// "cp54936_2byte_tounicode.cpp", which is generated with command
-// "perl -w ..\group\FatConversionTable.pl cp54936_2byte.txt".
-//
-// 1.2) 4 byte->unicode bmp: convert the 4-byte code to a 16-bit index, then
-// search into the mapping table in "cp54936_4byte_tounicode.cpp",
-// which is generated with command
-// "perl -w ..\group\cp54936_4byte_tounicode.pl cp54936_4byte.txt".
-//
-// 1.3) 4 byte->unicode non-bmp: calculate with formula in this file.
-//
-// 2) unicode->foreign:
-// 2.1) unicode bmp->1/2/4 byte: the huge table in "cp54936_allbmp_fromunicode.cpp"
-// can map directly, which is generated with command
-// "perl -w ..\group\cp54936_allbmp_fromunicode.pl cp54936_2byte.txt cp54936_4byte.txt".
-//
-// 2.2) unicode non-bmp->4 byte: calculate with formula in this file.
-//
-// The function cp54936_2byte_tounicode.cpp::TConvDataStruct::
-// ConvertSingleUnicode() is not used anymore. It's reserved just because not
-// changing the tool FatConversionTable.pl.
-//
-// About the mapping table "cp54936_2byte.txt" and "cp54936_4byte.txt":
-// 1) All Private Used Area (PUA) code points are reserved.
-// 2) All GB18030 code points that mapping to undefined Unicode are reserved.
-//
-//
-// About the formula for non-bmp calculation:
-// 1) All code points from 0x10000 to 0x10FFFF are supported.
-// 2) Code points in 0x10000-0x1FFFF and 0x30000-0x10FFFF are summarized from
-// the GB18030 standard, since the standard does not define the mapping for
-// code points out of 0x20000-0x2FFFF.
-
-
-#include <e32std.h>
-#include <e32def.h>
-#include <e32des8.h>
-#include "unicodeconv.h"
-#include "cp54936.h"
-
-
-enum TFccPanic
- {
- EBadForeignCode = 0,
- E4ByteIndexOutOfRange,
- EPanicBadIndices1,
- EInavlidUnicodeValue
- };
-void Panic(TFccPanic aPanic)
- {
-
- User::Panic(_L("FatCharsetConv"),aPanic);
- }
-
-
-//replacement character to be used when unicode cannot be converted
-const TUint8 KForeignReplacement = 0x5F;
-
-const TUint8 KU10000Byte1 = 0x90;
-const TUint8 KU10000Byte2 = 0x30;
-const TUint8 KU10000Byte3 = 0x81;
-const TUint8 KU10000Byte4 = 0x30;
-
-inline TBool IsSupplementary(TUint aChar)
-/**
-@param aChar The 32-bit code point value of a Unicode character.
-
-@return True, if aChar is supplementary character; false, otherwise.
-*/
- {
- return (aChar > 0xFFFF);
- }
-
-inline TBool IsSurrogate(TText16 aInt16)
-/**
-@return True, if aText16 is high surrogate or low surrogate; false, otherwise.
-*/
- {
- return (aInt16 & 0xF800) == 0xD800;
- }
-
-inline TBool IsHighSurrogate(TText16 aInt16)
-/**
-@return True, if aText16 is high surrogate; false, otherwise.
-*/
- {
- return (aInt16 & 0xFC00) == 0xD800;
- }
-
-inline TBool IsLowSurrogate(TText16 aInt16)
-/**
-@return True, if aText16 is low surrogate; false, otherwise.
-*/
- {
- return (aInt16 & 0xFC00) == 0xDC00;
- }
-
-inline TUint JoinSurrogate(TText16 aHighSurrogate, TText16 aLowSurrogate)
-/**
-Combine a high surrogate and a low surrogate into a supplementary character.
-
-@return The 32-bit code point value of the generated Unicode supplementary
- character.
-*/
- {
- return ((aHighSurrogate - 0xD7F7) << 10) + aLowSurrogate;
- }
-
-inline TText16 GetHighSurrogate(TUint aChar)
-/**
-Retrieve the high surrogate of a supplementary character.
-
-@param aChar The 32-bit code point value of a Unicode character.
-
-@return High surrogate of aChar, if aChar is a supplementary character;
- aChar itself, if aChar is not a supplementary character.
-*/
- {
- return STATIC_CAST(TText16, 0xD7C0 + (aChar >> 10));
- }
-
-inline TText16 GetLowSurrogate(TUint aChar)
-/**
-Retrieve the low surrogate of a supplementary character.
-
-@param aChar The 32-bit code point value of a Unicode character.
-
-@return Low surrogate of aChar, if aChar is a supplementary character;
- zero, if aChar is not a supplementary character.
-*/
- {
- return STATIC_CAST(TText16, 0xDC00 | (aChar & 0x3FF));
- }
-
-//This function converts from Unicoded characters, to foreign characters and adds them into a descriptor
-EXPORT_C void UnicodeConv::ConvertFromUnicodeL(TDes8& aForeign, const TDesC16& aUnicode)
- {
- UnicodeConv::ConvertFromUnicodeL(aForeign, aUnicode, ETrue);
- }
-
-//This function converts from Unicoded characters, to foreign characters and adds them into a descriptor
-EXPORT_C TInt UnicodeConv::ConvertFromUnicodeL(TDes8& aForeign, const TDesC16& aUnicode, TBool leaveWhenOverflow)
- {
- const TInt length = aUnicode.Length();
- const TUint16* unicode = aUnicode.Ptr();
- const TUint16* guard = unicode + length;
-
- TUint8* foreign = const_cast<TUint8*>(aForeign.Ptr());
- TUint8* foreignguard = foreign + aForeign.MaxLength();
-
- //loop going through the character of the unicode descriptor
- while (unicode < guard)
- {
- TUint32 unicodeChar = *unicode++;
- if (IsHighSurrogate(unicodeChar))
- {
- if (unicode >= guard || !IsLowSurrogate(*unicode))
- {
- if (foreign >= foreignguard)
- {
- aForeign.SetLength(foreign-aForeign.Ptr());
- if (leaveWhenOverflow)
- User::Leave(KErrOverflow);
- else
- return KErrOverflow;
- }
- *foreign++ = KForeignReplacement;
- continue;
- }
- unicodeChar = JoinSurrogate(unicodeChar, *unicode++);
- }
- if (IsLowSurrogate(unicodeChar))
- {
- if (foreign >= foreignguard)
- {
- aForeign.SetLength(foreign-aForeign.Ptr());
- if (leaveWhenOverflow)
- User::Leave(KErrOverflow);
- else
- return KErrOverflow;
- }
- *foreign++ = KForeignReplacement;
- continue;
- }
-
- TUint8 b1, b2, b3, b4; // byte 1,2,3,4 of result GB18030 code.
- TInt count; // byte count of result GB18030 code; can be 1, 2 or 4.
-
- // unicode to cp54936
- if (IsSupplementary(unicodeChar))
- {
- unicodeChar -= 0x10000;
- b4 = unicodeChar % 10 + KU10000Byte4;
- unicodeChar /= 10;
- b3 = unicodeChar % 126 + KU10000Byte3;
- unicodeChar /= 126;
- b2 = unicodeChar % 10 + KU10000Byte2;
- b1 = unicodeChar / 10 + KU10000Byte1;
- count = 4;
- }
- else
- {
- TUint32 foreignChar;
- foreignChar = KMappingTableUnicodeBmp2CP54936[unicodeChar];
- b1 = ((foreignChar >> 24) & 0xFF);
- b2 = ((foreignChar >> 16) & 0xFF);
- b3 = ((foreignChar >> 8) & 0xFF);
- b4 = (foreignChar & 0xFF);
- count = 1;
- if (b1)
- {
- count = 4;
- }
- else
- {
- __ASSERT_DEBUG(b2==0, Panic(EBadForeignCode));
- if (b3)
- {
- count = 2;
- }
- }
- }
-
- if (foreign + count > foreignguard)
- {
- aForeign.SetLength(foreign-aForeign.Ptr());
- if (leaveWhenOverflow)
- User::Leave(KErrOverflow);
- else
- return KErrOverflow;
- }
- if (count == 4)
- {
- *foreign++ = b1;
- *foreign++ = b2;
- }
- if (count >= 2)
- *foreign++ = b3;
- *foreign++ = b4;
- }
- aForeign.SetLength(foreign-aForeign.Ptr());
- return KErrNone;
- }
-
-
-//This function converts from foreign characters into unicode and adds them into a descriptor
-EXPORT_C void UnicodeConv::ConvertToUnicodeL(TDes16& aUnicode, const TDesC8& aForeign)
- {
- UnicodeConv::ConvertToUnicodeL(aUnicode, aForeign, ETrue);
- }
-
-//This function converts from foreign characters into unicode and adds them into a descriptor
-EXPORT_C TInt UnicodeConv::ConvertToUnicodeL(TDes16& aUnicode, const TDesC8& aForeign, TBool leaveWhenOverflow)
- {
- const TInt foreignLength = aForeign.Length();
- const TUint8* foreign = aForeign.Ptr();
- const TUint8* guard = foreign + foreignLength;
-
- TUint16* unicode = const_cast<TUint16*>(aUnicode.Ptr());
- TUint16* unicodeguard = unicode + aUnicode.MaxLength();
-
- TUint8 b1, b2, b3, b4;
- enum TCodeType
- {
- E1Byte = 0,
- E2Byte,
- E4ByteBmp,
- E4ByteSupplementary,
- EError,
- };
- TCodeType codetype;
- TUint32 unicodeChar;
-
- //loop going through the characters of the foreign descriptor
- while (foreign < guard)
- {
- // roughly, detect which area the foreign code belongs to
- b1 = *foreign++;
- if (b1 <= 0x7F)
- codetype = E1Byte;
- else if (b1 == 0x80 || b1 > 0xFE)
- codetype = EError;
- else if (foreign >= guard)
- codetype = EError;
- else
- {
- b2 = *foreign++;
- if (b2 >= 0x40 && b2 <= 0xFE && b2 != 0x7F)
- codetype = E2Byte;
- else if (b2 < 0x30 || b2 > 0x39)
- codetype = EError;
- else if (foreign+1 >= guard)
- codetype = EError;
- else
- {
- b3 = *foreign++;
- if (b3 < 0x81 || b3 > 0xFE)
- codetype = EError;
- else
- {
- b4 = *foreign++;
- if (b4 < 0x30 || b4 > 0x39)
- codetype = EError;
- else if (b1 >= 0x81 && b1 <= 0x84) // 0x81308130-0x8439FE39
- codetype = E4ByteBmp;
- else if (b1 >= 0x90 && b1 <= 0xE3) // 0x90308130-0xE339FE39
- codetype = E4ByteSupplementary;
- else
- codetype = EError; // others are reserved
- }
- }
- }
-
- // cp54936 to unicode
- if (codetype == E1Byte)
- {
- unicodeChar = b1;
- }
- else if (codetype == E2Byte)
- {
- // conventional algorithm used in FatCharsetConv
- const TLeadOrSingle* structPtr = TConvDataStruct::KFirstByteConversions + (b1-0x80);
- if (structPtr->iUnicodeIfSingle)
- unicodeChar = structPtr->iUnicodeIfSingle;
- else if (TConvDataStruct::KMinTrailByte <= b2 && b2 <= TConvDataStruct::KMaxTrailByte)
- unicodeChar = TConvDataStruct::KDoubleByteConversions[structPtr->iDoubleByteIndex + (b2 - TConvDataStruct::KMinTrailByte)];
- else
- unicodeChar = 0xFFFD;
- }
- else if (codetype == E4ByteBmp)
- {
- TUint index = (b1-0x81)*12600 + (b2-0x30)*1260 + (b3-0x81)*10 + (b4-0x30);
- __ASSERT_DEBUG(index<39420, Panic(E4ByteIndexOutOfRange));
- unicodeChar = KMappingTable4ByteBmp2Unicode[index];
- }
- else if (codetype == E4ByteSupplementary)
- {
- unicodeChar = 0x10000 + (b1 - KU10000Byte1) * 12600 +
- (b2 - KU10000Byte2) * 1260 +
- (b3 - KU10000Byte3) * 10 +
- (b4 - KU10000Byte4);
- __ASSERT_DEBUG(unicodeChar >= 0x10000 && unicodeChar <= 0x10FFFF, Panic(EInavlidUnicodeValue));
- }
- else
- {
- unicodeChar = 0xFFFD;
- }
-
- // append to output buffer
- if (IsSupplementary(unicodeChar))
- {
- if (unicode + 1 >= unicodeguard)
- {
- aUnicode.SetLength(unicode-aUnicode.Ptr());
- if (leaveWhenOverflow)
- User::Leave(KErrOverflow);
- else
- return KErrOverflow;
- }
- *unicode++ = GetHighSurrogate(unicodeChar);
- *unicode++ = GetLowSurrogate(unicodeChar);
- }
- else
- {
- if (unicode >= unicodeguard)
- {
- aUnicode.SetLength(unicode-aUnicode.Ptr());
- if (leaveWhenOverflow)
- User::Leave(KErrOverflow);
- else
- return KErrOverflow;
- }
- *unicode++ = unicodeChar;
- }
- }
- aUnicode.SetLength(unicode-aUnicode.Ptr());
- return KErrNone;
- }
-
-EXPORT_C TBool UnicodeConv::IsLegalShortNameCharacter (TUint aCharacter)
- {
- //1. aCharacter >= 0x0080
- if (aCharacter>=0x0080)
- {
- // Since all Unicode characters can be mapped to GB18030, so no need to
- // test the converting.
- if (aCharacter <= 0x10FFFF && !IsSurrogate(aCharacter))
- return ETrue;
- else
- return EFalse;
- }
-
- // For most common cases:
- // Note: lower case characters are considered legal DOS char here.
- if ((aCharacter>='a' && aCharacter<='z') ||
- (aCharacter>='A' && aCharacter<='Z') ||
- (aCharacter>='0' && aCharacter<='9'))
- {
- return ETrue;
- }
- // Checking for illegal chars:
- // 2. aCharacter <= 0x20
- // Note: leading 0x05 byte should be guarded by callers of this function
- // as the information of the position of the character is required.
- if (aCharacter < 0x20)
- return EFalse;
- // Space (' ') is not considered as a legal DOS char here.
- if (aCharacter == 0x20)
- return EFalse;
-
- // 3. 0x20 < aCharacter < 0x80
- // According to FAT Spec, "following characters are not legal in any bytes of DIR_Name":
- switch (aCharacter)
- {
- case 0x22: // '"'
- case 0x2A: // '*'
- case 0x2B: // '+'
- case 0x2C: // ','
- //case 0x2E: // '.' // Although '.' is not allowed in any bytes of DIR_Name, it
- // is a valid character in short file names.
- case 0x2F: // '/'
- case 0x3A: // ':'
- case 0x3B: // ';'
- case 0x3C: // '<'
- case 0x3D: // '='
- case 0x3E: // '>'
- case 0x3F: // '?'
- case 0x5B: // '['
- case 0x5C: // '\'
- case 0x5D: // ']'
- case 0x7C: // '|'
- return EFalse;
- default:
- return ETrue;
- }
- }
-