diff -r 000000000000 -r 1fb32624e06b charconvfw/charconvplugins/tools/UTF.PM --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/charconvfw/charconvplugins/tools/UTF.PM Tue Feb 02 02:02:46 2010 +0200 @@ -0,0 +1,200 @@ +# +# Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies). +# All rights reserved. +# This component and the accompanying materials are made available +# under the terms of "Eclipse Public License v1.0" +# which accompanies this distribution, and is available +# at the URL "http://www.eclipse.org/legal/epl-v10.html". +# +# Initial Contributors: +# Nokia Corporation - initial contribution. +# +# Contributors: +# +# Description: +# + +use strict; +use integer; + +package UTF; +require Exporter; +@UTF::ISA=qw(Exporter); +@UTF::EXPORT=qw(Utf8ToUnicode UnicodeToUtf8); + +my $KErrorIllFormedInput=-1; + +sub Utf8ToUnicode + { + my $Unicode = shift; + my $Utf8 = shift; + my $UnicodeTemplate = shift; + my $Utf8Index = 0; + my $UnicodeIndex = 0; + my $numOfBytes = length($Utf8); + my @Utf8Unpacked = unpack "C*",$Utf8; + my @UnicodeUnpacked = (); + + for (;;) + { + if ($Utf8Index > $#Utf8Unpacked) + { + last; + } + + my $currentUtf8Byte = $Utf8Unpacked[$Utf8Index]; + + if (($currentUtf8Byte&0x80)==0x00) + { + $UnicodeUnpacked[$UnicodeIndex] = $currentUtf8Byte; + } + + elsif (($currentUtf8Byte&0xe0)==0xc0) + { + my $currentUnicodeCharacter=(($currentUtf8Byte&0x1f)<<6); + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=($currentUtf8Byte&0x3f); + $UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter; + } + + elsif (($currentUtf8Byte&0xf0)==0xe0) + { + my $currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<12); + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<6); + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=($currentUtf8Byte&0x3f); + $UnicodeUnpacked[$UnicodeIndex] = $currentUnicodeCharacter; + } + + elsif (($currentUtf8Byte&0xf8)==0xf0) + { + my $currentUnicodeCharacter=(($currentUtf8Byte&0x07)<<8); + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=(($currentUtf8Byte&0x3f)<<2); + if ($currentUnicodeCharacter<0x0040) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter-=0x0040; + if ($currentUnicodeCharacter>=0x0400) + { + return $KErrorIllFormedInput; + } + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=(($currentUtf8Byte&0x30)>>4); + $UnicodeUnpacked[$UnicodeIndex] = (0xd800|$currentUnicodeCharacter); + $currentUnicodeCharacter=(($currentUtf8Byte&0x0f)<<6); + ++$Utf8Index; + $currentUtf8Byte=$Utf8Unpacked[$Utf8Index]; + if (($currentUtf8Byte&0xc0)!=0x80) + { + return $KErrorIllFormedInput; + } + $currentUnicodeCharacter|=($currentUtf8Byte&0x3f); + ++$UnicodeIndex; + $UnicodeUnpacked[$UnicodeIndex] = (0xdc00|$currentUnicodeCharacter); + } + else + { + return $KErrorIllFormedInput; + } + ++$UnicodeIndex; + ++$Utf8Index; + } + $$Unicode = (); + $$Unicode = pack "$UnicodeTemplate*", @UnicodeUnpacked; + return $UnicodeIndex; + } + +sub UnicodeToUtf8 + { + my $Utf8 = shift; + my $Unicode = shift; + my $UnicodeTemplate = shift; + my $Utf8Index = 0; + my $UnicodeIndex = 0; + my $numOfBytes = length($Unicode); + my @UnicodeUnpacked = unpack "$UnicodeTemplate*", $Unicode; + my @Utf8Unpacked = (); + + for (;;) + { + # exit the loop if no more in the UnicodeUnpacked + if ($UnicodeIndex > $#UnicodeUnpacked) + { + last; + } + + my $currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex]; + if (($currentUnicodeCharacter&0xff80)==0x0000) + { + $Utf8Unpacked[$Utf8Index]= $currentUnicodeCharacter; + } + elsif (($currentUnicodeCharacter&0xf800)==0x0000) + { + + $Utf8Unpacked[$Utf8Index]= (0xc0 | $currentUnicodeCharacter >> 6); + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= (0x80 | $currentUnicodeCharacter&0x3f); + } + elsif (($currentUnicodeCharacter&0xfc00)==0xd800) + { + $currentUnicodeCharacter+=0x0040; + $Utf8Unpacked[$Utf8Index]= (0xf0|(($currentUnicodeCharacter>>8)&0x07)); + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>2)&0x3f)); + my $currentUtf8Byte=(0x80|(($currentUnicodeCharacter&0x03)<<4)); + ++$UnicodeIndex; + $currentUnicodeCharacter=$UnicodeUnpacked[$UnicodeIndex]; + if (($currentUnicodeCharacter&0xfc00)!=0xdc00) + { + return $KErrorIllFormedInput; + } + $currentUtf8Byte|=(($currentUnicodeCharacter>>6)&0x0f); + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= $currentUtf8Byte; + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f)); + } + else + { + $Utf8Unpacked[$Utf8Index]= (0xe0|($currentUnicodeCharacter>>12)); + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= (0x80|(($currentUnicodeCharacter>>6)&0x3f)); + ++$Utf8Index; + $Utf8Unpacked[$Utf8Index]= (0x80| ($currentUnicodeCharacter&0x3f)); + } + ++$Utf8Index; + ++$UnicodeIndex; + } + $$Utf8 = (); + $$Utf8 = pack "C*", @Utf8Unpacked; + return $Utf8Index; + + }