charconvfw/charconvplugins/tools/UTF.PM
changeset 0 1fb32624e06b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/charconvfw/charconvplugins/tools/UTF.PM	Tue Feb 02 02:02:46 2010 +0200
@@ -0,0 +1,200 @@
+#
+# Copyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Nokia Corporation - initial contribution.
+#
+# Contributors:
+#
+# Description: Conversion between UTF-8 byte strings and packed UTF-16 code units.
+#
+
+use strict;
+use integer;
+
+package UTF;
+
+# The module inherits from Exporter and lists both converters in @EXPORT,
+# i.e. they are exported by default.
+require Exporter;
+@UTF::ISA = ('Exporter');
+@UTF::EXPORT = ('Utf8ToUnicode', 'UnicodeToUtf8');
+
+# Value returned by Utf8ToUnicode and UnicodeToUtf8 when the input cannot be
+# converted.  Successful calls return a count, which is never negative.
+# NOTE(review): no closing "1;" is visible in this file, so this assignment
+# may be the statement whose true value lets require/use succeed - check
+# before moving it or changing the value to 0.
+my $KErrorIllFormedInput = -1;
+
+# Utf8ToUnicode
+#
+# Decodes a string of UTF-8 bytes into 16-bit code units and stores them,
+# packed, in the caller's scalar.
+#
+# Arguments (positional):
+#   $Unicode         - reference to the scalar that receives the packed result
+#   $Utf8            - string holding the UTF-8 bytes to decode
+#   $UnicodeTemplate - pack() template letter used for every code unit; the
+#                      result is built with pack "$UnicodeTemplate*"
+#
+# Returns the number of code units produced.  A four-byte sequence counts as
+# two because it is written as a high/low surrogate pair.
+#
+# Returns $KErrorIllFormedInput, leaving $$Unicode untouched, when:
+#   - a lead byte is a bare continuation byte (0x80-0xbf) or is 0xf8 or above;
+#   - a multi-byte sequence is cut short or a continuation byte is not of
+#     the form 10xxxxxx;
+#   - a four-byte sequence encodes a value below 0x10000 or above 0x10ffff.
+#
+# NOTE(review): over-long two- and three-byte forms and three-byte encoded
+# surrogates are accepted, exactly as before.  Tighten this only once it is
+# confirmed that no caller feeds such input.
+sub Utf8ToUnicode
+	{
+	my $Unicode = shift;
+	my $Utf8 = shift;
+	my $UnicodeTemplate = shift;
+	my @Utf8Unpacked = unpack "C*",$Utf8;
+	my @UnicodeUnpacked = ();
+	my $Utf8Index = 0;
+
+	# Steps to the next input byte and returns its six payload bits.  Returns
+	# undef when the input is exhausted or the byte is not of the form
+	# 10xxxxxx, so a truncated sequence is detected without indexing past the
+	# end of @Utf8Unpacked.
+	my $NextContinuationBits = sub
+		{
+		++$Utf8Index;
+		if ($Utf8Index > $#Utf8Unpacked)
+			{
+			return;
+			}
+		my $continuationByte = $Utf8Unpacked[$Utf8Index];
+		if (($continuationByte&0xc0)!=0x80)
+			{
+			return;
+			}
+		return ($continuationByte&0x3f);
+		};
+
+	while ($Utf8Index <= $#Utf8Unpacked)
+		{
+		my $leadByte = $Utf8Unpacked[$Utf8Index];
+
+		if (($leadByte&0x80)==0x00)
+			{
+			# 0xxxxxxx: the byte is the code unit
+			push @UnicodeUnpacked, $leadByte;
+			}
+		elsif (($leadByte&0xe0)==0xc0)
+			{
+			# 110xxxxx 10xxxxxx
+			my $lastBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($lastBits);
+			push @UnicodeUnpacked, ((($leadByte&0x1f)<<6)|$lastBits);
+			}
+		elsif (($leadByte&0xf0)==0xe0)
+			{
+			# 1110xxxx 10xxxxxx 10xxxxxx
+			my $middleBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($middleBits);
+			my $lastBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($lastBits);
+			push @UnicodeUnpacked, ((($leadByte&0x0f)<<12)|($middleBits<<6)|$lastBits);
+			}
+		elsif (($leadByte&0xf8)==0xf0)
+			{
+			# 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx, written as a surrogate pair
+			my $secondBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($secondBits);
+
+			# The first two bytes carry bits 12..20 of the code point, held
+			# here in bits 2..10.  Bits 6..10 must be at least 1 (otherwise
+			# the value is below 0x10000) and, once reduced by one, must fit
+			# the four bits a high surrogate has room for (otherwise the
+			# value is above 0x10ffff).
+			my $highBits = (($leadByte&0x07)<<8)|($secondBits<<2);
+			if ($highBits<0x0040)
+				{
+				return $KErrorIllFormedInput;
+				}
+			$highBits-=0x0040;
+			if ($highBits>=0x0400)
+				{
+				return $KErrorIllFormedInput;
+				}
+
+			my $thirdBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($thirdBits);
+			my $fourthBits = $NextContinuationBits->();
+			return $KErrorIllFormedInput unless defined($fourthBits);
+
+			# The third byte is split: its top two payload bits finish the
+			# high surrogate, its low four start the low surrogate.
+			push @UnicodeUnpacked, (0xd800|$highBits|(($thirdBits&0x30)>>4));
+			push @UnicodeUnpacked, (0xdc00|(($thirdBits&0x0f)<<6)|$fourthBits);
+			}
+		else
+			{
+			# bare continuation byte, or a lead byte of 0xf8 and above
+			return $KErrorIllFormedInput;
+			}
+		++$Utf8Index;
+		}
+
+	# The caller's scalar is written only after the whole input has decoded.
+	$$Unicode = pack "$UnicodeTemplate*", @UnicodeUnpacked;
+	return scalar(@UnicodeUnpacked);
+	}
+
+# UnicodeToUtf8
+#
+# Encodes packed 16-bit code units as UTF-8 bytes and stores the bytes in the
+# caller's scalar.
+#
+# Arguments (positional):
+#   $Utf8            - reference to the scalar that receives the UTF-8 bytes
+#   $Unicode         - packed string of code units
+#   $UnicodeTemplate - pack() template letter describing one code unit; the
+#                      input is split with unpack "$UnicodeTemplate*"
+#
+# Returns the number of bytes produced.
+#
+# Returns $KErrorIllFormedInput, leaving $$Utf8 untouched, when:
+#   - a high surrogate (0xd800-0xdbff) is the last unit or is not followed
+#     by a low surrogate (0xdc00-0xdfff);
+#   - an unpacked unit lies outside 0..0xffff (possible with a template wider
+#     than 16 bits); such values used to be encoded as garbage bytes.
+#
+# A low surrogate that does not follow a high surrogate is not an error: it
+# is written as an ordinary three-byte sequence, as before.
+sub UnicodeToUtf8
+	{
+	my $Utf8 = shift;
+	my $Unicode = shift;
+	my $UnicodeTemplate = shift;
+	my @UnicodeUnpacked = unpack "$UnicodeTemplate*", $Unicode;
+	my @Utf8Unpacked = ();
+	my $UnicodeIndex = 0;
+
+	# Everything below assumes 16-bit units, so reject anything wider (or
+	# negative, which a signed template could yield) before encoding.
+	if (grep { $_ < 0 || $_ > 0xffff } @UnicodeUnpacked)
+		{
+		return $KErrorIllFormedInput;
+		}
+
+	while ($UnicodeIndex <= $#UnicodeUnpacked)
+		{
+		my $codeUnit = $UnicodeUnpacked[$UnicodeIndex];
+
+		if ($codeUnit < 0x0080)
+			{
+			# 0xxxxxxx
+			push @Utf8Unpacked, $codeUnit;
+			}
+		elsif ($codeUnit < 0x0800)
+			{
+			# 110xxxxx 10xxxxxx
+			push @Utf8Unpacked,
+				(0xc0|($codeUnit>>6)),
+				(0x80|($codeUnit&0x3f));
+			}
+		elsif (($codeUnit&0xfc00)==0xd800)
+			{
+			# High surrogate: it must be followed by a low surrogate, and the
+			# pair is written as one four-byte sequence.
+			++$UnicodeIndex;
+			if ($UnicodeIndex > $#UnicodeUnpacked)
+				{
+				return $KErrorIllFormedInput;
+				}
+			my $lowSurrogate = $UnicodeUnpacked[$UnicodeIndex];
+			if (($lowSurrogate&0xfc00)!=0xdc00)
+				{
+				return $KErrorIllFormedInput;
+				}
+
+			# Bits 6..9 of the high surrogate hold bits 16..20 of the code
+			# point minus one; adding 0x0040 restores them (now bits 6..10).
+			my $highBits = ($codeUnit&0x03ff)+0x0040;
+			push @Utf8Unpacked,
+				(0xf0|(($highBits>>8)&0x07)),
+				(0x80|(($highBits>>2)&0x3f)),
+				(0x80|(($highBits&0x03)<<4)|(($lowSurrogate>>6)&0x0f)),
+				(0x80|($lowSurrogate&0x3f));
+			}
+		else
+			{
+			# 1110xxxx 10xxxxxx 10xxxxxx
+			push @Utf8Unpacked,
+				(0xe0|($codeUnit>>12)),
+				(0x80|(($codeUnit>>6)&0x3f)),
+				(0x80|($codeUnit&0x3f));
+			}
+		++$UnicodeIndex;
+		}
+
+	# The caller's scalar is written only after the whole input has encoded.
+	$$Utf8 = pack "C*", @Utf8Unpacked;
+	return scalar(@Utf8Unpacked);
+	}