userlibandfileserver/fatfilenameconversionplugins/group/cp54936_4byte_tounicode.pl
author John Imhofe
Mon, 19 Oct 2009 15:55:17 +0100
changeset 0 a41df078684a
child 15 4122176ea935
permissions -rw-r--r--
Convert Kernelhwsrv package from SFL to EPL kernel\eka\compsupp is subject to the ARM EABI LICENSE userlibandfileserver\fatfilenameconversionplugins\unicodeTables is subject to the Unicode license kernel\eka\kernel\zlib is subject to the zlib license

# Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved.
# This component and the accompanying materials are made available
# under the terms of the License "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Nokia Corporation - initial contribution.
#
# Contributors:
#
# Description:
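# Generates a C++ source file containing the CP54936 4-byte (in-BMP) to Unicode
# mapping table from a text mapping file. The table layout and index formula are
# described in the comment block this script writes into the generated file.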
#

# Expect the input mapping file and, optionally, the name of the output file.
# Anything else prints the usage text and exits with status 1.
if (@ARGV != 1 && @ARGV != 2)
	{
	print <<EOD;
Usage: perl -w cp54936_4byte_tounicode.pl cp54936_4byte.txt [cp54936_4byte_tounicode.cpp]
EOD
	exit(1);
	}

# Removes the extension from the input filename to get the output root name.
# Only a dot in the last path component counts as an extension separator, so a
# dotted directory name (e.g. "..\group\table") is never truncated, and a name
# without any extension is used unchanged instead of relying on a stale $1.
my $root = $ARGV[0];
$root =~ s/\.[^.\\\/]*$//;

# Base name of the root with any directory part removed
# (not referenced by the code visible in this script).
my $header_to_include = $root;
$header_to_include =~ s/.*[\\\/]//;

# An explicit output filename (second argument) overrides the root;
# its extension, if any, is removed the same way.
if (@ARGV == 2)
	{
	$root = $ARGV[1];
	$root =~ s/\.[^.\\\/]*$//;
	}

# Read the mapping file named by the first argument into %lines.
# Three-argument open with a lexical handle, so the filename can never be
# interpreted as an open mode.
open (my $in, '<', $ARGV[0]) or die ("Error: $ARGV[0] $!");

my $lineNumber = 0;
my $acceptLineNumber = 0;
# Hash table of all characters, key=foreign(string) and value=unicode(string).
# Keys are normalised to "0x" followed by upper-case hex digits, which is the
# form the table writer builds with sprintf("0x%02X...") when looking them up.
my %lines;
while (my $line = <$in>)
	{
	$lineNumber++;
	if ($line =~ /^0[xX](8[1-4]3\d[\da-fA-F]{2}3\d)\s*(0[xX][\da-fA-F]{4})/)
		{
		# read a line like "0x81318133	0x060D"
		$acceptLineNumber++;
		my $foreign = "0x" . uc($1);
		my $unicode = $2;
		$lines{$foreign} = $unicode;
		}
	# any other line is silently ignored
	}
close $in;
print "Read $ARGV[0] done.\n";
print "$acceptLineNumber of $lineNumber lines accepted.\n";


# Increase the input CP54936 4-byte code by 1.
# param is a string like "0x81308439"
# return a string like "0x81308530" (always "0x" plus upper-case hex digits)
# Byte 4 wraps from 0x39 to 0x30, byte 3 from 0xFE to 0x81 and byte 2 from
# 0x39 to 0x30, each carrying into the byte before it.
# Dies if the parameter does not contain a "0x" prefixed 8-digit hex code.
sub IncreaseCP54936Code
	{
	my ($increaseme) = @_;
	# Capture in list context so a failed match is detected instead of
	# silently reusing $1..$4 from an earlier match.
	my @digits = $increaseme =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
		or die "IncreaseCP54936Code: malformed code '$increaseme'\n";
	# Lexical byte values; nothing leaks into package globals.
	my ($b1, $b2, $b3, $b4) = map { hex($_) } @digits;
	$b4++;
	if ($b4 == 0x3A)
		{
		$b4 = 0x30;
		$b3++;
		if ($b3 == 0xFF)
			{
			$b3 = 0x81;
			$b2++;
			if ($b2 == 0x3A)
				{
				$b2 = 0x30;
				$b1++;
				}
			}
		}
	return sprintf("0x%02X%02X%02X%02X", $b1, $b2, $b3, $b4);
	}

# Return the offset from 0x81308130 to the input code, e.g. "0x8234A235".
# The offset is (b1-0x81)*12600 + (b2-0x30)*1260 + (b3-0x81)*10 + (b4-0x30),
# where b1..b4 are the four bytes of the code.
# Dies if the parameter does not contain a "0x" prefixed 8-digit hex code.
sub OffsetOfCP54936Code
	{
	my ($code) = @_;
	# Capture in list context so a failed match is detected instead of
	# silently reusing $1..$4 from an earlier match.
	my @digits = $code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
		or die "OffsetOfCP54936Code: malformed code '$code'\n";
	# Lexical byte values; nothing leaks into package globals.
	my ($b1, $b2, $b3, $b4) = map { hex($_) } @digits;
	return ($b1-0x81)*12600 + ($b2-0x30)*1260 + ($b3-0x81)*10 + ($b4-0x30);
	}

# Return the numeric value of the last byte of the input code, e.g. 0x35 for
# "0x8234A235".
# Dies if the parameter does not contain a "0x" prefixed 8-digit hex code.
sub Byte4OfCP54936Code
	{
	my ($code) = @_;
	# Capture in list context so a failed match is detected instead of
	# silently returning a stale $4 from an earlier match.
	my @digits = $code =~ /0[xX]([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})([\da-fA-F]{2})/
		or die "Byte4OfCP54936Code: malformed code '$code'\n";
	return hex($digits[3]);
	}


# Write the generated C++ source "$root.cpp": a fixed header followed by one
# TUint16 entry for every CP54936 4-byte code from 0x81308130 to 0x8431A439.
# Codes without an entry in %lines are emitted as 0xFFFD.
print "Write to $root.cpp...\n";
# Three-argument open with a lexical handle; the error names the file that
# actually failed to open and includes the system error text.
open (my $cpp, '>', "$root.cpp") or die ("Error: $root.cpp $!");

print {$cpp} <<EOD;
// Copyright (c) 2008-2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of the License "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
//
// Auto-generated by the cp54936_4byte_tounicode.pl tool - Do not edit!!!
//

#include <e32std.h>
#include <e32def.h>
#include "cp54936.h"


// mapping table of: CP54936 4-byte in-BMP ---> Unicode
// To calculate index: index=(b1-129)*12600+(b2-48)*1260+(b3-129)*10+(b4-48), in which,
// b1,b2,b3,b4 is byte1,2,3,4 of CP54936 code.
// For example, CP54936 code 0x8232EA38, the index=(0x82-129)*12600+(0x32-48)*1260+(0xEA-129)*10+(0x38-48)=16178
// So we get the Unicode 0x42AB.
// Generated with: \"perl -w ..\\group\\cp54936_4byte_tounicode.pl cp54936_4byte.txt cp54936_4byte_tounicode.cpp\".

EOD

my $bytecount = 0;
my $expect = "0x81308130";
my $last = "0x8431A439";
# The offset of the last code is loop-invariant, so compute it only once.
my $lastOffset = OffsetOfCP54936Code($last);
my $totalCount = $lastOffset + 1;


print {$cpp} "const TUint16 KMappingTable4ByteBmp2Unicode[$totalCount] =\n\t{\n\t";

my $outIndex = 0;	# to wrap every 10 items
while (OffsetOfCP54936Code($expect) <= $lastOffset)
	{
	# unmapped codes are filled with the replacement value 0xFFFD
	my $unicode = exists($lines{$expect}) ? $lines{$expect} : "0xFFFD";
	print {$cpp} "$unicode, ";
	$bytecount += 2;
	$outIndex++;
	if ($outIndex % 10 == 0)
		{
		# end the row with a comment naming the code of its last item
		print {$cpp} "\t// $expect\n\t";
		}
	# to next foreign
	$expect = IncreaseCP54936Code($expect);
	}

print {$cpp} "};\n";
print {$cpp} "// total byte count = $bytecount\n";
print "\nTotal byte count: $bytecount.\n";
# Buffered write errors only surface at close, so check it.
close ($cpp) or die ("Error: $root.cpp $!");
print "Done.\n";