MCL/sf/os/kernelhwsrv: kernel/eka/euser/unicode/perl/UnicodeCompositionEx.pl@2f92ad2dc5db (annotated)

0 a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	1	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	2	# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	3	# All rights reserved.
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	4	# This component and the accompanying materials are made available
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	5	# under the terms of the License "Eclipse Public License v1.0"
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	6	# which accompanies this distribution, and is available
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	7	# at the URL "http://www.eclipse.org/legal/epl-v10.html".
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	8	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	9	# Initial Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	10	# Nokia Corporation - initial contribution.
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	11	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	12	# Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	13	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	14	# Description:
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	15	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	16	# UnicodeCompositionEx
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	17	# adds composition exclusion information to unicode data
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	18	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	19	# Added as a new field:
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	20	# Symbian:<excluded-from-composition>
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	21	# where <excluded-from-composition> is E or null.
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	22	#
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	23	# Usage:
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	24	# perl -w UnicodeAddComposeEx.pl CompositionExclusions.txt < <Unicode-data-file>
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	25
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	26	use strict;
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	27
# Exactly one command-line argument is expected: the composition exclusions
# file. The Unicode data file itself is read from standard input.
unless (@ARGV == 1) {
    print STDERR "Usage:\nperl -w UnicodeAddComposeEx.pl CompositionExclusions.txt < <Unicode-data-file>\n";
    exit 1;
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	33
# Read the composition exclusions file named by the single command-line
# argument. Each meaningful line holds one hexadecimal code point of 4 to 6
# digits, optionally surrounded by blanks and optionally followed by a
# '#' comment. Blank lines and comment-only lines are skipped. Every code
# point found is recorded as a key of %Excluded.
#
# The three-argument form of open with a lexical handle is used so that the
# file name can never be interpreted as an open mode or a pipe command.
open(my $exclusionsFile, '<', $ARGV[0]) or die("Could not open file $ARGV[0]\n");

my $lineNo = 0;
my %Excluded = ();
while (<$exclusionsFile>)
{
    $lineNo++;
    # try to parse the line if there is some non-whitespace before the comment
    # (the line is skipped when it is only blanks and/or a '#' comment)
    if (!/^[ \t]*([#].*)?$/)
    {
        # optional blanks, the code point, optional blanks, optional comment
        /^[ \t]*([0-9A-Fa-f]{4,6})[ \t]*([#].*)?$/ or die("Did not understand line $lineNo of $ARGV[0]");
        my $code = hex($1);
        # six hex digits can express values above the last Unicode code point
        die ("Value $code outside Unicode range at line $lineNo of $ARGV[0]")
            unless ($code < 0x110000);
        $Excluded{$code} = 1;
        #printf("Excluding %X because it is in the exclusion list\n", $code);
    }
}

close $exclusionsFile;
# This is a two-pass operation, so we must store the lines ready for output later.
my @DataFileLines = ();
# Maps an index into @DataFileLines to the code point parsed from that line;
# lines that carry no data (blank lines) get no entry.
my %DataFileLineCodes = ();
# The first pass will collect all the relevant data:
# The first character of the decomposition if there is more than one
my %FirstOfDecompositionString = ();
# The singleton decomposition if it is a singleton
my %SingletonDecomposition = ();
# The decompositions tag, if any
my %DecompTag = ();
# The combining class
my %CombiningClass = ();
# We will also be marking all singleton decompositions for exclusion
$lineNo = 0;
while (my $line = <STDIN>)
{
    chomp $line;
    $DataFileLines[$lineNo] = $line;
    # From here on $lineNo is the 1-based number of the current line,
    # which is what the error messages below report.
    $lineNo++;
    # Split into fields: make sure trailing null strings are not
    # deleted by adding a dummy final field
    my @attribute = split(/;/, $line.';dummy');
    # Delete the dummy field
    pop @attribute;

    if (scalar(@attribute) == 15)
    {
        my $code = $attribute[0];
        die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
            unless $code =~ /^1?[0-9a-fA-F]{4,5}$/;
        $code = hex($code);
        my $combiningClass = $attribute[3];
        die("Fourth attribute '$combiningClass' is not a valid Unicode combining class at line $lineNo")
            unless (0 <= $combiningClass && $combiningClass < 256);
        my $decompositionString = $attribute[5];
        # An optional <tag> followed by any run of hex digits and blanks.
        # The run may be empty, because a line is allowed to have no
        # decomposition at all.
        die ("Sixth attribute '$decompositionString' is not a valid decomposition string at line $lineNo")
            unless ($decompositionString =~ /^(<.*>)?[0-9a-fA-F \t]*$/);
        my @decomposition = split(/[ \t]+/, $decompositionString);
        # A leading <tag> is remembered separately and removed, so that
        # @decomposition holds only the mapped-to code points.
        if (@decomposition && $decomposition[0] =~ /^<.*>$/)
        {
            $DecompTag{$code} = shift @decomposition;
        }
        if (scalar(@decomposition) == 1)
        {
            # We want to exclude codes such as these, with a singleton
            # decomposition mapping, but at the moment we don't know if the
            # character mapped to has a decomposition mapping, so we will
            # defer this to another stage.
            die("Decomposition $decomposition[0] not understood at line $lineNo")
                unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
            $SingletonDecomposition{$code} = hex($decomposition[0]);
        }
        elsif (1 < scalar(@decomposition))
        {
            die("Decomposition $decomposition[0] not understood at line $lineNo")
                unless ($decomposition[0] =~ /^[0-9A-Fa-f]+$/);
            $FirstOfDecompositionString{$code} = hex($decomposition[0]);
        }
        $CombiningClass{$code} = $combiningClass;
        # $lineNo was already advanced, so the stored line's index is one less
        $DataFileLineCodes{$lineNo-1} = $code;
    }
    elsif ($line !~ /^[ \t]*$/)
    {
        # Anything other than a 15-field record or a blank line is an error
        die 'Do not understand line '.$lineNo;
    }
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	120
# A code whose decomposition has more than one character is excluded when
# the first character of that decomposition has a known, non-zero
# combining class.
for my $composite (keys %FirstOfDecompositionString) {
    my $leading = $FirstOfDecompositionString{$composite};
    # Leading characters that never appeared in the data file are ignored
    next unless exists $CombiningClass{$leading};
    next if $CombiningClass{$leading} == 0;
    $Excluded{$composite} = 1;
    #printf("Excluding %X because its decomposition starts with a non-starter(%X)\n", $composite, $leading);
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	135
# A code with a singleton decomposition is excluded unless following its
# chain of singleton mappings ends at a character that has a
# multi-character decomposition. The chain is only followed when the
# starting code has no decomposition tag; a tagged code is tested as itself.
for my $code (sort(keys %SingletonDecomposition)) {
    my $mapsTo = $code;
    unless (exists $DecompTag{$code}) {
        $mapsTo = $SingletonDecomposition{$mapsTo}
            while exists $SingletonDecomposition{$mapsTo};
    }
    next if exists $FirstOfDecompositionString{$mapsTo};
    #printf("Excluding %X because its decomposition is a singleton(%X)\n", $code, $mapsTo);
    $Excluded{$code} = 1;
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL John Imhofe parents: diff changeset	151
# Second pass: echo every stored input line. Lines that carried a code point
# get the extra field ";Symbian:" appended, followed by "E" when that code
# point is excluded from composition; other lines are echoed unchanged.
for my $index (0 .. $#DataFileLines) {
    my $extraField = '';
    if (exists $DataFileLineCodes{$index}) {
        $extraField = ';Symbian:';
        $extraField .= 'E' if exists $Excluded{ $DataFileLineCodes{$index} };
    }
    print $DataFileLines[$index], $extraField, "\n";
}

author	Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
	Wed, 31 Mar 2010 23:38:45 +0300
branch	RCL_3
changeset 22	2f92ad2dc5db
parent 0	a41df078684a
permissions	-rw-r--r--