kernel/eka/euser/unicode/perl/UnicodeMaxDecompose.pl
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Tue, 26 Jan 2010 13:13:38 +0200
changeset 14 5d2844f35677
parent 0 a41df078684a
permissions -rw-r--r--
Revision: 201004 Kit: 201004
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     1
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     2
# Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     3
# All rights reserved.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     4
# This component and the accompanying materials are made available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     5
# under the terms of the License "Eclipse Public License v1.0"
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     6
# which accompanies this distribution, and is available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     7
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     8
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     9
# Initial Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    10
# Nokia Corporation - initial contribution.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    11
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    12
# Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    13
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    14
# Description:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    15
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    16
# UnicodeMaxDecompose.pl
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    17
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    18
# Adds maximal decompositions of the character and maximal decompositions of
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    19
# its folded varient to the Unicode data.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    20
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    21
# Added as the fourth field after the 'Symbain:' marker in the following format:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    22
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    23
# Symbian:<grapheme-role>;<excluded>;<folded>;<max-decomposition>;<folded-decomposition>
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    24
# where each of <max-decomposition> and <folded-decomposition> are strings
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    25
# of hex numbers separated by spaces, representing the complete decomposition
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    26
# of the character and its folded equivalent respectively.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    27
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    28
# Usage:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    29
# perl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    30
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    31
use strict;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    32
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    33
if (scalar(@ARGV) != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    34
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    35
	print (STDERR "Usage:\nperl -w UnicodeMaxDecompose.pl < <output-of-UnicodeAddFolded>\n");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    36
	exit 1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    37
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    38
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    39
my %StatedDecomposition = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    40
my %CompleteDecomposition = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    41
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    42
sub Decompose
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    43
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    44
	my ($code) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    45
	return unless exists $StatedDecomposition{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    46
	my $stated = $StatedDecomposition{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    47
	delete $StatedDecomposition{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    48
	my @complete = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    49
	foreach my $hexelt ( split(' ', $stated) )
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    50
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    51
		if ($hexelt)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    52
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    53
			Decompose($hexelt);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    54
			if (exists $CompleteDecomposition{$hexelt})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    55
				{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    56
				push @complete, $CompleteDecomposition{$hexelt};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    57
				}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    58
			else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    59
				{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    60
				push @complete, $hexelt;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    61
				}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    62
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    63
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    64
	$CompleteDecomposition{$code} = join(' ', @complete);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    65
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    66
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    67
my %Folded = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    68
my %LineToCode = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    69
my @RawLine = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    70
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    71
my $lineNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    72
while (my $line = <STDIN>)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    73
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    74
	chomp $line;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    75
	$lineNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    76
	# Split into fields: make sure trailing null strings are not
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    77
	# deleted by adding a dummy final field
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    78
	my @attribute = split(/;/, $line.';dummy');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    79
	# Delete the dummy field
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    80
	pop @attribute;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    81
	die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    82
		if (scalar(@attribute) == 16);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    83
	if (scalar(@attribute) == 17)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    84
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    85
		die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeAddFolded been run?")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    86
			if ($attribute[15] !~ /^[ \t]*symbian:/i);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    87
		my $code = $attribute[0];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    88
		die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    89
			unless ($code =~ /^1?[0-9a-fA-F]{4,5}$/ && hex($code) < 0x110000);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    90
		my $decomposition = $attribute[5];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    91
		die("Decomposition '$decomposition' at line $lineNo is not a valid Unicode decomposition.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    92
			unless $decomposition =~ /^[ \t]*(<.*>[ \t]*[0-9a-fA-F])?[0-9a-fA-F \t]*$/;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    93
		my $folded = $attribute[16];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    94
		die ("'$folded' not a valid string of hex values at line $lineNo.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    95
			unless $folded =~ /[0-9a-fA-F \t]*/;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    96
		# Store all decompositions that  have no tag and at least one value
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    97
		if ($decomposition =~ /^[ \t]*[0-9a-fA-F]/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    98
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    99
			$StatedDecomposition{$code} = $decomposition;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   100
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   101
		if ($folded =~ /[0-9a-fA-F]/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   102
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   103
			$Folded{$code} = $folded;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   104
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   105
		$LineToCode{$lineNo-1} = $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   106
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   107
	elsif ($line !~ /^[ \t]*$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   108
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   109
		die 'Do not understand line '.$lineNo;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   110
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   111
	$RawLine[$lineNo-1] = $line;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   112
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   113
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   114
# Completely decompose all strings in the %StatedDecomposition
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   115
foreach my $code (keys %StatedDecomposition)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   116
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   117
	Decompose($code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   118
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   119
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   120
# Now decompose all the folded versions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   121
foreach my $code (keys %Folded)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   122
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   123
	my @result = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   124
	foreach my $hexelt (split(' ', $Folded{$code}))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   125
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   126
		if (exists $CompleteDecomposition{$hexelt})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   127
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   128
			push @result, split(' ', $CompleteDecomposition{$hexelt});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   129
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   130
		else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   131
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   132
			push @result, $hexelt;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   133
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   134
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   135
	$Folded{$code} = join(' ', @result);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   136
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   137
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   138
# Now output all the results
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   139
for (my $i = 0; $i != scalar(@RawLine); $i++)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   140
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   141
	print $RawLine[$i];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   142
	if (exists $LineToCode{$i})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   143
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   144
		my $code = $LineToCode{$i};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   145
		print ';';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   146
		my $decomp = '';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   147
		$decomp = $CompleteDecomposition{$code}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   148
			if exists $CompleteDecomposition{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   149
		print $decomp.';';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   150
		if (exists $Folded{$code})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   151
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   152
			print $Folded{$code}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   153
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   154
		else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   155
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   156
			# If there is no folded value, but there is a decomposition
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   157
			# sequence, the character must fold to the decomposition
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   158
			# sequence too.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   159
			print $decomp;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   160
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   161
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   162
	print "\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   163
	}