kernel/eka/euser/unicode/perl/UnicodeAddFolded.pl
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Wed, 23 Dec 2009 11:43:31 +0000
changeset 4 56f325a607ea
parent 0 a41df078684a
permissions -rw-r--r--
Revision: 200951 Kit: 200951
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     1
# Copyright (c) 2002-2009 Nokia Corporation and/or its subsidiary(-ies).
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     2
# All rights reserved.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     3
# This component and the accompanying materials are made available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     4
# under the terms of the License "Eclipse Public License v1.0"
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     5
# which accompanies this distribution, and is available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     6
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     7
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     8
# Initial Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     9
# Nokia Corporation - initial contribution.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    10
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    11
# Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    12
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    13
# Description:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    14
# Adds folding information to Unicode data
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    15
# Added as the third field after the 'Symbian:' marker in the following format:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    16
# Symbian:<grapheme-role>;<excluded-from-composition>;<folded-form>
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    17
# where <folded-form> is null or a sequence of hex unicode values
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    18
# separated by spaces representing the folded form of the character.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    19
# Usage:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    20
# perl -w UnicodeAddFolded.pl CaseFolding.txt < <output-of-UnicodeCompositionEx>
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    21
# 
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    22
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    23
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    24
use strict;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    25
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    26
if (scalar(@ARGV) != 1)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    27
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    28
	print (STDERR "Usage:\nperl -w UnicodeAddFolded.pl CaseFolding.txt < <output-of-UnicodeCompositionEx>\n");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    29
	exit 1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    30
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    31
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    32
open(FOLDING, $ARGV[0]) or die("Could not open file $ARGV[0]\n");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    33
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    34
my %Fold = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    35
my %MappingLine = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    36
my $lineNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    37
while (<FOLDING>)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    38
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    39
	$lineNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    40
	my ($line, $comment) = split(/#/, $_, 2);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    41
	if ($line =~ /^[ \t]*(1?[0-9a-fA-F]{4,5});[ \t]*([LEICSFT]);[ \t]*([0-9a-fA-F][0-9a-fA-F \t]*);[ \t]*$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    42
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    43
		my $code = hex($1);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    44
		my $type = $2;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    45
		my $folded = $3;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    46
		# We'll deal with Turkic mappings with our own hack.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    47
		# F = Full mappings (fold is longer than one character)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    48
		# T = I = Turkic mapping
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    49
		if ($type !~ /[FTI]/ && $folded !~ /[ \t]/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    50
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    51
			die ("$code has two mappings: lines $MappingLine{$code} and $lineNo.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    52
				if (exists $Fold{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    53
			$Fold{$code} = $folded;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    54
			$MappingLine{$code} = $lineNo;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    55
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    56
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    57
	elsif ($line !~ /^[ \t]*$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    58
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    59
		die ("Did not understand line $lineNo of $ARGV[0]");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    60
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    61
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    62
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    63
close FOLDING;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    64
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    65
# Turkic hack:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    66
# Map dotted capital I and dotless small I to lower case i.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    67
# This makes all the 'i's fold the same, which isn't very nice for Turkic
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    68
# languages, but it at least gives us behaviour consistent across locales
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    69
# which does at least map dotted I, and i to the same value, as well
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    70
# as mapping I and dotless i to the same value, and mapping I and i
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    71
# to the same value.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    72
$Fold{0x49} = '0069';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    73
$Fold{0x130} = '0069';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    74
$Fold{0x131} = '0069';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    75
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    76
$lineNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    77
while (my $line = <STDIN>)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    78
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    79
	chomp $line;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    80
	$lineNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    81
	# Split into fields: make sure trailing null strings are not
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    82
	# deleted by adding a dummy final field
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    83
	my @attribute = split(/;/, $line.';dummy');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    84
	# Delete the dummy field
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    85
	pop @attribute;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    86
	die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeCompositionEx been run?")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    87
		if (scalar(@attribute) == 15);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    88
	if (scalar(@attribute) == 16)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    89
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    90
		die ("Line $lineNo is missing 'Symbian:' entries. Has UnicodeCompositionEx been run?")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    91
			if ($attribute[15] !~ /^[ \t]*symbian:/i);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    92
		my $code = $attribute[0];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    93
		die("First attribute '$code' not a valid Unicode codepoint at line $lineNo")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    94
			unless $code =~ /^1?[0-9a-fA-F]{4,5}$/;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    95
		$code = hex($code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    96
		$attribute[16] = exists $Fold{$code}? $Fold{$code} : '';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    97
		print join(';', @attribute);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    98
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    99
	elsif ($line !~ /^[ \t]*$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   100
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   101
		die 'Do not understand line '.$lineNo;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   102
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   103
	else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   104
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   105
		print $line;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   106
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   107
	print "\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   108
	}