kernel/eka/euser/unicode/perl/FoldAndDecompTables.pl
author John Imhofe
Mon, 19 Oct 2009 15:55:17 +0100
changeset 0 a41df078684a
permissions -rw-r--r--
Convert Kernelhwsrv package from SFL to EPL kernel\eka\compsupp is subject to the ARM EABI LICENSE userlibandfileserver\fatfilenameconversionplugins\unicodeTables is subject to the Unicode license kernel\eka\kernel\zlib is subject to the zlib license
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     1
# Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     2
# All rights reserved.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     3
# This component and the accompanying materials are made available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     4
# under the terms of the License "Eclipse Public License v1.0"
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     5
# which accompanies this distribution, and is available
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     6
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     7
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     8
# Initial Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
     9
# Nokia Corporation - initial contribution.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    10
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    11
# Contributors:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    12
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    13
# Description:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    14
# Creates C++ code describing how to decompose, compose and fold each character.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    15
# Usage:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    16
# perl -w FoldAndDecompTables.pl < <output-from-UnicodeMaxDecompose>
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    17
# Tables we want to create:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    18
# A: Ordered list of non-excluded decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    19
# B: List of folded decompositions matching A
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    20
# C: List of decompositions not listed in A of length > 1
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    21
# D: List of folded decompositions matching C
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    22
# E: List of decompositions of length = 1 whose matching folded decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    23
# are of length > 1
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    24
# F: List of folded decompositions matching E
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    25
# G: List of decompositions of length = 1 with matching folded decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    26
# H: List of folded decompostions matching G
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    27
# I: List of folded decompositions that do not have matching decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    28
# J: List of decompositions (folding and otherwise) of length > 2
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    29
# K: Hash table mapping Unicode value to its folded decomposition value in the
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    30
# concatenated list B-D-F-H-I
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    31
# L: List of hash slots in K matching A (providing a mapping from non-excluded
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    32
# decompositions to Unicode value)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    33
# [all lengths are of UTF16 strings]
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    34
# 
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    35
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    36
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    37
use strict;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    38
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    39
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    40
# Hash table:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    41
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    42
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    43
# Size of hashing table = 1 to the power $LgHashTableSize
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    44
my $LgHashTableSize = 12;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    45
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    46
# Do not change these next two values!
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    47
my $HashTableSize = 1 << $LgHashTableSize;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    48
my $HashTableBitmaskCpp = sprintf('0x%x', $HashTableSize - 1);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    49
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    50
# Hashing function in Perl: Getting the initial search position
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    51
sub HashStart
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    52
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    53
	return $_[0] & ($HashTableSize - 1);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    54
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    55
# How far to step through each time
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    56
sub HashStep
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    57
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    58
	my ($code) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    59
	$code *= $code >> $LgHashTableSize;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    60
	return ($code * 2 + 1) & ($HashTableSize - 1);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    61
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    62
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    63
# Make sure input string is all hex numbers separated by single spaces with
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    64
# each hex number having 4 digits and decomposed into UTF16
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    65
sub Normalize
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    66
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    67
	my ($string) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    68
	if ($string =~ /^([0-9A-F]{4}( [0-9A-F]{4})*)?$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    69
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    70
		return $string;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    71
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    72
	my $norm = '';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    73
	foreach my $elt (split(' ', $string))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    74
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    75
		if ($elt)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    76
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    77
			die "'$elt' is not a hex number"
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    78
				unless $elt =~ /[0-9a-fA-F]+/;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    79
			$norm = $norm.' '
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    80
				unless $norm eq '';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    81
			$elt = hex $elt;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    82
			if ($elt < 0x10000)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    83
				{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    84
				$norm = $norm.(sprintf('%04X', $elt));
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    85
				}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    86
			else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    87
				{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    88
				# Add a surrogate pair
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    89
				$norm = $norm.(sprintf('%04X %04X',
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    90
					($elt / 0x400) + 0xD7C0, ($elt % 0x400) + 0xDC00));
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    91
				}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    92
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    93
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    94
	#print STDERR "'$string' normalized to '$norm'\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    95
	return $norm;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    96
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    97
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    98
# First stage:
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
    99
# Hash of Unicode values to normalised decomposition and folded strings
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   100
my %Decomp = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   101
my %Folded = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   102
# Mapping from decomposition->char, if not excluded
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   103
my %Composition = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   104
# characters with non-excluded decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   105
my @IncludedDecomps = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   106
# characters with long (>1 UTF16) excluded decompositions
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   107
my @LongExcludedDecomps = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   108
# characters with singleton decompositions but long folds
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   109
my @ShortDecompsLongFolds = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   110
# characters with singleton folds and singleton
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   111
my @ShortDecompsShortFolds = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   112
# characters with singleton folds but no decomps
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   113
my @ShortFoldsOnly = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   114
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   115
# A mapping from decompositions of length greater than two
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   116
# to the code that produced them.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   117
my %VeryLongDecompositions = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   118
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   119
# A list of characters containing all decompositions of length >2 as slices
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   120
my @VeryLongDecompData = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   121
# Mapping from decomposition->index into VeryLongDecompData
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   122
my %VeryLongDecompMap = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   123
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   124
# There will be a hash table mapping Unicode values to indices into the other
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   125
# tables. %Index maps the same thing in Perl.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   126
my %Index = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   127
# %HashTableEntryContents maps the table entries to the Unicode values they
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   128
# contain.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   129
my %HashTableEntryContents = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   130
# %HashTableEntry maps Unicode value to the entry in the hash table
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   131
my %HashTableEntry = ();
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   132
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   133
# Bind a unicode value to an index into the tables
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   134
sub AddHashValue
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   135
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   136
	my ($unicode, $index) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   137
	$Index{$unicode} = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   138
	my $pos = HashStart($unicode);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   139
	my $step = HashStep($unicode);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   140
	while (exists $HashTableEntryContents{$pos})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   141
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   142
		$pos += $step;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   143
		if ($HashTableSize <= $pos)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   144
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   145
			$pos %= $HashTableSize;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   146
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   147
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   148
	$HashTableEntryContents{$pos} = $unicode;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   149
	$HashTableEntry{$unicode} = $pos;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   150
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   151
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   152
# Bind a whole array to the indices starting from that given as the first
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   153
# argument. Returns the index of the next slot to be filled.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   154
sub AddListToHash
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   155
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   156
	my ($index, @unicodes) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   157
	while (@unicodes)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   158
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   159
		AddHashValue(shift @unicodes, $index);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   160
		$index++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   161
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   162
	return $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   163
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   164
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   165
# put the results of a read line into the data structures
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   166
sub AddCode
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   167
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   168
	my ($code, $excluded, $decomposition, $folded) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   169
	return if ($decomposition eq '' && $folded eq '');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   170
	$Decomp{$code} = $decomposition;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   171
	$Folded{$code} = $folded;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   172
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   173
	if (!$excluded && $decomposition ne '')
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   174
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   175
		push @IncludedDecomps, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   176
		$Composition{$decomposition} = $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   177
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   178
	elsif (4 < length $decomposition)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   179
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   180
		push @LongExcludedDecomps, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   181
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   182
	elsif (4 < length $folded)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   183
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   184
		push @ShortDecompsLongFolds, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   185
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   186
	elsif ($decomposition ne '')
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   187
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   188
		push @ShortDecompsShortFolds, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   189
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   190
	elsif ($folded ne '')
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   191
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   192
		push @ShortFoldsOnly, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   193
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   194
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   195
	$VeryLongDecompositions{$decomposition} = $code
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   196
		if (9 < length $decomposition);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   197
	$VeryLongDecompositions{$folded} = $code
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   198
		if (9 < length $folded);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   199
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   200
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   201
if (scalar(@ARGV) != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   202
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   203
	print (STDERR "Usage:\nperl -w FoldAndDecompTables.pl < <output-from-UnicodeMaxDecompose>\n");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   204
	exit 1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   205
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   206
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   207
my $lineNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   208
my $inBlock = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   209
while(<STDIN>)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   210
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   211
	$lineNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   212
	if (/^(1?[0-9a-fA-F]{4,5});([^;]*);.*symbian:(E?);[^;]*;([0-9a-fA-F \t]*);([0-9a-fA-F \t]*)[ \t]*$/i)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   213
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   214
		my $code = hex $1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   215
		my $description = $2;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   216
		my $excluded = $3;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   217
		my $decomposition = Normalize($4);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   218
		my $folded = Normalize($5);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   219
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   220
		die ("Value $1 too large to be Unicode at line $lineNo.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   221
			if (0x110000 <= $code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   222
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   223
		die("Normalisation failed with '$decomposition' at line $lineNo.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   224
			unless (length $decomposition) == 0 || (length $decomposition) % 5 == 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   225
		die("Normalisation failed with '$folded' at line $lineNo.")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   226
			unless (length $folded) == 0 || (length $folded) % 5 == 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   227
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   228
		AddCode($code, $excluded, $decomposition, $folded);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   229
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   230
		if ($description =~ /^<.*Last>$/i)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   231
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   232
			die("End of block without start at line $lineNo!")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   233
				if !$inBlock;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   234
			while ($inBlock <= $code)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   235
				{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   236
				AddCode($inBlock, $excluded, $decomposition, $folded);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   237
				$inBlock++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   238
				}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   239
			$inBlock = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   240
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   241
		elsif ($description =~ /^<.*First>$/i)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   242
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   243
			die("Block within block at line $lineNo!")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   244
				if $inBlock;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   245
			$inBlock = $code + 1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   246
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   247
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   248
	elsif (!/^[ \t]*$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   249
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   250
		die("Did not understand line $lineNo.");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   251
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   252
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   253
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   254
# We need to construct the data for the table of decompositions of length > 2.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   255
foreach my $decomp (sort {length $::b <=> length $::a} keys %VeryLongDecompositions)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   256
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   257
	if (!exists $VeryLongDecompMap{$decomp})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   258
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   259
		# Does not already exist
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   260
		my $newPos = scalar @VeryLongDecompData;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   261
		$VeryLongDecompMap{$decomp} = $newPos;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   262
		foreach my $code (split(' ', $decomp))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   263
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   264
			push @VeryLongDecompData, $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   265
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   266
		while ($decomp =~ /^([0-9A-F]{4}( [0-9A-F]{4}){2,}) [0-9A-F]{4}$/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   267
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   268
			$decomp = $1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   269
			$VeryLongDecompMap{$decomp} = $newPos;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   270
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   271
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   272
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   273
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   274
# We need to sort the codes for included decompositions into lexicographic
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   275
# order of their decompositions.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   276
# This, luckily, is the same as sorting the strings that represent their
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   277
# decompositions in hex lexicographically.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   278
@IncludedDecomps = sort {$Decomp{$::a} cmp $Decomp{$::b}} @IncludedDecomps;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   279
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   280
print (STDERR 'Included: ', scalar(@IncludedDecomps), "\nLong: ", scalar(@LongExcludedDecomps));
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   281
print(STDERR "\nLongFolds: ", scalar(@ShortDecompsLongFolds), "\nShort: ", scalar(@ShortDecompsShortFolds));
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   282
print(STDERR "\nShortFoldsOnly: ", scalar(@ShortFoldsOnly), "\nTOTAL: ");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   283
print STDERR (scalar(@IncludedDecomps) + scalar(@LongExcludedDecomps) + scalar(@ShortDecompsLongFolds) + scalar(@ShortDecompsShortFolds) + scalar(@ShortFoldsOnly));
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   284
print STDERR "\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   285
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   286
# Analyse the hash table to find out the maximum and average time
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   287
# taken to find each ASCII character
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   288
my $maxAsciiTime = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   289
my $totalAsciiTime = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   290
my $mostDifficultCode = undef;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   291
my $asciiFoundWithoutStepCount = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   292
for (32..126)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   293
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   294
	my $code = $_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   295
	my $pos = HashStart($code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   296
	my $step = HashStep($code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   297
	my $stepCount = 1;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   298
	if ($HashTableEntry{$code})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   299
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   300
		my $posRequired = $HashTableEntry{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   301
		while ($pos != $posRequired)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   302
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   303
			$pos = ($pos + $step) % $HashTableSize;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   304
			$stepCount++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   305
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   306
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   307
	$totalAsciiTime += $stepCount;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   308
	if ($maxAsciiTime < $stepCount)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   309
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   310
		$maxAsciiTime = $stepCount;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   311
		$mostDifficultCode = $code;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   312
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   313
	if ($stepCount == 1)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   314
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   315
		$asciiFoundWithoutStepCount++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   316
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   317
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   318
printf (STDERR "Average ASCII search: %f\n", $totalAsciiTime / 95);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   319
printf (STDERR "Maximum ASCII search %d for %x: '%c'.\n", $maxAsciiTime, $mostDifficultCode, $mostDifficultCode);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   320
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   321
# Now we populate the hash table
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   322
my $index = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   323
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   324
$index = AddListToHash($index, @IncludedDecomps);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   325
my $hashIndexAfterIncludedDecomps = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   326
printf (STDERR "after IncludedDecomps index= %d\n", $hashIndexAfterIncludedDecomps);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   327
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   328
$index = AddListToHash($index, @LongExcludedDecomps);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   329
my $hashIndexAfterLongExcludeDecomps = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   330
printf (STDERR "after LongExcludedDecomps index= %d\n", $hashIndexAfterLongExcludeDecomps);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   331
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   332
$index = AddListToHash($index, @ShortDecompsLongFolds);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   333
my $hashIndexAfterShortDecompsLongFolds = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   334
printf (STDERR "after ShortDecompsLongFolds index= %d\n", $hashIndexAfterShortDecompsLongFolds);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   335
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   336
$index = AddListToHash($index, @ShortDecompsShortFolds);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   337
my $hashIndexAfterShortDecompsShortFolds = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   338
printf (STDERR "after ShortDecompsShortFolds index= %d\n", $hashIndexAfterShortDecompsShortFolds);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   339
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   340
$index = AddListToHash($index, @ShortFoldsOnly);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   341
my $hashIndexAfterShortFoldsOnly = $index;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   342
printf (STDERR "after ShortFoldsOnly index= %d\n", $hashIndexAfterShortFoldsOnly);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   343
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   344
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   345
# Output C++ File
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   346
#
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   347
my $totalBytes = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   348
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   349
print "// Copyright (c) 2001-2009 Nokia Corporation and/or its subsidiary(-ies).\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   350
print "// All rights reserved.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   351
print "// This component and the accompanying materials are made available\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   352
print "// under the terms of the License \"Eclipse Public License v1.0\"\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   353
print "// which accompanies this distribution, and is available\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   354
print "// at the URL \"http://www.eclipse.org/legal/epl-v10.html\".\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   355
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   356
print "// Initial Contributors:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   357
print "// Nokia Corporation - initial contribution.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   358
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   359
print "// Contributors:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   360
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   361
print "// Description:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   362
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   363
print "// Fold and decomposition tables.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   364
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   365
print "// These tables are linked in the following way:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   366
print "// KUnicodeToIndexHash is a hash table using double hashing for\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   367
print "// conflict resolution. The functions DecompositionHashStart and\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   368
print "// DecompositionHashStep give the start and step values for accessing\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   369
print "// the table. The first probe is at DecompositionHashStart and each\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   370
print "// subsequent probe is offset by DecompositionHashStep. Probes\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   371
print "// continue until either 0 is found (indicating that the Unicode value\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   372
print "// sought has no decompostion (i.e. decomposes to itself)) or a value\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   373
print "// is found that has the sought Unicode value in its lower 20 bits.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   374
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   375
print "// In this latter case, the upper 12 bits contain an index into\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   376
print "// one of the following tables, according to the following rules:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   377
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   378
print "// In the case of folding:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   379
print "// If the Index is less than the length of KNonSingletonFolds / 2,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   380
print "// it is an index into KNonSingletonFolds. If the Index is\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   381
print "// greater than the length of KNonSingletonFolds / 2, then it is an\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   382
print "// index into KSingletonFolds.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   383
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   384
print "// In the case of decomposition:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   385
print "// If the Index is less than the length of KNonSingletonDecompositions / 2,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   386
print "// it is an index into KNonSingletonDecompositions. If the Index is\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   387
print "// greater than the length of KNonSingletonDecompositions / 2, then it is an\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   388
print "// index into KSingletonDecompositions.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   389
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   390
print "// In summary:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   391
print "// Let Knsf be the length of KNonSingletonFolds / 2,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   392
print "// let Knsd be the length of KNonSingletonDecompositions / 2,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   393
print "// let Ksd be the length of KSingletonDecompositions and\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   394
print "// let Ksf be the length of KSingletonFolds.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   395
print "// Now if you want to fold a character and you have found\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   396
print "// its index 'i' from the KUnicodeToIndexHash, then;\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   397
print "// if (i < Knsf) then look up\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   398
print "//\t\tKNonSingletonFolds[i * 2] and KNonSingletonFolds[i * 2 + 1]\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   399
print "// else if (Knsf <= i < Knsf + Ksf) look up KSingletonFolds[i - Knsf]\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   400
print "// else there is no fold for this character.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   401
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   402
print "// Or if you want to decompose the same character, then;\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   403
print "// if (i < Knsd) then look up KNonSingletonDecompositions[i * 2]\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   404
print "//\t\tand KNonSingletonDecompositions[i * 2 + 1]\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   405
print "// else if (Knsd <= i < Knsd + Ksd) look up KSingletonDecompositions[i - Knsd]\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   406
print "// else there is no decomposition for this character.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   407
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   408
print "// Your index into KSingletonDecompositions or KSingletonFolds\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   409
print "// yields a single value which is the decomposition or fold.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   410
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   411
print "// The KNonSingletonFolds and KNonSingletonDecomposition\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   412
print "// tables are made up of pairs of values. Each pair is either a pair\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   413
print "// of Unicode values that constitute the fold or decomposition, or\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   414
print "// the first value is KLongD and the second has its top 4 bits as the\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   415
print "// length of the decomposition (or folded decomposition) minus 3,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   416
print "// and its bottom 12 bits as the index into KLongDecompositions\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   417
print "// of where you can find this decomposition.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   418
print "//\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   419
print "// KLongDecompositions simply contains UTF-16 (Unicode) for\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   420
print "// all the decomposed and folded sequences longer than 4 bytes long.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   421
print "\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   422
print "// Hash table mapping unicode values to indices into the other tables\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   423
print "// in use = ".$hashIndexAfterShortFoldsOnly." entries\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   424
print "const unsigned long KUnicodeToIndexHash[$HashTableSize] =\n\t{\n\t";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   425
my @HashTableOutput;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   426
for (0..($HashTableSize - 1))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   427
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   428
	my $v = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   429
	if (exists $HashTableEntryContents{$_})
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   430
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   431
		$v = $HashTableEntryContents{$_};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   432
		die ('Did not expect a Unicode value > 0xFFFFF')
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   433
			if 0xFFFFF < $v;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   434
		$v |= ($Index{$v}) << 20;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   435
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   436
	push @HashTableOutput, sprintf('0x%08x', $v);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   437
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   438
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   439
print (shift @HashTableOutput);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   440
my $valueCount = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   441
foreach my $v (@HashTableOutput)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   442
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   443
	print (((++$valueCount & 7) == 0)? ",\n\t" : ', ');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   444
	print $v;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   445
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   446
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   447
print "// Hash table access functions\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   448
print "const int KDecompositionHashBitmask = $HashTableBitmaskCpp;\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   449
print "inline int DecompositionHashStart(long a)\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   450
print "\t{\n\treturn a & $HashTableBitmaskCpp;\n\t}\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   451
print "inline int DecompositionHashStep(long a)\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   452
print "\t{\n\ta *= a >> $LgHashTableSize;\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   453
print "\treturn ((a<<1) + 1) & $HashTableBitmaskCpp;\n\t}\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   454
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   455
print "// Table mapping KNonSingletonDecompositions to the hash table entry that\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   456
print "// indexes it\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   457
print "const unsigned short KCompositionMapping[] =\n\t{\n\t";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   458
for (0..(scalar(@IncludedDecomps - 1)))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   459
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   460
	if ($_ != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   461
		{print (($_ & 7) == 0? ",\n\t" : ', ')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   462
	printf( '0x%04x', $HashTableEntry{$IncludedDecomps[$_]} );
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   463
	$totalBytes += 2;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   464
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   465
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   466
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   467
print "// Table containing all the decomposition and folding strings longer\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   468
print "// than 2 UTF16 characters\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   469
print "const unsigned short KLongDecompositions[] =\n\t{\n\t0x";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   470
for(0..(scalar(@VeryLongDecompData) - 1))
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   471
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   472
	if ($_ != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   473
		{print (($_ & 7) == 0?",\n\t0x" : ', 0x')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   474
	print $VeryLongDecompData[$_];
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   475
	$totalBytes += 2;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   476
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   477
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   478
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   479
print "// Table containing decompositions longer than one UTF16 character.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   480
print "// The top of the table contains all compositions, sorted lexicographically.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   481
print "// Any decompositions of length 2 are in the table as a pair of values,\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   482
print "// decompositions longer than that are represented by a KLongD followed by\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   483
print "// a value whose top four bits indicate the length of the decomposition minus\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   484
print "// three and whose bottom 12 bits indicate an index into the KLongDecompositions\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   485
print "// array where the decomposition starts.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   486
print "const long KLongD = 0;\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   487
print "// sizeof/2 = ".$hashIndexAfterLongExcludeDecomps."\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   488
print "const unsigned short KNonSingletonDecompositions[] =\n\t{\n\t";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   489
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   490
sub PrintNonsingletonDecompTableEntry
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   491
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   492
	my ($decomp) = @_;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   493
	if (length $decomp < 10)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   494
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   495
		if ($decomp =~ /([0-9A-F]{4}) ([0-9A-F]{4})/)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   496
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   497
			print '0x'.$1.', 0x'.$2;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   498
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   499
		else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   500
			{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   501
			die("$decomp expected to be normalized and of length 1 or 2")
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   502
				if $decomp !~ /[0-9A-F]{4}/;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   503
			print '0x'.$decomp.', 0xFFFF';
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   504
			}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   505
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   506
	else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   507
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   508
		printf ('KLongD, 0x%1X%03X', ((length $decomp) - 14)/5, $VeryLongDecompMap{$decomp});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   509
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   510
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   511
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   512
{my $entryNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   513
foreach my $code (@IncludedDecomps)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   514
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   515
	if ($entryNo != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   516
		{print (($entryNo & 3) == 0?",\n\t" : ', ')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   517
	PrintNonsingletonDecompTableEntry($Decomp{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   518
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   519
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   520
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   521
foreach my $code (@LongExcludedDecomps)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   522
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   523
	print (($entryNo & 3) == 0?",\n\t" : ', ');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   524
	PrintNonsingletonDecompTableEntry($Decomp{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   525
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   526
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   527
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   528
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   529
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   530
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   531
print "// Table of folded decompositions which either have more than one UTF16, or\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   532
print "// their normal decompositions have more than one UTF16\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   533
print "// sizeof/2 = ".$hashIndexAfterShortDecompsLongFolds."\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   534
print "const unsigned short KNonSingletonFolds[] =\n\t{\n\t";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   535
{my $entryNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   536
foreach my $code (@IncludedDecomps)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   537
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   538
	if ($entryNo != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   539
		{print (($entryNo & 3) == 0?",\n\t" : ', ')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   540
	PrintNonsingletonDecompTableEntry($Folded{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   541
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   542
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   543
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   544
foreach my $code (@LongExcludedDecomps)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   545
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   546
	print (($entryNo & 3) == 0?",\n\t" : ', ');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   547
	PrintNonsingletonDecompTableEntry($Folded{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   548
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   549
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   550
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   551
foreach my $code (@ShortDecompsLongFolds)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   552
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   553
	print (($entryNo & 3) == 0?",\n\t" : ', ');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   554
	PrintNonsingletonDecompTableEntry($Folded{$code});
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   555
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   556
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   557
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   558
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   559
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   560
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   561
print "// Table of singleton decompositions and characters with singleton folds\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   562
print "// Note for Unicode 5.0:\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   563
print "// Unicode 5.0 contains some non-BMP characters have non-BMP \"singleton\" folds.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   564
print "// As per the algorithm of this file, the non-BMP character should be stored in \n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   565
print "// this table. \"Unsigned short\" is not big enough to hold them. However, this \n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   566
print "// \"character\" information is not useful. So we just store 0xFFFF instead. \n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   567
print "// Please do check 0xFFFF when access this table. If meet 0xFFFF, that means \n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   568
print "// your character has no decomposition.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   569
print "// See the variable \"ShortDecompsLongFolds\" in FoldAndDecompTables.pl if you \n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   570
print "// want to know more.\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   571
print "// sizeof = ".($hashIndexAfterShortDecompsShortFolds-$hashIndexAfterLongExcludeDecomps)."\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   572
print "const unsigned short KSingletonDecompositions[] =\n\t{\n\t0x";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   573
{my $entryNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   574
foreach my $code (@ShortDecompsLongFolds)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   575
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   576
	if ($entryNo != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   577
		{print (($entryNo & 7) == 0?",\n\t0x" : ', 0x')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   578
	if (exists $Decomp{$code} && $Decomp{$code} ne '')
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   579
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   580
		print $Decomp{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   581
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   582
	else
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   583
		{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   584
		# Don't take these 0xFFFF as character.
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   585
		#printf ('%04X', $code);
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   586
		printf ("FFFF");
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   587
		}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   588
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   589
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   590
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   591
foreach my $code (@ShortDecompsShortFolds)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   592
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   593
	if ($entryNo != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   594
		{print (($entryNo & 7) == 0?",\n\t0x" : ', 0x')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   595
	print $Decomp{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   596
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   597
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   598
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   599
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   600
print "\n\t};\n\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   601
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   602
print "// Table of singleton folds\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   603
print "// sizeof = ".($hashIndexAfterShortFoldsOnly-$hashIndexAfterShortDecompsLongFolds)."\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   604
print "const unsigned short KSingletonFolds[] =\n\t{\n\t0x";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   605
{my $entryNo = 0;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   606
foreach my $code (@ShortDecompsShortFolds)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   607
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   608
	if ($entryNo != 0)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   609
		{print (($entryNo & 7) == 0?",\n\t0x" : ', 0x')}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   610
	print $Folded{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   611
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   612
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   613
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   614
foreach my $code (@ShortFoldsOnly)
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   615
	{
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   616
	print (($entryNo & 7) == 0?",\n\t0x" : ', 0x');
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   617
	print $Folded{$code};
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   618
	$entryNo++;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   619
	$totalBytes += 4;
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   620
	}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   621
}
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   622
print "\n\t};\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   623
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   624
print "\n// Total size: $totalBytes bytes\n";
a41df078684a Convert Kernelhwsrv package from SFL to EPL
John Imhofe
parents:
diff changeset
   625
print STDERR $totalBytes, " bytes\n";