Trying to figure out how to implement my WINC like compatibility layer. Going the emulation way is probably not so smart. We should not use the kernel but rather hook native functions in the Exec calls.
# Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies).
# All rights reserved.
# This component and the accompanying materials are made available
# under the terms of the License "Eclipse Public License v1.0"
# which accompanies this distribution, and is available
# at the URL "http://www.eclipse.org/legal/epl-v10.html".
#
# Initial Contributors:
# Nokia Corporation - initial contribution.
#
# Contributors:
#
# Description:
# Case Equivalence
# Given the unicode data file, work out the case equivalence classes
# i.e. the equivalence classes for the transitive closure of ~ defined as
# follows:
# a~b if Uppercase(a) == b || Lowercase(a) == b || Titlecase(a) == b
# Usage: perl CaseEquivalence <UnicodeData.txt
#
#
use strict;
my @Name = ();
my @Upper = ();
my @Lower = ();
my @Title = ();
# $DecompositionValue[$code] is undefined if $code has no decomposition
# sequence, if it has a single value decomposition sequence, then this is it,
# if it has a longer sequence, the value is -1
my @DecompositionValue = ();
# 1 for each code that has a differently-cased version,
# 2 for each code that is a lower-case version of something else.
my %Codes = ();
my %CaseClass = ();
# Command-line options
my $OptionOutputTrie = 1;
my $OptionOutputForwardMapping = 0;
my $OptionOutputReverseMapping = 0;
my $OptionIgnoreOneToOneReverseMappings = 0;
my $OptionIncludeExtraMappings = 1;
foreach my $optionString (@ARGV)
{
if ($optionString =~ m![/-]o[tfrm]!)
{
$OptionOutputTrie = 0;
my $option = substr($optionString, 2, 1);
if ($option eq 'f')
{
$OptionOutputForwardMapping = 1;
}
elsif ($option eq 'r')
{
$OptionOutputReverseMapping = 1;
}
elsif ($option eq 'm')
{
$OptionOutputReverseMapping = 1;
$OptionIgnoreOneToOneReverseMappings = 1;
}
else
{
$OptionOutputTrie = 1;
}
}
elsif ($optionString =~ m![/-]s!)
{
$OptionIncludeExtraMappings = 0;
}
else
{
print STDERR "Usage: perl CaseEquivalence [-o<mapping>] [-s]\nusing standard input and output streams.\n";
print STDERR "<mapping> is one of:\nt: output C++ code giving a trie for folding case. Each trie level is 4 bits.\n";
print STDERR "f: Give a list of all codes that need mapping and what they map to.\n";
print STDERR "r: Give a list of all codes are mapped to and what maps to them.\n";
print STDERR "m: Give a list of all codes are mapped to by more than one code.\n";
print STDERR "\nOmitting the -s option adds the following case-equivalence:\nSpace = Non-breaking space\n";
exit;
}
}
# set a code as being part of a non-unitary case-equivalence class.
sub add
{
my ($addition) = @_;
if (!$Codes{$addition})
{
$Codes{$addition} = 1;
}
}
# make a code point to its final case varient
sub chaseDown
{
my ($codeVal) = @_;
my $class = $codeVal;
while ($CaseClass{$class})
{
$class = $CaseClass{$class};
}
$CaseClass{$codeVal} = $class unless $codeVal == $class;
return $class;
}
# link two codes together as being part of the same case-equivalence class
sub makeEquivalent
{
my ($left, $right) = @_;
if (!$left || !$right)
{
return;
}
$left = chaseDown($left);
$right = chaseDown($right);
if ($Codes{$left} < $Codes{$right})
{
$CaseClass{$left} = $right;
return;
}
if ($Codes{$right} < $Codes{$left})
{
$CaseClass{$right} = $left;
return;
}
if ($left < $right)
{
$CaseClass{$right} = $left;
return;
}
if ($right < $left)
{
$CaseClass{$left} = $right;
return;
}
# $left == $right.. do nothing
return;
}
# Link possibly unmentioned codes together. The first one is considered lower-case
sub addEquivalenceClass
{
my ($lower, @rest) = @_;
$Codes{$lower} = 2;
foreach my $one (@rest)
{
$Codes{$one} = 1;
makeEquivalent($lower, $one);
}
}
# Firstly we read in the data
while(<STDIN>)
{
my @line = split('#', $_, 1);
my @fields = split(/;/, $line[0]);
my @decomposition = split(' ', $fields[5]);
if (1 < scalar(@fields))
{
my $codeVal = hex($fields[0]);
# if the character has a non-compatibility decomposition sequence, record this fact.
if (0 < scalar(@decomposition))
{
my $decompositionType = "";
if ($decomposition[0] =~ m/<[a-zA-Z0-9]+>/)
{
$decompositionType = shift @decomposition;
}
if ($decompositionType !~ m/compat/i)
{
$DecompositionValue[$codeVal] = scalar(@decomposition) == 1? hex($decomposition[0]) : -1;
}
}
$Name[$codeVal] = $fields[1];
my $upperval = $fields[12];
my $lowerval = $fields[13];
my $titleval = $fields[14];
# strip whitespace from the end of the string
$titleval =~ s/\s+$//;
if ($upperval)
{
$upperval = hex($upperval);
$Upper[$codeVal] = $upperval;
add $codeVal;
add $upperval;
}
if ($titleval)
{
$titleval = hex($titleval);
$Title[$codeVal] = $titleval;
add $codeVal;
add $titleval;
}
if ($lowerval)
{
$lowerval = hex($lowerval);
$Lower[$codeVal] = $lowerval;
add $codeVal;
$Codes{$lowerval} = 2;
}
}
}
# Remove all codes that decompose to a sequence
foreach my $codeVal (keys(%Codes))
{
my $current = $DecompositionValue[$codeVal];
while ($current && 0 < $current)
{
$current = $DecompositionValue[$current];
}
if ($current && $current == -1)
{
delete $Codes{$codeVal};
}
}
# Next we form the equivalence classes.
if ($OptionIncludeExtraMappings)
{
# space = non-breaking space
addEquivalenceClass(0x20, 0xA0);
}
# We try to end up with everything being equivalent to a lower case letter
foreach my $codeVal (keys(%Codes))
{
makeEquivalent($codeVal, $Lower[$codeVal]);
makeEquivalent($codeVal, $Upper[$codeVal]);
makeEquivalent($codeVal, $Title[$codeVal]);
}
# Next we chase each pointer in CaseClass down to its final result
foreach my $codeVal (keys(%CaseClass))
{
chaseDown($codeVal);
}
# Now output the results in order, and collect the raw data
my @Offset = ();
my $oldCodeCount = 0;
foreach my $codeVal (sort {$a <=> $b} keys(%CaseClass))
{
my $class = $CaseClass{$codeVal};
my $offset = $class - $codeVal;
if ($OptionOutputForwardMapping)
{
printf "%x %d\t\t%s => %s\n", $codeVal, $offset, $Name[$codeVal], $Name[$class];
}
while ($oldCodeCount != $codeVal)
{
$Offset[$oldCodeCount] = 0;
$oldCodeCount++;
}
$oldCodeCount++;
$Offset[$codeVal] = $offset;
}
if ($OptionOutputReverseMapping)
{
my %ReverseMapping = ();
foreach my $codeVal (keys(%CaseClass))
{
my $mapsTo = $CaseClass{$codeVal};
if (!$ReverseMapping{$mapsTo})
{
$ReverseMapping{$mapsTo} = [$codeVal];
}
else
{
push (@{ $ReverseMapping{$mapsTo} }, $codeVal);
}
}
foreach my $mapVal (sort {$a <=> $b} keys(%ReverseMapping))
{
next if ($OptionIgnoreOneToOneReverseMappings && scalar(@{$ReverseMapping{$mapVal}}) == 1);
printf("%x: %s <=", $mapVal, $Name[$mapVal]);
my $firstTime = 1;
foreach my $val ( @{ $ReverseMapping{$mapVal} } )
{
if (!$firstTime)
{
print ',';
}
$firstTime = 0;
printf(" %s:%x", $Name[$val], $val);
}
print "\n";
}
}
# does the array 2 match array 1? Match the shorter array against the prefix of
# the other array
sub arraysMatch
{
my ($left, $right, $leftpos) = @_;
my $last = scalar(@$left) - $leftpos;
if (scalar(@$right) < $last)
{
$last = scalar(@$right);
}
my $pos = 0;
while ($pos < $last)
{
if ($$left[$pos + $leftpos] != $$right[$pos])
{
return 0;
}
$pos++;
}
return 1;
}
# find a match for array 2 in array 1, allowing values past the end of array 1
# to match anything in array 1
sub findMatch
{
my ($candidate, $term) = @_;
my $pos = 0;
while (!arraysMatch($candidate, $term, $pos))
{
$pos++;
}
return $pos;
}
# add the data in array 2 to array 1, returning the position they went in.
sub addArray
{
my ($candidate, $addition) = @_;
my $pos = findMatch($candidate, $addition);
# add any required on to the end of the candidate block
my $last = $pos + scalar(@$addition);
my $additionPos = scalar(@$candidate) - $pos;
while ($pos + $additionPos < $last)
{
$$candidate[$pos + $additionPos] = $$addition[$additionPos];
$additionPos++;
}
return $pos;
}
# create data block 1 and indices 2 from data 3 and block size 4
sub createTrieLevel
{
my ($data, $indices, $input, $blockSize) = @_;
my $block = 0;
while ($block * $blockSize < scalar(@$input))
{
my $start = $block * $blockSize;
my $end = $start + $blockSize;
my $currentBlockSize = $blockSize;
if (scalar(@$input) < $end)
{
$end = scalar(@$input);
$currentBlockSize = $end - $start;
}
my @currentBlock = @$input[$start..($end - 1)];
while ($currentBlockSize != $blockSize)
{
$currentBlock[$currentBlockSize] = 0;
$currentBlockSize++;
}
$$indices[$block] = addArray($data, \@currentBlock);
$block++;
}
}
sub OutputArray
{
my $index = 0;
my $firstTime = 1;
while ($index != scalar(@_))
{
if (!$firstTime)
{
if ($index % 8)
{
print ', ';
}
else
{
print ",\n\t";
}
}
else
{
print "\t";
$firstTime = 0;
}
print($_[$index]);
$index++;
}
print "\n";
}
if ($OptionOutputTrie)
{
my @Trie0 = ();
my @Index0 = ();
my @Trie1 = ();
my @Index1 = ();
my @Trie2 = ();
my @Index2 = ();
createTrieLevel(\@Trie0, \@Index0, \@Offset, 16);
createTrieLevel(\@Trie1, \@Index1, \@Index0, 16);
createTrieLevel(\@Trie2, \@Index2, \@Index1, 16);
print "// Use the bits from 12 up from your character to index CaseFoldTable0.\n";
print "// Use the result of this plus bits 8-11 to index CaseFoldTable1.\n";
print "// Use the result of this plus bits 4-7 to index CaseFoldTable2.\n";
print "// Use the result of this plus bits 0-3 to index CaseFoldTable3.\n";
print "// Add the result of this to your character to fold it.\n\n";
print "static const short CaseFoldTable3[] =\n\t{\n";
OutputArray(@Trie0);
print "\t};\n\nstatic const unsigned short CaseFoldTable2[] =\n\t{\n";
OutputArray(@Trie1);
print "\t};\n\nstatic const unsigned char CaseFoldTable1[] =\n\t{\n";
OutputArray(@Trie2);
print "\t};\n\nstatic const unsigned char CaseFoldTable0[] =\n\t{\n";
OutputArray(@Index2);
print "\t};\n";
}