diff -r 000000000000 -r 1fb32624e06b charconvfw/charconvplugins/tools/cnvtool.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/charconvfw/charconvplugins/tools/cnvtool.pl Tue Feb 02 02:02:46 2010 +0200 @@ -0,0 +1,1533 @@ +# +# Copyright (c) 1997-2009 Nokia Corporation and/or its subsidiary(-ies). +# All rights reserved. +# This component and the accompanying materials are made available +# under the terms of "Eclipse Public License v1.0" +# which accompanies this distribution, and is available +# at the URL "http://www.eclipse.org/legal/epl-v10.html". +# +# Initial Contributors: +# Nokia Corporation - initial contribution. +# +# Contributors: +# +# Description: +# + +use strict; +use integer; + +BEGIN + { + my $perlScriptPath=$0; + my $os = $^O; #get the OS type + #check OS type + if($os=~/MSWin32/) #Windows OS + { + $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes + $perlScriptPath=~s/(\\?)[^\\]+$/$1/; # get rid of this Perl-script's file-name + } + else #Unix OS + { + $perlScriptPath=~s/\\/\//g; # replace any back-slashes with forward-slashes + $perlScriptPath=~s/(\/?)[^\/]+$/$1/; # get rid of this Perl-script's file-name + } + unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names + } +use PARSER; +use WRITER; + +$|=1; # ensures that any progress information sent to the screen is displayed immediately and not buffered +if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/help/i)) + { + die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2000-2009 Nokia Corporation and/or its subsidiary(-ies).\n\nUsage:\n\n\tcnvtool [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-s | -generateSourceCode\n\t-c | -columns(: , )\n\t-r | -omitReplacementForUnconvertibleUnicodeCharacters\n\t-p | 
-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(, , ...)\n\n"); + } +my $generateSourceCode=0; +my @columns=(2, 1, 2); +my $omitReplacementForUnconvertibleUnicodeCharacters=0; +my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0; +my @sourceFilesToSubtract=(); +my $flattenHashAndSave=0; # this flag is not published for use outside of the CHARCONV component +&extractCommandLineFlags(\$generateSourceCode, \@columns, \$omitReplacementForUnconvertibleUnicodeCharacters, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract, \$flattenHashAndSave); +(!$omitReplacementForUnconvertibleUnicodeCharacters || $generateSourceCode) or die("Error: bad combination of flags\n"); +my $controlFile=shift; +my $sourceFile=shift; +my $outputFile=shift; +print("Generating $outputFile...\n"); +my $uid=0; +my $endiannessAsText=''; +my $endianness=0; +my $replacementForUnconvertibleUnicodeCharacters=''; +my @foreignVariableByteData=(); +my @foreignToUnicodeData=(); +my @unicodeToForeignData=(); +my %foreignCharacterCodes=(); +my %unicodeCharacterCodes=(); +my %preferredForeignCharacterCodesForConflictResolution=(); +my %preferredUnicodeCharacterCodesForConflictResolution=(); +my %additionalSubsetTables=(); +my %privateUseUnicodeCharacterSlotsUsed=(); + +print(" reading $controlFile...\n"); +open(CONTROL_FILE, "< $controlFile") or die("Error: could not open \"$controlFile\" for reading\n"); +&readHeaderFromControlFile(\*CONTROL_FILE, $controlFile, $generateSourceCode, \$uid, \$endiannessAsText, \$endianness, \$replacementForUnconvertibleUnicodeCharacters, $flattenHashAndSave); +&readForeignVariableByteDataFromControlFile(\*CONTROL_FILE, $controlFile, \@foreignVariableByteData); +&readOneDirectionDataFromControlFile(\*CONTROL_FILE, $controlFile, \@foreignToUnicodeData, \%preferredUnicodeCharacterCodesForConflictResolution, \%additionalSubsetTables, 1); +&readOneDirectionDataFromControlFile(\*CONTROL_FILE, $controlFile, 
\@unicodeToForeignData, \%preferredForeignCharacterCodesForConflictResolution, \%additionalSubsetTables, 0); +close(CONTROL_FILE) or die("Error: could not close \"$controlFile\"\n"); + +print(" reading $sourceFile...\n"); +open(SOURCE_FILE, "< $sourceFile") or die("Error: could not open \"$sourceFile\" for reading\n"); +&readSourceFile(\*SOURCE_FILE, $sourceFile, \%foreignCharacterCodes, \%unicodeCharacterCodes, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \%privateUseUnicodeCharacterSlotsUsed, 0); +close(SOURCE_FILE) or die("Error: could not close \"$sourceFile\"\n"); + +my $sourceFileToSubtract; +foreach $sourceFileToSubtract (@sourceFilesToSubtract) + { + print(" subtracting from $sourceFileToSubtract...\n"); + open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n"); + &readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%foreignCharacterCodes, \%unicodeCharacterCodes, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \%privateUseUnicodeCharacterSlotsUsed, 1); + close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n"); + } + +&warnIfAnyPrivateUseUnicodeCharacterSlotsBeingUsed(\%privateUseUnicodeCharacterSlotsUsed); +&resolveConflictsAndFlattenArraysToScalars(\%foreignCharacterCodes, \%preferredForeignCharacterCodesForConflictResolution, 'Unicode', 'foreign'); +&resolveConflictsAndFlattenArraysToScalars(\%unicodeCharacterCodes, \%preferredUnicodeCharacterCodesForConflictResolution, 'foreign', 'Unicode'); +&checkForeignVariableByteData($endianness, \@foreignVariableByteData, \@foreignToUnicodeData); + +print(" writing $outputFile...\n"); +open(OUTPUT_FILE, "> $outputFile") or die("Error: could not open \"$outputFile\" for writing\n"); +if ($generateSourceCode) + { + my @sourceCodeOfForeignToUnicodeIndexedTables16=(); + my @sourceCodeOfForeignToUnicodeKeyedTables1616=(); + my 
@sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_indexedEntries=(); + my @sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_keyedEntries=(); + + my @sourceCodeOfUnicodeToForeignIndexedTables16=(); + my @sourceCodeOfUnicodeToForeignKeyedTables1616=(); + my @sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_indexedEntries=(); + my @sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_keyedEntries=(); + + # new for 32 bit encoding begin + my @sourceCodeOfForeignToUnicodeIndexedTables32=(); + my @sourceCodeOfForeignToUnicodeKeyedTables3232=(); + my @sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_indexedEntries=(); + my @sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_keyedEntries=(); + + my @sourceCodeOfUnicodeToForeignIndexedTables32=(); + my @sourceCodeOfUnicodeToForeignKeyedTables3232=(); + my @sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_indexedEntries=(); + my @sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_keyedEntries=(); + # new for 32 bit endcoding end + + my @sourceCodeOfTopLevelStructures=(); + + &writeSourceCodeHeader(\*OUTPUT_FILE, $outputFile, $replacementForUnconvertibleUnicodeCharacters); + &writeSourceCodeForeignVariableByteData(\@sourceCodeOfTopLevelStructures, \@foreignVariableByteData); + &writeSourceCodeOneDirectionData(\@sourceCodeOfTopLevelStructures, + \@sourceCodeOfForeignToUnicodeIndexedTables16, \@sourceCodeOfForeignToUnicodeKeyedTables1616, \@sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_indexedEntries, \@sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_keyedEntries, + \@sourceCodeOfForeignToUnicodeIndexedTables32, \@sourceCodeOfForeignToUnicodeKeyedTables3232, \@sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_indexedEntries, \@sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_keyedEntries, + \@foreignToUnicodeData, \%unicodeCharacterCodes, 1); + &writeSourceCodeOneDirectionData(\@sourceCodeOfTopLevelStructures, + 
\@sourceCodeOfUnicodeToForeignIndexedTables16, \@sourceCodeOfUnicodeToForeignKeyedTables1616, \@sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_indexedEntries, \@sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_keyedEntries, + \@sourceCodeOfUnicodeToForeignIndexedTables32, \@sourceCodeOfUnicodeToForeignKeyedTables3232, \@sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_indexedEntries, \@sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_keyedEntries, + \@unicodeToForeignData, \%foreignCharacterCodes, 0); + &writeSourceCodeFinalStuff(\*OUTPUT_FILE, + \@sourceCodeOfForeignToUnicodeIndexedTables16, \@sourceCodeOfForeignToUnicodeKeyedTables1616, \@sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_indexedEntries, \@sourceCodeOfForeignToUnicodeKeyedTables16OfIndexedTables16_keyedEntries, + \@sourceCodeOfUnicodeToForeignIndexedTables16, \@sourceCodeOfUnicodeToForeignKeyedTables1616, \@sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_indexedEntries, \@sourceCodeOfUnicodeToForeignKeyedTables16OfIndexedTables16_keyedEntries, + \@sourceCodeOfForeignToUnicodeIndexedTables32, \@sourceCodeOfForeignToUnicodeKeyedTables3232, \@sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_indexedEntries, \@sourceCodeOfForeignToUnicodeKeyedTables32OfIndexedTables32_keyedEntries, + \@sourceCodeOfUnicodeToForeignIndexedTables32, \@sourceCodeOfUnicodeToForeignKeyedTables3232, \@sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_indexedEntries, \@sourceCodeOfUnicodeToForeignKeyedTables32OfIndexedTables32_keyedEntries, + \@sourceCodeOfTopLevelStructures, $endiannessAsText, \%additionalSubsetTables); + } +elsif ($flattenHashAndSave) + { + binmode OUTPUT_FILE; + #instead of calling the writeBinaryHeader, just write the data I need followed by + # writeBinaryForeignVariableByteData... 
# Pulls cnvtool's option flags out of @ARGV, leaving only the positional
# arguments (control file, source file, output file) behind.
#
# Every parameter is a reference that receives the parsed value:
#   $generateSourceCode                                  scalar ref, set to 1 by -s | -generateSourceCode
#   $columns                                             array ref, overwritten by -c | -columns(a: b, c)
#   $omitReplacementForUnconvertibleUnicodeCharacters    scalar ref, set to 1 by -r | -omitReplacement...
#   $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed   scalar ref, set to 1 by -p | -cutOutAnyPrivateUse...
#   $sourceFilesToSubtract                               array ref, overwritten by -u | -sourceFilesToSubtract(f1, f2, ...)
#   $flattenHashAndSave                                  scalar ref, set to 1 by -f | -flattenHashAndSave
#
# Dies if -s and -f are combined, or if a -columns / -sourceFilesToSubtract
# value is malformed.  Flags are matched case-insensitively.
#
# Note: the sub used to be declared with an empty prototype "()", which
# contradicts its six arguments and only worked because it is called with "&".
sub extractCommandLineFlags
	{
	my $generateSourceCode=shift;
	my $columns=shift;
	my $omitReplacementForUnconvertibleUnicodeCharacters=shift;
	my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
	my $sourceFilesToSubtract=shift;
	my $flattenHashAndSave=shift;
	my $i=0;
	# @ARGV shrinks whenever a flag is spliced out, so its length is re-read on
	# every pass and $i only advances past arguments that are not flags
	while ($i<=$#ARGV)
		{
		if ($ARGV[$i]=~/^-(?:s|generateSourceCode)$/i)
			{
			($$flattenHashAndSave!=1) or die("Error: Cannot have -s and -f flags set at the same time\n");
			splice(@ARGV, $i, 1);
			$$generateSourceCode=1;
			}
		elsif ($ARGV[$i]=~/^-(?:c|columns)\b(.*)$/i)
			{
			my $columnsData=$1;
			splice(@ARGV, $i, 1);
			# the shell may have split "(a: b, c)" over several arguments, so keep
			# gluing the following arguments on until the whole pattern matches
			for (;;)
				{
				if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*\,?\s*(\d+)\s*\)\s*$/)
					{
					@$columns=($1, $2, $3);
					last;
					}
				($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
				$columnsData.=(splice(@ARGV, $i, 1))[0];
				}
			}
		elsif ($ARGV[$i]=~/^-(?:r|omitReplacementForUnconvertibleUnicodeCharacters)$/i)
			{
			splice(@ARGV, $i, 1);
			$$omitReplacementForUnconvertibleUnicodeCharacters=1;
			}
		elsif ($ARGV[$i]=~/^-(?:p|cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed)$/i)
			{
			splice(@ARGV, $i, 1);
			$$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
			}
		elsif ($ARGV[$i]=~/^-(?:u|sourceFilesToSubtract)\b(.*)$/i)
			{
			my $sourceFilesData=$1;
			splice(@ARGV, $i, 1);
			# same argument-gluing as for "-columns"
			for (;;)
				{
				if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
					{
					my $commaSeparatedFileNames=$1;
					@$sourceFilesToSubtract=split(/,/, $commaSeparatedFileNames, -1); # -1 keeps trailing empty fields so that they are rejected below
					my $j;
					for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
						{
						$sourceFilesToSubtract->[$j]=~s/^\s+//;
						$sourceFilesToSubtract->[$j]=~s/\s+$//;
						($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
						}
					last;
					}
				($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
				$sourceFilesData.=(splice(@ARGV, $i, 1))[0];
				}
			}
		elsif ($ARGV[$i]=~/^-(?:f|flattenHashAndSave)$/i)
			{
			($$generateSourceCode!=1) or die("Error: Cannot have -s and -f flags set at the same time\n");
			splice(@ARGV, $i, 1);
			$$flattenHashAndSave=1;
			}
		else
			{
			++$i; # not a flag - leave it in @ARGV as a positional argument
			}
		}
	}
# Renders every byte of $string as a C-style "\xNN" escape (two lower-case
# hex digits per byte) and returns the concatenation; an empty string yields
# an empty result.
sub hexadecimalify
	{
	my ($string)=@_;
	my @escapedBytes=map {sprintf("\\x%02x", $_)} unpack('C*', $string);
	return join('', @escapedBytes);
	}
(($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))) + { + if ($cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) + { + $handleConversionPair=0; + } + else + { + if ($subtract) + { + delete $privateUseUnicodeCharacterSlotsUsed->{$unicodeCharacterCode}; + } + else + { + $privateUseUnicodeCharacterSlotsUsed->{$unicodeCharacterCode}=1; + } + } + } + if ($handleConversionPair) + { + if ($subtract) + { + if (!defined($foreignCharacterCodes->{$unicodeCharacterCode}->{$foreignCharacterCode})) + { + close($fileHandle); + die('Error: cannot subtract conversion pair ['.sprintf('foreign 0x%x, Unicode 0x%04x', $foreignCharacterCode, $unicodeCharacterCode)."] as it does not occur in \"$fileName\"\n"); + } + if (!defined($unicodeCharacterCodes->{$foreignCharacterCode}->{$unicodeCharacterCode})) + { + close($fileHandle); + die('Error: cannot subtract conversion pair ['.sprintf('Unicode 0x%04x, foreign 0x%x', $unicodeCharacterCode, $foreignCharacterCode)."] as it does not occur in \"$fileName\"\n"); + } + delete $foreignCharacterCodes->{$unicodeCharacterCode}->{$foreignCharacterCode}; + if (keys(%{$foreignCharacterCodes->{$unicodeCharacterCode}})==0) + { + delete $foreignCharacterCodes->{$unicodeCharacterCode}; + } + delete $unicodeCharacterCodes->{$foreignCharacterCode}->{$unicodeCharacterCode}; + if (keys(%{$unicodeCharacterCodes->{$foreignCharacterCode}})==0) + { + delete $unicodeCharacterCodes->{$foreignCharacterCode}; + } + } + else + { + if (defined($foreignCharacterCodes->{$unicodeCharacterCode}->{$foreignCharacterCode})) + { + close($fileHandle); + die('Error: same conversion pair ['.sprintf('foreign 0x%x, Unicode 0x%04x', $foreignCharacterCode, $unicodeCharacterCode)."] occurs more than once in \"$fileName\"\n"); + } + if (defined($unicodeCharacterCodes->{$foreignCharacterCode}->{$unicodeCharacterCode})) + { + close($fileHandle); + die('Error: same conversion pair ['.sprintf('Unicode 0x%04x, foreign 0x%x', $unicodeCharacterCode, 
# Parses the header section of a control file, in this fixed order:
#   UID 0x...                  (may be absent only when generating source code or flattening)
#   Name "..."                 (optional, obsolete - only triggers a warning)
#   Endianness <word>          (mandatory)
#   ReplacementForUnconvertibleUnicodeCharacters 0x.. 0x.. ...   (mandatory)
#   ForeignCharacterCodeProcessingCode ...   (optional, obsolete - only triggers a warning)
#
# $uid, $endiannessAsText, $endianness and
# $replacementForUnconvertibleUnicodeCharacters are scalar references that
# receive the parsed values.  On any malformed line the file handle is closed
# and the sub dies.  A line read past the end of the header that is not the
# obsolete keyword is handed back via ungetNonEmptyStrippedDownLine.
sub readHeaderFromControlFile
	{
	my ($fileHandle, $fileName, $generateSourceCode, $uid, $endiannessAsText, $endianness, $replacementForUnconvertibleUnicodeCharacters, $flattenHashAndSave)=@_;
	my ($rawLine, $tidyLine)=&nextNonEmptyStrippedDownLine($fileHandle);

	# UID
	if ($tidyLine=~/^UID\s+0x([0-9a-f]+)$/i)
		{
		my $uidAsHex=$1;
		if ($generateSourceCode)
			{
			print(STDERR "Warning: \"UID\" keyword should not be used with \"-generateSourceCode\" flag - specify the UID in the MMP file\n");
			}
		$$uid=hex($uidAsHex);
		($rawLine, $tidyLine)=&nextNonEmptyStrippedDownLine($fileHandle);
		}
	elsif (!$generateSourceCode && !$flattenHashAndSave)
		{
		close($fileHandle);
		die("Error: unexpected line in \"$fileName\" (\"UID\" keyword expected):\n $rawLine\n");
		}

	# Name (obsolete)
	if ($tidyLine=~/^Name\s+"(.+?)"$/i)
		{
		print(STDERR "Warning: obsolete keyword \"Name\" used\n");
		($rawLine, $tidyLine)=&nextNonEmptyStrippedDownLine($fileHandle);
		}

	# Endianness
	unless ($tidyLine=~/^Endianness\s+(\w+)$/i)
		{
		close($fileHandle);
		die("Error: unexpected line in \"$fileName\" (\"Endianness\" keyword expected):\n $rawLine\n");
		}
	$$endiannessAsText=$1;
	# tried in this order; the first (unanchored, case-insensitive) match wins
	my @knownEndiannesses=
		(
		[qr/Unspecified/i, 0],       # SCnvConversionData::EUnspecified
		[qr/FixedLittleEndian/i, 1], # SCnvConversionData::EFixedLittleEndian
		[qr/FixedBigEndian/i, 2]     # SCnvConversionData::EFixedBigEndian
		);
	my $endiannessRecognised=0;
	my $knownEndianness;
	foreach $knownEndianness (@knownEndiannesses)
		{
		if ($$endiannessAsText=~$knownEndianness->[0])
			{
			$$endianness=$knownEndianness->[1];
			$endiannessRecognised=1;
			last;
			}
		}
	if (!$endiannessRecognised)
		{
		close($fileHandle);
		die("Error: \"$$endiannessAsText\" is not a legal value for \"Endianness\"\n");
		}

	# ReplacementForUnconvertibleUnicodeCharacters
	($rawLine, $tidyLine)=&nextNonEmptyStrippedDownLine($fileHandle);
	unless ($tidyLine=~/^ReplacementForUnconvertibleUnicodeCharacters\s+(.*?)$/i)
		{
		close($fileHandle);
		die("Error: unexpected line in \"$fileName\" (\"ReplacementForUnconvertibleUnicodeCharacters\" keyword expected):\n $rawLine\n");
		}
	my $unparsedBytes=$1;
	$$replacementForUnconvertibleUnicodeCharacters='';
	# consume one "0xNN" token per pass and append it as a single byte
	while ($unparsedBytes ne '')
		{
		unless ($unparsedBytes=~/^0x([0-9a-f]{1,2})\s*(.*)$/i)
			{
			close($fileHandle);
			die("Error: unexpected line in \"$fileName\":\n $rawLine\n");
			}
		my ($byteAsHex, $stillUnparsed)=($1, $2);
		$$replacementForUnconvertibleUnicodeCharacters.=pack("C", hex($byteAsHex));
		$unparsedBytes=$stillUnparsed;
		}

	# ForeignCharacterCodeProcessingCode (obsolete) - otherwise the line belongs to the next section
	my @lookAhead=&nextNonEmptyStrippedDownLine($fileHandle);
	if ($lookAhead[1]=~/^ForeignCharacterCodeProcessingCode/i)
		{
		print(STDERR "Warning: obsolete keyword \"ForeignCharacterCodeProcessingCode\" used\n");
		}
	else
		{
		ungetNonEmptyStrippedDownLine(@lookAhead)
		}
	}
# Parses one "Start<Dir>Data ... End<Dir>Data" section of the control file,
# where <Dir> is "ForeignToUnicode" when $outputIsUnicode is true and
# "UnicodeToForeign" otherwise.
#
#   $fileHandle, $fileName    control file being read (the name is only used in error messages)
#   $oneDirectionData         array ref; one array ref is appended per range line:
#                             [includePriority, searchPriority, firstInputCode, lastInputCode,
#                              algorithm, (sizeOfOutputCharacterCodeInBytes - UnicodeToForeign only,) parameters]
#   $preferredCharacterCodesForConflictResolution
#                             hash ref; filled from the lines following "ConflictResolution"
#                             (source character code => preferred target character code)
#   $additionalSubsetTables   hash ref keyed by table name; each value is an array ref whose
#                             slots 0/1 (ForeignToUnicode) or 2/3 (UnicodeToForeign) hold the
#                             number of ranges read so far at the Start/End marker
#   $outputIsUnicode          true for the foreign-to-Unicode direction
#
# On any malformed line the file handle is closed and the sub dies.
#
# Note: the subset-table bookkeeping used to bypass the $additionalSubsetTables
# parameter and write to the file-scope %additionalSubsetTables directly.
sub readOneDirectionDataFromControlFile
	{
	my $fileHandle=shift;
	my $fileName=shift;
	my $oneDirectionData=shift;
	my $preferredCharacterCodesForConflictResolution=shift;
	my $additionalSubsetTables=shift;
	my $outputIsUnicode=shift;
	my $middlePortionOfKeyWords=$outputIsUnicode? "ForeignToUnicode": "UnicodeToForeign";
	# only the Unicode-to-foreign direction carries an output-size column; the empty
	# group keeps the capture numbering ($6, $7) identical for both directions
	my $extraPatternToMatch=$outputIsUnicode? '()': '\s+(\d+)';
	my $line;
	my $strippedDownLine;
	($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
	if ($strippedDownLine!~/^Start${middlePortionOfKeyWords}Data$/i)
		{
		close($fileHandle);
		die("Error: unexpected line in \"$fileName\":\n $line\n");
		}
	my $doingConflictResolution=0;
	for (;;)
		{
		($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
		if ($strippedDownLine=~/^End${middlePortionOfKeyWords}Data$/i)
			{
			last;
			}
		if ($strippedDownLine=~/^ConflictResolution$/i)
			{
			# every remaining line of the section is now a "source target" preference pair
			$doingConflictResolution=1;
			}
		elsif ($doingConflictResolution)
			{
			if ($strippedDownLine!~/^0x([0-9a-f]+)\s+0x([0-9a-f]+)$/i)
				{
				close($fileHandle);
				die("Error: unexpected line in \"$fileName\":\n $line\n");
				}
			my $sourceCharacterCodeToResolve=hex($1);
			my $targetCharacterCodePreferred=hex($2);
			$preferredCharacterCodesForConflictResolution->{$sourceCharacterCodeToResolve}=$targetCharacterCodePreferred;
			}
		elsif ($strippedDownLine=~/^(Start|End)AdditionalSubsetTable\s+(.*)$/i)
			{
			my $prefix=$1;
			my $nameOfAdditionalSubsetTable=$2;
			my $index=($prefix=~/^Start$/i)? 0: 1;
			if (!$outputIsUnicode)
				{
				$index+=2;
				}
			if (defined($additionalSubsetTables->{$nameOfAdditionalSubsetTable}->[$index]))
				{
				close($fileHandle);
				die("Error: multiple redefinition of \"${prefix}AdditionalSubsetTable $nameOfAdditionalSubsetTable\"\n");
				}
			# scalar assignment: records how many ranges have been read so far
			$additionalSubsetTables->{$nameOfAdditionalSubsetTable}->[$index]=@$oneDirectionData;
			}
		else
			{
			if ($strippedDownLine!~/^(\d+)\s+(\d+)\s+0x([0-9a-f]+)\s+0x([0-9a-f]+)\s+(\w+)$extraPatternToMatch\s+\{(.*?)\}$/i)
				{
				close($fileHandle);
				die("Error: unexpected line in \"$fileName\":\n $line\n");
				}
			my $includePriority=$1;
			my $searchPriority=$2;
			my $firstInputCharacterCodeInRange=hex($3);
			my $lastInputCharacterCodeInRange=hex($4);
			my $algorithmAsText=$5;
			my $sizeOfOutputCharacterCodeInBytes=$6;
			my $parameters=$7;
			if ($lastInputCharacterCodeInRange<$firstInputCharacterCodeInRange)
				{
				close($fileHandle);
				die("Error: lastInputCharacterCodeInRange ".sprintf("0x%02x", $lastInputCharacterCodeInRange)." is less than firstInputCharacterCodeInRange ".sprintf("0x%02x", $firstInputCharacterCodeInRange)."\n");
				}
			my $algorithm=&algorithm($algorithmAsText);
			if ($algorithm<0)
				{
				close($fileHandle);
				die("Error: unexpected algorithm \"$algorithmAsText\"\n");
				}
			my $rangeData=[$includePriority, $searchPriority, $firstInputCharacterCodeInRange, $lastInputCharacterCodeInRange, $algorithm];
			if (!$outputIsUnicode)
				{
				push(@$rangeData, $sizeOfOutputCharacterCodeInBytes);
				}
			push(@$rangeData, $parameters);
			push(@$oneDirectionData, $rangeData);
			}
		}
	}
', @asText)."]\n"); + } + } + +sub resolveConflictsAndFlattenArraysToScalars + { + my $characterCodes=shift; + my $preferredCharacterCodesForConflictResolution=shift; + my $source=shift; + my $target=shift; + my $sourceCharacterCode; + my $candidateTargetCharacterCodes; + while (($sourceCharacterCode, $candidateTargetCharacterCodes)=each(%$characterCodes)) + { + my @candidateTargetCharacterCodes=keys(%$candidateTargetCharacterCodes); + if (@candidateTargetCharacterCodes<1) + { + die("Error: internal error 2\n"); + } + if (@candidateTargetCharacterCodes==1) + { + $characterCodes->{$sourceCharacterCode}=$candidateTargetCharacterCodes[0]; + } + else + { + if (!defined($preferredCharacterCodesForConflictResolution->{$sourceCharacterCode})) + { + die("Error: no preferred $target character code is specified for conflict resolution for the $source character code ".sprintf("0x%08x", $sourceCharacterCode)."\n"); + } + my $preferredCharacterCodeIsNotACandidateForConflictResolution=1; + my $candidateTargetCharacterCode; + foreach $candidateTargetCharacterCode (@candidateTargetCharacterCodes) + { + if ($preferredCharacterCodesForConflictResolution->{$sourceCharacterCode}==$candidateTargetCharacterCode) + { + $preferredCharacterCodeIsNotACandidateForConflictResolution=0; + last; + } + } + if ($preferredCharacterCodeIsNotACandidateForConflictResolution) + { + die("Error: the preferred $target character code ".sprintf("0x%08x", $preferredCharacterCodesForConflictResolution->{$sourceCharacterCode})." 
# Verifies that no initial foreign byte is covered by more than one range of
# the foreign variable-byte data.
#
#   $endianness               currently unused (only referenced by the disabled check below)
#   $foreignVariableByteData  array ref of [firstInitialByte, lastInitialByte, numberOfSubsequentBytes]
#   $foreignToUnicodeData     currently unused (only referenced by the disabled check below)
#
# Dies, naming the offending byte, as soon as an overlap is found.
#
# Note: the error message used to interpolate the marker stored in the hash
# (always 1) rather than the byte value itself.
sub checkForeignVariableByteData
	{
	my $endianness=shift;
	my $foreignVariableByteData=shift;
	my $foreignToUnicodeData=shift;
	my $rangeData;
	my %initialForeignBytes=(); # initial byte value => 1 once some range has claimed it
	foreach $rangeData (@$foreignVariableByteData)
		{
		my $initialByte;
		for ($initialByte=$rangeData->[0]; $initialByte<=$rangeData->[1]; ++$initialByte)
			{
			if (defined($initialForeignBytes{$initialByte}))
				{
				die("Error: the number of bytes subsequent to the initial foreign-byte ".sprintf("0x%02x", $initialByte)." is defined more than once\n");
				}
			$initialForeignBytes{$initialByte}=1;
			}
		}
#	if ($endianness!=0) # unfortunately, nothing can be checked if the $endianness is 0 (SCnvConversionData::EUnspecified)
#		{
#		foreach $rangeData (@$foreignToUnicodeData)
#			{
#			my $inputCharacterCode;
#			for ($inputCharacterCode=$rangeData->[2]; $inputCharacterCode<=$rangeData->[3]; ++$inputCharacterCode)
#				{
#				my $initialByte;
#				if ($endianness==1) # SCnvConversionData::EFixedLittleEndian
#					{
#					$initialByte=($inputCharacterCode&0xff);
#					}
#				elsif ($endianness==2) # SCnvConversionData::EFixedBigEndian
#					{
#					$initialByte=($inputCharacterCode&0xff00)>>8; ## this is hard-coded and needs to be done properly!
#					}
#				else
#					{
#					die("Error: internal error ??\n");
#					}
#				if (!defined($initialForeignBytes{$initialByte}))
#					{
#					die("Error: no number-of-subsequent-bytes is specified for the initial byte $initialByte\n");
#					}
#				}
#			}
#		}
	return;
	}
# Appends the C++ definition of the foreignVariableByteDataRanges[] array to
# the list of top-level source-code chunks.
#
# Arguments:
#   $sourceCodeOfTopLevelStructures - reference to the array of chunks that is
#                                     appended to
#   $foreignVariableByteData        - reference to an array of records of the
#                                     form [first initial byte, last initial
#                                     byte, number of subsequent bytes]
#
# Dies with an internal error if a record does not have exactly three
# elements or if its last byte is smaller than its first byte.
sub writeSourceCodeForeignVariableByteData {
    my ($sourceCodeOfTopLevelStructures, $foreignVariableByteData)=@_;
    push(@$sourceCodeOfTopLevelStructures, "LOCAL_D const SCnvConversionData::SVariableByteData::SRange foreignVariableByteDataRanges[]=\n\t\{\n");
    my $finalIndex=$#$foreignVariableByteData;
    for my $index (0..$finalIndex) {
        my $record=$foreignVariableByteData->[$index];
        (@$record==3) or die("Error: internal error 3\n");
        my ($firstByte, $lastByte, $subsequentBytes)=@$record;
        die("Error: internal error 4\n") if ($lastByte<$firstByte);
        my $entry=sprintf("\t\t\{\n\t\t0x%02x,\n\t\t0x%02x,\n\t\t", $firstByte, $lastByte).$subsequentBytes.",\n\t\t0\n\t\t\}";
        # Every entry except the last is followed by a separating comma.
        push(@$sourceCodeOfTopLevelStructures, $entry, (($index<$finalIndex)? (','): ()), "\n");
    }
    push(@$sourceCodeOfTopLevelStructures, "\t\};\n\n");
}
# Checks that every code in [$first, $last] converts to itself and removes the
# codes from %$characterCodes. Dies on a missing or non-identity conversion.
sub _consumeDirectRange {
    my ($characterCodes, $first, $last, $direction)=@_;
    for my $code ($first..$last) {
        if (!defined($characterCodes->{$code})) {
            die("Error: There is no conversion defined for ".$direction->{inputName}." character code ".sprintf("0x%08x", $code)."\n");
        }
        if ($characterCodes->{$code}!=$code) {
            die("Error: the conversion from ".$direction->{inputName}." character code ".sprintf("0x%08x", $code)." to ".$direction->{outputName}." character code ".sprintf("0x%08x", $characterCodes->{$code})." is not a direct conversion\n");
        }
        delete $characterCodes->{$code};
    }
}

# Checks that every code in [$first, $last] converts with one constant offset,
# removes the codes from %$characterCodes and returns that offset.
sub _consumeOffsetRange {
    my ($characterCodes, $first, $last, $direction)=@_;
    my $offset;
    for my $code ($first..$last) {
        # The first code is checked too: without a conversion for it the
        # offset would silently be computed from an undefined value.
        if (!defined($characterCodes->{$code})) {
            die("Error: There is no conversion defined for ".$direction->{inputName}." character code ".sprintf("0x%08x (0x%08x-0x%08x)", $code, $first, $last)."\n");
        }
        $offset=$characterCodes->{$code}-$code if (!defined($offset));
        if ($characterCodes->{$code}-$code!=$offset) {
            die("Error: the conversion from ".$direction->{inputName}." character code ".sprintf("0x%08x", $code)." to ".$direction->{outputName}." character code ".sprintf("0x%08x", $characterCodes->{$code})." has a different offset from the previous one in the range\n");
        }
        delete $characterCodes->{$code};
    }
    return $offset;
}

# Returns, in ascending numeric order, the keys of %$characterCodes that lie in
# [$first, $last]. Dies if there are none; warns on STDERR when the populated
# span starts later or ends earlier than the declared range.
sub _characterCodesPresentInRange {
    my ($characterCodes, $first, $last, $algorithmAsText, $direction)=@_;
    my $format=$direction->{formatForInputCharacters};
    my $rangeAsText=sprintf("$format-$format", $first, $last);
    my @present=grep(($_>=$first) && ($_<=$last), sort({$a<=>$b} keys(%$characterCodes)));
    if (@present==0) {
        die("Error: There are no ".$direction->{inputName}.'-to-'.$direction->{outputName}." characters to convert using $algorithmAsText (range ".$rangeAsText.").\n");
    }
    if ($present[0]!=$first) {
        print(STDERR 'Warning: the specified start of the '.$direction->{inputName}.' range '.$rangeAsText.' could actually be '.sprintf($format, $present[0])."\n");
    }
    if ($present[-1]!=$last) {
        print(STDERR 'Warning: the specified end of the '.$direction->{inputName}.' range '.$rangeAsText.' could actually be '.sprintf($format, $present[-1])."\n");
    }
    return @present;
}

# Generates one IndexedTable<bits> array (one entry per code in the range),
# appends it to @$tables and returns the name of the generated array.
sub _sourceCodeIndexedTable {
    my ($bits, $tables, $characterCodes, $first, $last, $direction)=@_;
    my $tableName="indexedTable${bits}_".$direction->{tag}.'_'.(scalar(@$tables)+1);
    my @table=("LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::SIndexedTable${bits}::SEntry $tableName\[\]=\n\t\{\n");
    for my $code ($first..$last) {
        if (!defined($characterCodes->{$code})) {
            die("Error: There is no conversion defined for ".$direction->{inputName}." character code ".sprintf("0x%08x", $code)."\n");
        }
        push(@table, "\t\t\{\n\t\t".sprintf($direction->{formatForOutputCharacters}, $characterCodes->{$code})."\n\t\t\}");
        push(@table, ',') if ($code<$last);
        push(@table, "\n");
        delete $characterCodes->{$code};
    }
    push(@table, "\t\};\n\n");
    push(@$tables, \@table);
    return $tableName;
}

# Generates one KeyedTable<bits><bits> array (one key/value entry per code
# actually present in the range), appends it to @$tables and returns its name.
sub _sourceCodeKeyedTable {
    my ($bits, $tables, $characterCodes, $first, $last, $direction)=@_;
    my $algorithmAsText="KeyedTable$bits$bits";
    my $tableName="keyedTable$bits${bits}_".$direction->{tag}.'_'.(scalar(@$tables)+1);
    my @table=("LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::S${algorithmAsText}::SEntry $tableName\[\]=\n\t\{\n");
    my @present=_characterCodesPresentInRange($characterCodes, $first, $last, $algorithmAsText, $direction);
    for my $code (@present) {
        next if (!defined($characterCodes->{$code}));
        push(@table, "\t\t\{\n\t\t".sprintf($direction->{formatForInputCharacters}, $code).",\n\t\t".sprintf($direction->{formatForOutputCharacters}, $characterCodes->{$code})."\n\t\t\}");
        push(@table, ',') if ($code<$present[-1]);
        push(@table, "\n");
        delete $characterCodes->{$code};
    }
    push(@table, "\t\};\n\n");
    push(@$tables, \@table);
    return $tableName;
}

# Generates one KeyedTable<bits>OfIndexedTables<bits>: the codes present in the
# range are split into runs of consecutive codes, each run becomes an indexed
# array appended to @$indexedTables, and the keyed array that points at them
# is appended to @$keyedTables. Returns the name of the keyed array.
# $parameters, when not the empty string, is the minimum run length; a shorter
# run is left in %$characterCodes (the final run is always emitted).
sub _sourceCodeKeyedTableOfIndexedTables {
    my ($bits, $keyedTables, $indexedTables, $characterCodes, $first, $last, $parameters, $direction)=@_;
    my $algorithmAsText="KeyedTable${bits}OfIndexedTables$bits";
    my $stem="keyedTables${bits}OfIndexedTables$bits";
    my $keyedName=$stem.'_keyedEntries_'.$direction->{tag}.'_'.(scalar(@$keyedTables)+1);
    my @keyedTable=("LOCAL_D const SCnvConversionData::SOneDirectionData::SRange::UData::S${algorithmAsText}::SKeyedEntry $keyedName\[\]=\n\t\{\n");
    my @present=_characterCodesPresentInRange($characterCodes, $first, $last, $algorithmAsText, $direction);

    # Split the sorted codes into runs of consecutive values.
    my $minimumNumberOfEntriesPerIndexedTable=($parameters ne '')? $parameters: 0;
    my @runs=();
    my $runStart=$present[0];
    my $previous=$runStart;
    for my $code (@present) {
        ($code>=$previous) or die("Error: internal error 7\n");
        if ($code>$previous+1) {
            if (($previous-$runStart)+1>=$minimumNumberOfEntriesPerIndexedTable) {
                push(@runs, [$runStart, $previous]);
            }
            $runStart=$code;
        }
        $previous=$code;
    }
    push(@runs, [$runStart, $previous]);

    for my $run (@runs) {
        my ($runFirst, $runLast)=@$run;
        my $indexedName=$stem.'_indexedEntries_'.$direction->{tag}.'_'.(scalar(@$indexedTables)+1);
        my @indexedTable=("LOCAL_D const TUint$bits $indexedName\[\]=\n\t\{\n");
        for my $code ($runFirst..$runLast) {
            defined($characterCodes->{$code}) or die("Error: internal error 8\n");
            push(@indexedTable, "\t".sprintf($direction->{formatForOutputCharacters}, $characterCodes->{$code}));
            push(@indexedTable, ',') if ($code<$runLast);
            push(@indexedTable, "\n");
            delete $characterCodes->{$code};
        }
        push(@indexedTable, "\t\};\n\n");
        push(@$indexedTables, \@indexedTable);
        push(@keyedTable, "\t\t\{\n\t\t".sprintf($direction->{formatForInputCharacters}, $runFirst).",\n\t\t".sprintf($direction->{formatForInputCharacters}, $runLast).",\n\t\t$indexedName\n\t\t\}");
        push(@keyedTable, ',') if ($runLast<$runs[-1]->[1]);
        push(@keyedTable, "\n");
    }
    push(@keyedTable, "\t\};\n\n");
    push(@$keyedTables, \@keyedTable);
    return $keyedName;
}

# Generates the C++ source for one conversion direction.
#
# Arguments (all positional):
#   $sourceCodeOfTopLevelStructures - array ref; receives the SRange array
#   the next four array refs receive the generated 16-bit tables
#     (IndexedTables16, KeyedTables1616, KeyedTables16OfIndexedTables16
#     indexed entries, KeyedTables16OfIndexedTables16 keyed entries)
#   the next four array refs receive the same table kinds in 32-bit form
#   $oneDirectionData - array ref of range records: $includePriority,
#     $searchPriority, $firstInputCharacterCodeInRange,
#     $lastInputCharacterCodeInRange, $algorithm
#     [, $sizeOfOutputCharacterCodeInBytes], $parameters
#     (the size element is present only when the output is foreign)
#   $characterCodes   - hash ref mapping input code to output code; every code
#     handled by a range is deleted from it
#   $outputIsUnicode  - true for foreign-to-Unicode, false for the reverse
#
# Algorithms: 0 Direct, 1 Offset, 2 IndexedTable16, 3 KeyedTable1616,
# 4 KeyedTable16OfIndexedTables16, 5 IndexedTable32, 6 KeyedTable3232,
# 7 KeyedTable32OfIndexedTables32.
#
# Dies if a range is malformed, if a required conversion is missing or does
# not fit the range's algorithm, or if any code is left without an algorithm.
sub writeSourceCodeOneDirectionData {
    my ($sourceCodeOfTopLevelStructures,
        $sourceCodeOfOneDirectionIndexedTables16,
        $sourceCodeOfOneDirectionKeyedTables1616,
        $sourceCodeOfOneDirectionKeyedTables16OfIndexedTables16_indexedEntries,
        $sourceCodeOfOneDirectionKeyedTables16OfIndexedTables16_keyedEntries,
        $sourceCodeOfOneDirectionIndexedTables32,
        $sourceCodeOfOneDirectionKeyedTables3232,
        $sourceCodeOfOneDirectionKeyedTables32OfIndexedTables32_indexedEntries,
        $sourceCodeOfOneDirectionKeyedTables32OfIndexedTables32_keyedEntries,
        $oneDirectionData,
        $characterCodes,
        $outputIsUnicode)=@_;

    my %direction=(
        inputName                 => ($outputIsUnicode? 'foreign': 'Unicode'),
        outputName                => ($outputIsUnicode? 'Unicode': 'foreign'),
        tag                       => ($outputIsUnicode? 'foreignToUnicode': 'unicodeToForeign'),
        formatForInputCharacters  => ($outputIsUnicode? '0x%02x': '0x%04x'),
        formatForOutputCharacters => ($outputIsUnicode? '0x%04x': '0x%02x'),
    );
    my %tablesForWidth=(
        16 => {
            indexed        => $sourceCodeOfOneDirectionIndexedTables16,
            keyed          => $sourceCodeOfOneDirectionKeyedTables1616,
            indexedEntries => $sourceCodeOfOneDirectionKeyedTables16OfIndexedTables16_indexedEntries,
            keyedEntries   => $sourceCodeOfOneDirectionKeyedTables16OfIndexedTables16_keyedEntries,
        },
        32 => {
            indexed        => $sourceCodeOfOneDirectionIndexedTables32,
            keyed          => $sourceCodeOfOneDirectionKeyedTables3232,
            indexedEntries => $sourceCodeOfOneDirectionKeyedTables32OfIndexedTables32_indexedEntries,
            keyedEntries   => $sourceCodeOfOneDirectionKeyedTables32OfIndexedTables32_keyedEntries,
        },
    );
    # algorithm number => [table family, width of a table entry in bits]
    my %tableAlgorithms=(
        2 => ['indexed', 16], 3 => ['keyed', 16], 4 => ['keyedOfIndexed', 16],
        5 => ['indexed', 32], 6 => ['keyed', 32], 7 => ['keyedOfIndexed', 32],
    );

    push(@$sourceCodeOfTopLevelStructures, 'LOCAL_D const SCnvConversionData::SOneDirectionData::SRange '.$direction{tag}."DataRanges[]=\n\t\{\n");
    my $indexOfLastRange=$#$oneDirectionData;
    for my $i (0..$indexOfLastRange) {
        my $rangeData=$oneDirectionData->[$i];
        if (scalar(@$rangeData)!=($outputIsUnicode? 6: 7)) {
            die("Error: internal error 5\n");
        }
        my ($first, $last, $algorithm)=@{$rangeData}[2, 3, 4];
        if ($last<$first) {
            die("Error: internal error 6\n");
        }
        my $sizeOfOutputCharacterCodeInBytesIfForeign=$outputIsUnicode? 0: $rangeData->[5];
        my $parameters=$rangeData->[$outputIsUnicode? 5: 6];
        my $algorithmAsText='';
        my $word1=0; # stays 0 for Direct, which needs no extra data
        my $tableAlgorithm=$tableAlgorithms{$algorithm+0}; # +0: look up by numeric value
        if ($algorithm==0) {
            $algorithmAsText='Direct';
            _consumeDirectRange($characterCodes, $first, $last, \%direction);
        }
        elsif ($algorithm==1) {
            $algorithmAsText='Offset';
            my $offset=_consumeOffsetRange($characterCodes, $first, $last, \%direction);
            $word1="STATIC_CAST(TUint, $offset)";
        }
        elsif (defined($tableAlgorithm)) {
            my ($family, $bits)=@$tableAlgorithm;
            my $tables=$tablesForWidth{$bits};
            my $tableName;
            if ($family eq 'indexed') {
                $algorithmAsText="IndexedTable$bits";
                $tableName=_sourceCodeIndexedTable($bits, $tables->{indexed}, $characterCodes, $first, $last, \%direction);
            }
            elsif ($family eq 'keyed') {
                $algorithmAsText="KeyedTable$bits$bits";
                $tableName=_sourceCodeKeyedTable($bits, $tables->{keyed}, $characterCodes, $first, $last, \%direction);
            }
            else {
                $algorithmAsText="KeyedTable${bits}OfIndexedTables$bits";
                $tableName=_sourceCodeKeyedTableOfIndexedTables($bits, $tables->{keyedEntries}, $tables->{indexedEntries}, $characterCodes, $first, $last, $parameters, \%direction);
            }
            $word1="UData_S$algorithmAsText($tableName)";
        }
        else {
            die("Error: internal error 9\n");
        }
        push(@$sourceCodeOfTopLevelStructures, "\t\t\{\n\t\t".sprintf($direction{formatForInputCharacters}, $first).",\n\t\t".sprintf($direction{formatForInputCharacters}, $last).",\n\t\tSCnvConversionData::SOneDirectionData::SRange::E$algorithmAsText,\n\t\t".$sizeOfOutputCharacterCodeInBytesIfForeign.",\n\t\t0,\n\t\t\t\{\n\t\t\t".$word1."\n\t\t\t\}\n\t\t\}");
        push(@$sourceCodeOfTopLevelStructures, ',') if ($i<$indexOfLastRange);
        push(@$sourceCodeOfTopLevelStructures, "\n");
    }

    # Whatever is still in the hash was not claimed by any range.
    my @unclaimed=sort({$a<=>$b} keys(%$characterCodes));
    if (@unclaimed>0) {
        die('The following '.$direction{inputName}.' characters have no conversion algorithm specified: ['.join(', ', map(sprintf($direction{formatForInputCharacters}, $_), @unclaimed))."\]\n");
    }
    push(@$sourceCodeOfTopLevelStructures, "\t\};\n\n");
}
# Prints everything that was collected for the generated C++ file: first all
# generated tables, then the top-level structures, then the conversionData
# object and one SCnvConversionData object per additional subset table.
#
# Arguments (all positional):
#   $fileHandle - handle the source code is printed to
#   sixteen array refs of generated tables, each an array of arrays of text
#     chunks, printed in the order they are passed: the four 16-bit table
#     kinds for foreign-to-Unicode, the same four for Unicode-to-foreign,
#     then both groups again in their 32-bit form
#   $sourceCodeOfTopLevelStructures - array ref of text chunks
#   $endiannessAsText - suffix of the SCnvConversionData::E... enumerator
#   $additionalSubsetTables - hash ref: table name => array ref whose elements
#     0/1 delimit a slice of foreignToUnicodeDataRanges and 2/3 a slice of
#     unicodeToForeignDataRanges (emitted as "end-begin" count and "+begin"
#     offset)
#
# Dies if an additional subset table lacks any of its four elements.
sub writeSourceCodeFinalStuff {
    my ($fileHandle, @remainingArguments)=@_;
    my @tableCollections=@remainingArguments[0..15];
    my ($sourceCodeOfTopLevelStructures, $endiannessAsText, $additionalSubsetTables)=@remainingArguments[16..18];

    for my $tableCollection (@tableCollections) {
        for my $arrayOfSourceCodeChunks (@$tableCollection) {
            for my $sourceCodeChunk (@$arrayOfSourceCodeChunks) {
                print($fileHandle $sourceCodeChunk);
            }
        }
    }
    for my $sourceCodeChunk (@$sourceCodeOfTopLevelStructures) {
        print($fileHandle $sourceCodeChunk);
    }

    print($fileHandle "GLDEF_D const SCnvConversionData conversionData=\n\t\{\n\tSCnvConversionData::E$endiannessAsText,\n\t\t\{\n\t\tARRAY_LENGTH(foreignVariableByteDataRanges),\n\t\tforeignVariableByteDataRanges\n\t\t\},\n\t\t\{\n\t\tARRAY_LENGTH(foreignToUnicodeDataRanges),\n\t\tforeignToUnicodeDataRanges\n\t\t\},\n\t\t\{\n\t\tARRAY_LENGTH(unicodeToForeignDataRanges),\n\t\tunicodeToForeignDataRanges\n\t\t\},\n\tNULL,\n\tNULL\n\t\};\n\n");

    # Sorted by name so that the generated file is identical from run to run;
    # plain hash order is not stable between invocations of perl.
    for my $tableName (sort(keys(%$additionalSubsetTables))) {
        my $tableData=$additionalSubsetTables->{$tableName};
        (defined($tableData->[0]) && defined($tableData->[1]) && defined($tableData->[2]) && defined($tableData->[3])) or die("Error: incomplete definition of \"$tableName\"\n");
        my ($foreignToUnicodeBegin, $foreignToUnicodeEnd, $unicodeToForeignBegin, $unicodeToForeignEnd)=@{$tableData}[0..3];
        print($fileHandle "GLREF_D const SCnvConversionData $tableName;\n");
        print($fileHandle "GLDEF_D const SCnvConversionData $tableName=\n\t\{\n\tSCnvConversionData::E$endiannessAsText,\n\t\t\{\n\t\tARRAY_LENGTH(foreignVariableByteDataRanges),\n\t\tforeignVariableByteDataRanges\n\t\t\},\n\t\t\{\n\t\t$foreignToUnicodeEnd-$foreignToUnicodeBegin,\n\t\tforeignToUnicodeDataRanges+$foreignToUnicodeBegin\n\t\t\},\n\t\t\{\n\t\t$unicodeToForeignEnd-$unicodeToForeignBegin,\n\t\tunicodeToForeignDataRanges+$unicodeToForeignBegin\n\t\t\}\n\t\};\n\n");
    }
}
# Writes the fixed header of the binary conversion-data file: the UIDs, the
# file-format version, a reserved word, an (always empty) name length, the
# endianness and the length-prefixed replacement string.
#
# Arguments:
#   $fileHandle - handle the binary data is written to
#   $uid        - passed to writeUids() as the second UID
#   $endianness - written as a single byte
#   $replacementForUnconvertibleUnicodeCharacters
#               - string written after its one-byte length
sub writeBinaryHeader {
    my ($fileHandle, $uid, $endianness, $replacementForUnconvertibleUnicodeCharacters)=@_;
    &writeUids($fileHandle, 0x100011bd, $uid, 0);
    # version number of the file format
    &write32($fileHandle, 1);
    # not currently used
    &write32($fileHandle, 0);
    # number of Unicode characters in the name; zero because the name is now
    # derived from the file-name
    &write8($fileHandle, 0);
    &write8($fileHandle, $endianness);
    my $lengthOfReplacement=length($replacementForUnconvertibleUnicodeCharacters);
    &write8($fileHandle, $lengthOfReplacement);
    &writeString($fileHandle, $replacementForUnconvertibleUnicodeCharacters);
}

# Writes the foreign variable-byte ranges in binary form: the number of
# ranges, then three bytes per range (first initial byte, last initial byte,
# number of subsequent bytes).
#
# Arguments:
#   $fileHandle              - handle the binary data is written to
#   $foreignVariableByteData - reference to an array of three-element records
#
# Dies with an internal error if a record does not have exactly three
# elements or if its last byte is smaller than its first byte.
sub writeBinaryForeignVariableByteData {
    my ($fileHandle, $foreignVariableByteData)=@_;
    my $numberOfRanges=scalar(@$foreignVariableByteData);
    &writePositiveIntegerCompacted30($fileHandle, $numberOfRanges);
    for my $record (@$foreignVariableByteData) {
        (@$record==3) or die("Error: internal error 10\n");
        my ($firstByte, $lastByte, $numberOfSubsequentBytes)=@$record;
        die("Error: internal error 11\n") if ($lastByte<$firstByte);
        &write8($fileHandle, $firstByte);
        &write8($fileHandle, $lastByte);
        &write8($fileHandle, $numberOfSubsequentBytes);
    }
}
+ my $fileHandle=shift; + my $oneDirectionData=shift; + my $characterCodes=shift; + my $outputIsUnicode=shift; + &writePositiveIntegerCompacted30($fileHandle, scalar(@$oneDirectionData)); + my $rangeData; + foreach $rangeData (@$oneDirectionData) + { +## $rangeData is $includePriority, $searchPriority, $firstInputCharacterCodeInRange, $lastInputCharacterCodeInRange, $algorithm[, $sizeOfOutputCharacterCodeInBytes], $parameters + if (scalar(@$rangeData)!=($outputIsUnicode? 6: 7)) + { + die("Error: internal error 12\n"); + } + my $firstInputCharacterCodeInRange=$rangeData->[2]; + my $lastInputCharacterCodeInRange=$rangeData->[3]; + if ($lastInputCharacterCodeInRange<$firstInputCharacterCodeInRange) + { + die("Error: internal error 13\n"); + } + &writePositiveIntegerCompacted30($fileHandle, $firstInputCharacterCodeInRange); + &writePositiveIntegerCompacted30($fileHandle, $lastInputCharacterCodeInRange); + my $algorithm=$rangeData->[4]; + &write8($fileHandle, $algorithm); + if (!$outputIsUnicode) + { + &write8($fileHandle, $rangeData->[5]); # sizeOfOutputCharacterCodeInBytesIfForeign + } + if ($algorithm==0) # Direct + { + my $characterCode; + for ($characterCode=$firstInputCharacterCodeInRange; $characterCode<=$lastInputCharacterCodeInRange; ++$characterCode) + { + if (!defined($characterCodes->{$characterCode})) + { + die("Error: There is no conversion defined for ".($outputIsUnicode? 'foreign': 'Unicode')." character code ".sprintf("0x%08x", $characterCode)."\n"); + } + if ($characterCodes->{$characterCode}!=$characterCode) + { + die("Error: the conversion from ".($outputIsUnicode? 'foreign': 'Unicode')." character code ".sprintf("0x%08x", $characterCode)." to ".($outputIsUnicode? 'Unicode': 'foreign')." character code ".sprintf("0x%08x", $characterCodes->{$characterCode})." 
is not a direct conversion\n"); + } + delete $characterCodes->{$characterCode}; + } + } + elsif ($algorithm==1) # Offset + { + my $offset=$characterCodes->{$firstInputCharacterCodeInRange}-$firstInputCharacterCodeInRange; + delete $characterCodes->{$firstInputCharacterCodeInRange}; + my $characterCode; + for ($characterCode=$firstInputCharacterCodeInRange+1; $characterCode<=$lastInputCharacterCodeInRange; ++$characterCode) + { + if (!defined($characterCodes->{$characterCode})) + { + die("Error: There is no conversion defined for ".($outputIsUnicode? 'foreign': 'Unicode')." character code ".sprintf("0x%08x (0x%08x-0x%08x)", $characterCode, $firstInputCharacterCodeInRange, $lastInputCharacterCodeInRange)."\n"); + } + if ($characterCodes->{$characterCode}-$characterCode!=$offset) + { + die("Error: the conversion from ".($outputIsUnicode? 'foreign': 'Unicode')." character code ".sprintf("0x%08x", $characterCode)." to ".($outputIsUnicode? 'Unicode': 'foreign')." character code ".sprintf("0x%08x", $characterCodes->{$characterCode})." has a different offset from the previous one in the range\n"); + } + delete $characterCodes->{$characterCode}; + } + &writeSignedIntegerCompacted29($fileHandle, $offset); + } + elsif ($algorithm==2) # IndexedTable16 + { + my $characterCode; + for ($characterCode=$firstInputCharacterCodeInRange; $characterCode<=$lastInputCharacterCodeInRange; ++$characterCode) + { + if (!defined($characterCodes->{$characterCode})) + { + die("Error: There is no conversion defined for ".($outputIsUnicode? 'foreign': 'Unicode')." 
character code ".sprintf("0x%08x", $characterCode)."\n"); + } + &write16($fileHandle, $characterCodes->{$characterCode}); + delete $characterCodes->{$characterCode}; + } + } + elsif ($algorithm==3) # KeyedTable1616 + { + my $characterCode; + my @table=(); + for ($characterCode=$firstInputCharacterCodeInRange; $characterCode<=$lastInputCharacterCodeInRange; ++$characterCode) + { + if (defined($characterCodes->{$characterCode})) + { + push(@table, [$characterCode, $characterCodes->{$characterCode}]); + delete $characterCodes->{$characterCode}; + } + } + my $firstIteration=1; + my $lastKey; + &writePositiveIntegerCompacted30($fileHandle, scalar(@table)); + if ($table[0][0]!=$firstInputCharacterCodeInRange) + { + die("Error: no conversion is specified for the first ".($outputIsUnicode? 'foreign': 'Unicode')." character code in the KeyedTable1616 range ".sprintf("0x%08x to 0x%08x", $firstInputCharacterCodeInRange, $lastInputCharacterCodeInRange)."\n"); + } + my $pair; + foreach $pair (@table) + { + my $key=$pair->[0]; + if ($firstIteration) + { + $firstIteration=0; + } + else + { + if ($key<=$lastKey) + { + die("Error: internal error 14\n"); + } + &writePositiveIntegerCompacted15($fileHandle, $key-$lastKey); + } + &write16($fileHandle, $pair->[1]); + $lastKey=$key; + } + } + elsif ($algorithm==4) # KeyedTable16OfIndexedTables16 + { + die("Error: \"KeyedTable16OfIndexedTables16\" is only supported if generating source code\n"); + } + else + { + die("Error: internal error 15\n"); + } + } + my @characterCodes=sort({$a<=>$b} keys(%$characterCodes)); + if (@characterCodes>0) + { + die('The following '.($outputIsUnicode? 'foreign': 'Unicode').'characters have no conversion algorithm specified: ['.join(', ', map({sprintf('0x%x', $_)} @characterCodes))."\]\n"); + } + } +