charconvfw/Charconv/ongoing/data/ANALYSE.PL
changeset 0 1fb32624e06b
child 16 56cd22a7a1cb
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 #
       
     2 # Copyright (c) 1997-2000 Nokia Corporation and/or its subsidiary(-ies).
       
     3 # All rights reserved.
       
     4 # This component and the accompanying materials are made available
       
     5 # under the terms of "Eclipse Public License v1.0"
       
     6 # which accompanies this distribution, and is available
       
     7 # at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 #
       
     9 # Initial Contributors:
       
    10 # Nokia Corporation - initial contribution.
       
    11 #
       
    12 # Contributors:
       
    13 #
       
    14 # Description:      
       
    15 #
       
    16 
       
    17 use strict;
       
    18 use integer;
       
    19 
       
    20 BEGIN
       
    21 	{
       
    22 	my $perlScriptPath=$0;
       
    23 	$perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes
       
    24 	$perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name
       
    25 	if ($perlScriptPath eq '')
       
    26 		{
       
    27 		$perlScriptPath='..\group';
       
    28 		}
       
    29 	else
       
    30 		{
       
    31 		$perlScriptPath=~s/(\\?)[^\\]+$/$1group/;
       
    32 		}
       
    33 	unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names
       
    34 	}
       
    35 use PARSER;
       
    36 
       
    37 if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i))
       
    38 	{
       
    39 	die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2000 Symbian Ltd\n\nUsage:\n\n\tperl ANALYSE.PL <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n");
       
    40 	}
       
    41 my @columns=(2, 1, 2);
       
    42 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0;
       
    43 my @sourceFilesToSubtract=();
       
    44 &extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract);
       
    45 my $sourceFileName=shift;
       
    46 my $outputFileName=shift;
       
    47 my $columnToSortOn=shift;
       
    48 my %characterCodesOfOtherColumn=();
       
    49 my %linesSorted=();
       
    50 open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading");
       
    51 &readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0);
       
    52 close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n");
       
    53 my $sourceFileToSubtract;
       
    54 foreach $sourceFileToSubtract (@sourceFilesToSubtract)
       
    55 	{
       
    56 	open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n");
       
    57 	&readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1);
       
    58 	close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n");
       
    59 	}
       
    60 open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing");
       
    61 my $numberOfBreaks=0;
       
    62 my $numberOfMissingSpaces=0;
       
    63 my $numberOfLinesSorted=0;
       
    64 my $previousKey="";
       
    65 my $offset=0;
       
    66 my $key;
       
    67 foreach $key (sort {$a<=>$b} (keys(%linesSorted)))
       
    68 	{
       
    69 	if ($previousKey ne "")
       
    70 		{
       
    71 		$previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\"");
       
    72 		if ($previousKey!=$key-1)
       
    73 			{
       
    74 			++$numberOfBreaks;
       
    75 			$numberOfMissingSpaces+=$key-$previousKey;
       
    76 			print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
       
    77 			}
       
    78 		}
       
    79 	if (($previousKey eq "") || ($previousKey!=$key-1))
       
    80 		{
       
    81 		$offset=$characterCodesOfOtherColumn{$key}-$key;
       
    82 		}
       
    83 	elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key))
       
    84 		{
       
    85 		undef $offset;
       
    86 		}
       
    87 	print(OUTPUT_FILE "$linesSorted{$key}");
       
    88 	++$numberOfLinesSorted;
       
    89 	$previousKey=$key;
       
    90 	}
       
    91 print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n");
       
    92 close(OUTPUT_FILE);
       
    93 my $maximumNumberOfIterationsWhenBinarySearching=1;
       
    94 while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0)
       
    95 	{
       
    96 	++$maximumNumberOfIterationsWhenBinarySearching;
       
    97 	}
       
    98 print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching");
       
    99 
       
   100 sub extractCommandLineFlags()
       
   101 	{
       
   102 	my $columns=shift;
       
   103 	my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
       
   104 	my $sourceFilesToSubtract=shift;
       
   105 	my $i;
       
   106 	for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV
       
   107 		{
       
   108 		if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i))
       
   109 			{
       
   110 			my $columnsData=$1;
       
   111 			splice(@ARGV, $i, 1);
       
   112 			for (;;)
       
   113 				{
       
   114 				if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/)
       
   115 					{
       
   116 					@$columns=($1, $2, $3);
       
   117 					last;
       
   118 					}
       
   119 				($#ARGV>=$i) or die("Error: bad \"-columns\" format\n");
       
   120 				$columnsData.=(splice(@ARGV, $i, 1))[0];
       
   121 				}
       
   122 			}
       
   123 		elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i))
       
   124 			{
       
   125 			splice(@ARGV, $i, 1);
       
   126 			$$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1;
       
   127 			}
       
   128 		elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i))
       
   129 			{
       
   130 			my $sourceFilesData=$1;
       
   131 			splice(@ARGV, $i, 1);
       
   132 			for (;;)
       
   133 				{
       
   134 				if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/)
       
   135 					{
       
   136 					my $sourceFilesData=$1;
       
   137 					@$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1);
       
   138 					my $j;
       
   139 					for ($j=$#$sourceFilesToSubtract; $j>=0; --$j)
       
   140 						{
       
   141 						$sourceFilesToSubtract->[$j]=~s/^\s+//;
       
   142 						$sourceFilesToSubtract->[$j]=~s/\s+$//;
       
   143 						($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n");
       
   144 						}
       
   145 					last;
       
   146 					}
       
   147 				($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n");
       
   148 				$sourceFilesData.=(splice(@ARGV, $i, 1))[0];
       
   149 				}
       
   150 			}
       
   151 		else
       
   152 			{
       
   153 			++$i;
       
   154 			}
       
   155 		}
       
   156 	}
       
   157 
       
   158 sub readSourceFile
       
   159 	{
       
   160 	my $fileHandle=shift;
       
   161 	my $fileName=shift;
       
   162 	my $characterCodesOfOtherColumn=shift;
       
   163 	my $linesSorted=shift;
       
   164 	my $columnToSortOn=shift;
       
   165 	my $columns=shift;
       
   166 	my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift;
       
   167 	my $subtract=shift;
       
   168 	my $foreignCharacterCodeProcessingCode='';
       
   169 	if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2])))
       
   170 		{
       
   171 		close($fileHandle);
       
   172 		die("Error: bad \"-columns\" data\n");
       
   173 		}
       
   174 	my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]);
       
   175 	my $line;
       
   176 	my $strippedDownLine;
       
   177 	for (;;)
       
   178 		{
       
   179 		($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle);
       
   180 		if ($strippedDownLine eq '')
       
   181 			{
       
   182 			last;
       
   183 			}
       
   184 		if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i)
       
   185 			{
       
   186 			$foreignCharacterCodeProcessingCode=$1;
       
   187 			}
       
   188 		elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i)
       
   189 			{
       
   190 			no strict 'refs'; # so that we can use symbolic references for $1, $2, etc
       
   191 			my $foreignCharacterCode=hex(${$columns->[1]});
       
   192 			my $unicodeCharacterCode=hex(${$columns->[2]});
       
   193 			use strict 'refs';
       
   194 			if ($foreignCharacterCodeProcessingCode ne '')
       
   195 				{
       
   196 				$foreignCharacterCode=eval($foreignCharacterCodeProcessingCode);
       
   197 				}
       
   198 			my $characterCodeOfColumnToSortOn;
       
   199 			my $characterCodeOfOtherColumn;
       
   200 			if ($columnToSortOn=~/^foreign$/i)
       
   201 				{
       
   202 				$characterCodeOfColumnToSortOn=$foreignCharacterCode;
       
   203 				$characterCodeOfOtherColumn=$unicodeCharacterCode;
       
   204 				}
       
   205 			elsif ($columnToSortOn=~/^Unicode$/i)
       
   206 				{
       
   207 				$characterCodeOfColumnToSortOn=$unicodeCharacterCode;
       
   208 				$characterCodeOfOtherColumn=$foreignCharacterCode;
       
   209 				}
       
   210 			else
       
   211 				{
       
   212 				die("Error: bad parameter \"$columnToSortOn\"");
       
   213 				}
       
   214 			if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff))))
       
   215 				{
       
   216 				if ($subtract)
       
   217 					{
       
   218 					$linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn};
       
   219 					if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn)
       
   220 						{
       
   221 						printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn);
       
   222 						}
       
   223 					}
       
   224 				else
       
   225 					{
       
   226 					$linesSorted->{$characterCodeOfColumnToSortOn}=$line;
       
   227 					$characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn;
       
   228 					}
       
   229 				}
       
   230 			}
       
   231 		elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i)
       
   232 			{
       
   233 			close($fileHandle);
       
   234 			die("Error: unexpected line in \"$fileName\":\n    $line\n");
       
   235 			}
       
   236 		}
       
   237 	}
       
   238