|
1 # |
|
2 # Copyright (c) 1997-2000 Nokia Corporation and/or its subsidiary(-ies). |
|
3 # All rights reserved. |
|
4 # This component and the accompanying materials are made available |
|
5 # under the terms of "Eclipse Public License v1.0" |
|
6 # which accompanies this distribution, and is available |
|
7 # at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 # |
|
9 # Initial Contributors: |
|
10 # Nokia Corporation - initial contribution. |
|
11 # |
|
12 # Contributors: |
|
13 # |
|
14 # Description: |
|
15 # |
|
16 |
|
17 use strict; |
|
18 use integer; |
|
19 |
|
20 BEGIN |
|
21 { |
|
22 my $perlScriptPath=$0; |
|
23 $perlScriptPath=~s/\//\\/g; # replace any forward-slashes with back-slashes |
|
24 $perlScriptPath=~s/\\?[^\\]+$//; # get rid of this Perl-script's file-name |
|
25 if ($perlScriptPath eq '') |
|
26 { |
|
27 $perlScriptPath='..\group'; |
|
28 } |
|
29 else |
|
30 { |
|
31 $perlScriptPath=~s/(\\?)[^\\]+$/$1group/; |
|
32 } |
|
33 unshift(@INC, $perlScriptPath); # can't do "use lib $perlScriptPath" here as "use lib" only seems to work with *hard-coded* directory names |
|
34 } |
|
35 use PARSER; |
|
36 |
|
37 if ((@ARGV==0) || ($ARGV[0]=~/\?/i) || ($ARGV[0]=~/-h/i) || ($ARGV[0]=~/\/h/i) || ($ARGV[0]=~/help/i)) |
|
38 { |
|
39 die("\nVersion 021\n\nCharacter-set conversion-table generating tool\nCopyright (c) 2000 Symbian Ltd\n\nUsage:\n\n\tperl ANALYSE.PL <source-file> <output-file> foreign|Unicode [options]\n\nwhere the following options are available (each has a short form and a long form which are shown below separated by a '|'):\n\n\t-c | -columns(<a>: <b>, <c>)\n\t-p | -cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed\n\t-u | -sourceFilesToSubtract(<a>, <b>, ...)\n\n"); |
|
40 } |
|
41 my @columns=(2, 1, 2); |
|
42 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=0; |
|
43 my @sourceFilesToSubtract=(); |
|
44 &extractCommandLineFlags(\@columns, \$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, \@sourceFilesToSubtract); |
|
45 my $sourceFileName=shift; |
|
46 my $outputFileName=shift; |
|
47 my $columnToSortOn=shift; |
|
48 my %characterCodesOfOtherColumn=(); |
|
49 my %linesSorted=(); |
|
50 open(SOURCE_FILE, "< $sourceFileName") or die("Error: could not open \"$sourceFileName\" for reading"); |
|
51 &readSourceFile(\*SOURCE_FILE, $sourceFileName, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 0); |
|
52 close(SOURCE_FILE) or die("Error: could not close \"$sourceFileName\"\n"); |
|
53 my $sourceFileToSubtract; |
|
54 foreach $sourceFileToSubtract (@sourceFilesToSubtract) |
|
55 { |
|
56 open(SOURCE_FILE_TO_SUBTRACT, "< $sourceFileToSubtract") or die("Error: could not open \"$sourceFileToSubtract\" for reading\n"); |
|
57 &readSourceFile(\*SOURCE_FILE_TO_SUBTRACT, $sourceFileToSubtract, \%characterCodesOfOtherColumn, \%linesSorted, $columnToSortOn, \@columns, $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed, 1); |
|
58 close(SOURCE_FILE_TO_SUBTRACT) or die("Error: could not close \"$sourceFileToSubtract\"\n"); |
|
59 } |
|
60 open(OUTPUT_FILE, "> $outputFileName") or die("Error: could not open \"$outputFileName\" for writing"); |
|
61 my $numberOfBreaks=0; |
|
62 my $numberOfMissingSpaces=0; |
|
63 my $numberOfLinesSorted=0; |
|
64 my $previousKey=""; |
|
65 my $offset=0; |
|
66 my $key; |
|
67 foreach $key (sort {$a<=>$b} (keys(%linesSorted))) |
|
68 { |
|
69 if ($previousKey ne "") |
|
70 { |
|
71 $previousKey<$key or die("Error: there appears to be a mix up with the keys \"$previousKey\" and \"$key\""); |
|
72 if ($previousKey!=$key-1) |
|
73 { |
|
74 ++$numberOfBreaks; |
|
75 $numberOfMissingSpaces+=$key-$previousKey; |
|
76 print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n"); |
|
77 } |
|
78 } |
|
79 if (($previousKey eq "") || ($previousKey!=$key-1)) |
|
80 { |
|
81 $offset=$characterCodesOfOtherColumn{$key}-$key; |
|
82 } |
|
83 elsif ((defined $offset) && ($offset!=$characterCodesOfOtherColumn{$key}-$key)) |
|
84 { |
|
85 undef $offset; |
|
86 } |
|
87 print(OUTPUT_FILE "$linesSorted{$key}"); |
|
88 ++$numberOfLinesSorted; |
|
89 $previousKey=$key; |
|
90 } |
|
91 print(OUTPUT_FILE "# End of contiguous block - relationship between the columns in this block: ".((!defined $offset)? "RANDOM": ($offset==0)? "DIRECT": "OFFSET ($offset)")."\n\n"); |
|
92 close(OUTPUT_FILE); |
|
93 my $maximumNumberOfIterationsWhenBinarySearching=1; |
|
94 while (($numberOfLinesSorted>>$maximumNumberOfIterationsWhenBinarySearching)>0) |
|
95 { |
|
96 ++$maximumNumberOfIterationsWhenBinarySearching; |
|
97 } |
|
98 print("The number of breaks was $numberOfBreaks\nThe number of missing spaces was $numberOfMissingSpaces\nThe number of lines sorted was $numberOfLinesSorted\nThe maximum number of iterations when binary searching would be $maximumNumberOfIterationsWhenBinarySearching"); |
|
99 |
|
100 sub extractCommandLineFlags() |
|
101 { |
|
102 my $columns=shift; |
|
103 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift; |
|
104 my $sourceFilesToSubtract=shift; |
|
105 my $i; |
|
106 for ($i=0; $i<=$#ARGV;) # (i) not cache-ing $#ARGV into a variable as @ARGV may change length in this loop (ii) iterate forwards as some parameters may occupy more than one element in @ARGV |
|
107 { |
|
108 if (($ARGV[$i]=~/^-c\b(.*)$/i) || ($ARGV[$i]=~/^-columns\b(.*)$/i)) |
|
109 { |
|
110 my $columnsData=$1; |
|
111 splice(@ARGV, $i, 1); |
|
112 for (;;) |
|
113 { |
|
114 if ($columnsData=~/^\s*\(\s*(\d+)\s*:\s*(\d+)\s*,\s*(\d+)\s*\)\s*$/) |
|
115 { |
|
116 @$columns=($1, $2, $3); |
|
117 last; |
|
118 } |
|
119 ($#ARGV>=$i) or die("Error: bad \"-columns\" format\n"); |
|
120 $columnsData.=(splice(@ARGV, $i, 1))[0]; |
|
121 } |
|
122 } |
|
123 elsif (($ARGV[$i]=~/^-p$/i) || ($ARGV[$i]=~/^-cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed$/i)) |
|
124 { |
|
125 splice(@ARGV, $i, 1); |
|
126 $$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=1; |
|
127 } |
|
128 elsif (($ARGV[$i]=~/^-u\b(.*)$/i) || ($ARGV[$i]=~/^-sourceFilesToSubtract\b(.*)$/i)) |
|
129 { |
|
130 my $sourceFilesData=$1; |
|
131 splice(@ARGV, $i, 1); |
|
132 for (;;) |
|
133 { |
|
134 if ($sourceFilesData=~/^\s*\(\s*(.+)\)\s*$/) |
|
135 { |
|
136 my $sourceFilesData=$1; |
|
137 @$sourceFilesToSubtract=split(/,/, $sourceFilesData, -1); |
|
138 my $j; |
|
139 for ($j=$#$sourceFilesToSubtract; $j>=0; --$j) |
|
140 { |
|
141 $sourceFilesToSubtract->[$j]=~s/^\s+//; |
|
142 $sourceFilesToSubtract->[$j]=~s/\s+$//; |
|
143 ($sourceFilesToSubtract->[$j] ne '') or die("Error: bad \"-sourceFilesToSubtract\" format (1)\n"); |
|
144 } |
|
145 last; |
|
146 } |
|
147 ($#ARGV>=$i) or die("Error: bad \"-sourceFilesToSubtract\" format (2)\n"); |
|
148 $sourceFilesData.=(splice(@ARGV, $i, 1))[0]; |
|
149 } |
|
150 } |
|
151 else |
|
152 { |
|
153 ++$i; |
|
154 } |
|
155 } |
|
156 } |
|
157 |
|
158 sub readSourceFile |
|
159 { |
|
160 my $fileHandle=shift; |
|
161 my $fileName=shift; |
|
162 my $characterCodesOfOtherColumn=shift; |
|
163 my $linesSorted=shift; |
|
164 my $columnToSortOn=shift; |
|
165 my $columns=shift; |
|
166 my $cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed=shift; |
|
167 my $subtract=shift; |
|
168 my $foreignCharacterCodeProcessingCode=''; |
|
169 if (!(($columns->[0]>0) && ($columns->[1]>0) && ($columns->[2]>0) && ($columns->[1]<=$columns->[0]) && ($columns->[2]<=$columns->[0]) && ($columns->[1]!=$columns->[2]))) |
|
170 { |
|
171 close($fileHandle); |
|
172 die("Error: bad \"-columns\" data\n"); |
|
173 } |
|
174 my $patternOfLineContainingCharacterCodes=join('\s+', ('0x([0-9a-f]+)') x $columns->[0]); |
|
175 my $line; |
|
176 my $strippedDownLine; |
|
177 for (;;) |
|
178 { |
|
179 ($line, $strippedDownLine)=&nextNonEmptyStrippedDownLine($fileHandle); |
|
180 if ($strippedDownLine eq '') |
|
181 { |
|
182 last; |
|
183 } |
|
184 if ($strippedDownLine=~/^SET_FOREIGN_CHARACTER_CODE_PROCESSING_CODE\s+(.*)$/i) |
|
185 { |
|
186 $foreignCharacterCodeProcessingCode=$1; |
|
187 } |
|
188 elsif ($strippedDownLine=~/^$patternOfLineContainingCharacterCodes$/i) |
|
189 { |
|
190 no strict 'refs'; # so that we can use symbolic references for $1, $2, etc |
|
191 my $foreignCharacterCode=hex(${$columns->[1]}); |
|
192 my $unicodeCharacterCode=hex(${$columns->[2]}); |
|
193 use strict 'refs'; |
|
194 if ($foreignCharacterCodeProcessingCode ne '') |
|
195 { |
|
196 $foreignCharacterCode=eval($foreignCharacterCodeProcessingCode); |
|
197 } |
|
198 my $characterCodeOfColumnToSortOn; |
|
199 my $characterCodeOfOtherColumn; |
|
200 if ($columnToSortOn=~/^foreign$/i) |
|
201 { |
|
202 $characterCodeOfColumnToSortOn=$foreignCharacterCode; |
|
203 $characterCodeOfOtherColumn=$unicodeCharacterCode; |
|
204 } |
|
205 elsif ($columnToSortOn=~/^Unicode$/i) |
|
206 { |
|
207 $characterCodeOfColumnToSortOn=$unicodeCharacterCode; |
|
208 $characterCodeOfOtherColumn=$foreignCharacterCode; |
|
209 } |
|
210 else |
|
211 { |
|
212 die("Error: bad parameter \"$columnToSortOn\""); |
|
213 } |
|
214 if ((!$cutOutAnyPrivateUseUnicodeCharacterSlotsBeingUsed) || !((($unicodeCharacterCode>=0xe000) && ($unicodeCharacterCode<=0xf8ff)) || (($unicodeCharacterCode>=0xf0000) && ($unicodeCharacterCode<=0x10ffff)))) |
|
215 { |
|
216 if ($subtract) |
|
217 { |
|
218 $linesSorted->{$characterCodeOfColumnToSortOn}='### '.$linesSorted->{$characterCodeOfColumnToSortOn}; |
|
219 if ($characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}!=$characterCodeOfOtherColumn) |
|
220 { |
|
221 printf(STDERR "Warning: 0x%x maps to 0x%x in the main source file, but to 0x%x in a source file to be extracted\n", $characterCodeOfColumnToSortOn, $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}, $characterCodeOfOtherColumn); |
|
222 } |
|
223 } |
|
224 else |
|
225 { |
|
226 $linesSorted->{$characterCodeOfColumnToSortOn}=$line; |
|
227 $characterCodesOfOtherColumn->{$characterCodeOfColumnToSortOn}=$characterCodeOfOtherColumn; |
|
228 } |
|
229 } |
|
230 } |
|
231 elsif ($line!~/^\s*0x([0-9a-f]+)\s*#\s*undefined.*$/i) |
|
232 { |
|
233 close($fileHandle); |
|
234 die("Error: unexpected line in \"$fileName\":\n $line\n"); |
|
235 } |
|
236 } |
|
237 } |
|
238 |