|
1 #!/usr/bin/perl -w |
|
2 |
|
3 # Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved. |
|
4 # |
|
5 # Redistribution and use in source and binary forms, with or without |
|
6 # modification, are permitted provided that the following conditions |
|
7 # are met: |
|
8 # |
|
9 # 1. Redistributions of source code must retain the above copyright |
|
10 # notice, this list of conditions and the following disclaimer. |
|
11 # 2. Redistributions in binary form must reproduce the above copyright |
|
12 # notice, this list of conditions and the following disclaimer in the |
|
13 # documentation and/or other materials provided with the distribution. |
|
14 # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
|
15 # its contributors may be used to endorse or promote products derived |
|
16 # from this software without specific prior written permission. |
|
17 # |
|
18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
|
19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
|
20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
|
22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
|
23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
|
24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
|
27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
28 |
|
29 # This script is like the genstrings tool (minus most of the options) with these differences. |
|
30 # |
|
31 # 1) It uses the names UI_STRING and UI_STRING_KEY for the macros, rather than the macros |
|
32 # from NSBundle.h, and doesn't support tables (although they would be easy to add). |
|
33 # 2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings; |
|
34 # @"" strings only reliably support ASCII since they are decoded based on the system encoding |
|
35 # at runtime, so give different results on US and Japanese systems for example). |
|
36 # 3) It looks for strings that are not marked for localization, using both macro names that are |
|
37 # known to be used for debugging in Intrigue source code and an exceptions file. |
|
38 # 4) It finds the files to work on rather than taking them as parameters, and also uses a |
|
39 # hardcoded location for both the output file and the exceptions file. |
|
40 # It would have been nice to use the project to find the source files, but it's too hard to |
|
41 # locate source files after parsing a .pbxproj file. |
|
42 |
|
43 # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :. |
|
44 |
|
45 use strict; |
|
46 |
|
47 sub UnescapeHexSequence($); |
|
48 |
|
# Names of macros/functions whose string arguments are debugging output.
# Quoted strings found while nested inside a call to one of these are not
# reported as "not marked for localization".
my %isDebugMacro = map { $_ => 1 } qw(
    ASSERT_WITH_MESSAGE
    LOG_ERROR
    ERROR
    NSURL_ERROR
    FATAL
    LOG
    LOG_WARNING
    UI_STRING_LOCALIZE_LATER
    LPCTSTR_UI_STRING_LOCALIZE_LATER
    UNLOCALIZED_STRING
    UNLOCALIZED_LPCTSTR
    dprintf
    NSException
    NSLog
    printf
);
|
50 |
|
# Command line: <exceptions file> <file to update> [ directory... ]
# Both files must already exist as plain files.
if (@ARGV < 2) {
    die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";
}

my $exceptionsFile = shift @ARGV;
die "Couldn't find exceptions file $exceptionsFile\n" unless -f $exceptionsFile;

my $fileToUpdate = shift @ARGV;
die "Couldn't find file to update $fileToUpdate\n" unless -f $fileToUpdate;

# Remaining arguments are directories to scan; an argument starting with "-"
# names a directory to skip instead (the "-" itself is dropped). With no
# remaining arguments at all, the current directory is scanned.
my @directories = @ARGV ? (grep { !/^-(.*)$/ } @ARGV) : (".");
my @directoriesToSkip = map { /^-(.*)$/ ? $1 : () } @ARGV;

# Set to 1 as soon as any ERROR is reported; checked before writing output.
my $sawError = 0;

# Statistics printed in the summary at the end of the run.
my ($localizedCount, $keyCollisionCount, $notLocalizedCount, $NSLocalizeCount) = (0, 0, 0, 0);

# %exception maps each exceptions-file entry to the line it was read from;
# %usedException records which entries actually suppressed a report.
my %exception;
my %usedException;
|
82 |
|
# Load the exceptions file. Each line must be one of:
#   "string"              a quoted string (backslash escapes allowed)
#   path/file.ext         a source file name ending in .h, .m, .mm, .c or .cpp
#   path/file.ext:"string"  a file name and quoted string separated by ":"
# Valid entries are stored in %exception, keyed by the whole line, with the
# line number as the value. Duplicates and malformed lines are reported.
if (open(my $exceptionsHandle, "<", $exceptionsFile)) {
    while (<$exceptionsHandle>) {
        chomp;
        # The "." before the extension is escaped so that it only matches a
        # literal dot; unescaped it would accept any character there.
        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+\.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
            if ($exception{$_}) {
                print "$exceptionsFile:$.:exception for $_ appears twice\n";
                print "$exceptionsFile:$exception{$_}:first appearance\n";
            } else {
                $exception{$_} = $.;
            }
        } else {
            print "$exceptionsFile:$.:syntax error\n";
        }
    }
    close $exceptionsHandle;
} else {
    # Keep going without exceptions, but say so instead of failing silently.
    warn "Couldn't open exceptions file $exceptionsFile: $!\n";
}
|
99 |
|
# Build the list of source files (.h, .m, .mm, .c, .cpp) by running find(1)
# over the requested directories, pruning each directory to skip.
#
# The command is passed to find as an argument list rather than through the
# shell, so directory names containing quotes, "$" or backticks are passed
# through literally.
my @findCommand = ("find", (@directories ? @directories : (".")));
for my $dir (@directoriesToSkip) {
    push @findCommand, "-path", $dir, "-prune", "-o";
}
push @findCommand,
    "(", "-name", "*.h", "-o", "-name", "*.m", "-o", "-name", "*.mm", "-o", "-name", "*.c", "-o", "-name", "*.cpp", ")",
    # Explicit -print: without it find wraps the whole expression in an
    # implicit -print and would also list the pruned directories themselves.
    "-print";

open(my $findOutput, "-|", @findCommand) or die "Couldn't run find: $!\n";
my @files = <$findOutput>;
chomp @files;
close $findOutput;
|
106 |
|
# Scan every source file. Each file is tokenized line by line in order to
#   - collect UI_STRING / UI_STRING_KEY invocations (passed to HandleUIString),
#   - report uses of NSLocalized* macros as errors, and
#   - report quoted strings that are neither inside a debug macro nor covered
#     by an entry in the exceptions file.
for my $file (sort @files) {
    next if $file =~ /\/\w+LocalizableStrings\.h$/;

    # Strip a leading "./" (as produced by find on "."). The dot is escaped so
    # that only a literal "./" is removed, not any one-character directory.
    $file =~ s-^\./--;

    open(my $sourceHandle, "<", $file) or die "can't open $file\n";

    # True while inside a /* ... */ comment that spans lines.
    my $inComment = 0;

    # State of the UI_STRING invocation being parsed. $expected holds the
    # next token required ("(", ",", ")" or "a quoted string"), or "" when no
    # invocation is in progress.
    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    # The string literal currently being accumulated (adjacent literals are
    # concatenated), the line it started on, and the parenthesis/bracket depth
    # inside a debug macro or NSLocalized use (undef when not inside one).
    my $string;
    my $stringLine;
    my $nestingLevel;

    my $previousToken = "";

    # Consumes the accumulated string literal: either stores it as the next
    # argument of the UI_STRING invocation in progress, or checks whether it
    # needed to be marked for localization. Clears $string afterwards.
    my $handlePendingString = sub {
        if ($expected) {
            if (!defined $UIString) {
                # FIXME: Validate UTF-8 here?
                $UIString = $string;
                $expected = ",";
            } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) {
                # FIXME: Validate UTF-8 here?
                $key = $string;
                $expected = ",";
            } elsif (!defined $comment) {
                # FIXME: Validate UTF-8 here?
                $comment = $string;
                $expected = ")";
            }
        } else {
            if (defined $nestingLevel) {
                # In a debug macro, no need to localize.
            } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                # File name, no need to localize.
            } elsif ($previousToken eq "extern" and $string eq "C") {
                # extern "C", no need to localize.
            } elsif ($string eq "") {
                # Empty string can sometimes be localized, but we need not complain if not.
            } elsif ($exception{$file}) {
                $usedException{$file} = 1;
            } elsif ($exception{"\"$string\""}) {
                $usedException{"\"$string\""} = 1;
            } elsif ($exception{"$file:\"$string\""}) {
                $usedException{"$file:\"$string\""} = 1;
            } else {
                print "$file:$stringLine:\"$string\" is not marked for localization\n";
                $notLocalizedCount++;
            }
        }
        $string = undef;
    };

    while (<$sourceHandle>) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            next unless s-.*\*/--;
            $inComment = 0;
        }

        # Handle all the tokens in the line. Each iteration removes one token
        # from the front of $_.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                # Consume the rest of the literal up to its closing quote.
                if (s-^(([^\\$token]|\\.)*?)$token--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.:ERROR:mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # Any non-string token ends the literal accumulated so far.
            $handlePendingString->() if defined $string;

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                    print "$file:$.:ERROR:mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.:ERROR:found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token =~ /UI_STRING(_KEY)?$/) {
                    # Start of a new invocation: reset the collected arguments.
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    # Leaving the outermost bracket of a debug macro ends it.
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        # Invocation complete; the key defaults to the string itself.
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A string literal that was the very last token of the file has not been
    # followed by another token, so it still has to be handled here.
    $handlePendingString->() if defined $string;

    if ($expected) {
        print "$file:ERROR:reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close $sourceHandle;
}
|
267 |
|
# Unescapes C language hexadecimal escape sequences.
#
# Takes a string as it appears between the quotes of a C string literal and
# returns a copy in which every \xHH... escape is replaced by the single byte
# it denotes. All other text, including other backslash escapes, is copied
# through unchanged. An undefined argument is treated as the empty string.
#
# Returns undef (after printing a message) if an escape denotes a value of
# 256 or more.
sub UnescapeHexSequence($)
{
    my ($originalStr) = @_;

    $originalStr = "" if !defined $originalStr;

    my $unescapedStr = "";

    # Walk the string one unit at a time: a hex escape, any other two-character
    # backslash escape, or a single character. Treating "\\" as one unit keeps
    # an escaped backslash followed by "x41" from being read as a hex escape.
    # /s lets "." match a newline so nothing after one is dropped.
    while ($originalStr =~ /\G(?:\\x([[:xdigit:]]+)|(\\.|.))/gcs) {
        if (defined $1) {
            my $hexDigits = $1;

            # Leading zeros do not change the value; with them removed, more
            # than two digits means the value cannot fit in one byte.
            (my $significantDigits = $hexDigits) =~ s/^0+(?=.)//;
            if (length($significantDigits) > 2) {
                print "Hexadecimal escape sequence out of range: \\x$hexDigits\n";
                return undef;
            }

            # pack "C" emits exactly one byte with this value, regardless of
            # how many digits were written.
            $unescapedStr .= pack("C", hex($significantDigits));
        } else {
            $unescapedStr .= $2;
        }
    }

    return $unescapedStr;
}
|
290 |
|
# Everything recorded about each localizable string, indexed by its key:
# the string itself, its translator comment, and the file and line where
# the key was recorded.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

# Records one UI_STRING / UI_STRING_KEY invocation.
#
# Arguments: the string, its key, the translator comment, and the file and
# line of the invocation (used in diagnostics).
#
# The three texts are run through UnescapeHexSequence and checked for
# U+FFFD. Any problem is printed, sets $sawError, and the invocation is
# dropped. A key already recorded with a different string or a different
# comment is reported, counted in $keyCollisionCount, and dropped. Otherwise
# the invocation is stored in the four hashes above.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    $string = UnescapeHexSequence($string);
    if (!defined($string)) {
        print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $key = UnescapeHexSequence($key);
    if (!defined($key)) {
        print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $comment = UnescapeHexSequence($comment);
    if (!defined($comment)) {
        print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }

    # The U+FFFD checks are skipped for a value that is already known to be
    # bad (undefined), since there is nothing to unpack.
    if (defined($string) && grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    # When the key is the same text as the string, the problem has already
    # been reported above.
    if (defined($key) && (!defined($string) || $string ne $key) && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (defined($comment) && grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    # Use exists rather than truthiness so that a previously recorded "" or
    # "0" still takes part in collision detection.
    if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}
|
351 |
|
# Final report: list exceptions that never matched, print the counters, and,
# unless an error was seen, rewrite $fileToUpdate with the collected strings.
print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}

# One entry per key, sorted by key:
#   /* comment */
#   "key" = "string";
my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

# Write out the strings file in UTF-16 with a BOM.
# utf8::decode is only available from Perl 5.8 on; $] is compared numerically
# because comparing $^V as a string only gives the right answer by accident.
utf8::decode($localizedStrings) if $] >= 5.008;

# Convert code points to big-endian UTF-16 code units. Code points above
# U+FFFF do not fit in 16 bits and must be written as a surrogate pair;
# packing them directly with "n" would silently truncate them.
my @utf16Units;
for my $codePoint (unpack "U*", $localizedStrings) {
    if ($codePoint > 0xFFFF) {
        my $offset = $codePoint - 0x10000;
        push @utf16Units, 0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF);
    } else {
        push @utf16Units, $codePoint;
    }
}
my $output = pack "n*", 0xFEFF, @utf16Units;

if (-e "$fileToUpdate") {
    open(my $stringsHandle, ">", $fileToUpdate) or die "Couldn't open $fileToUpdate for writing: $!\n";
    # The data is raw UTF-16 bytes; make sure no newline translation is applied.
    binmode $stringsHandle;
    print $stringsHandle $output;
    # Buffered write errors only surface when the handle is closed.
    close $stringsHandle or die "Couldn't finish writing $fileToUpdate: $!\n";
} else {
    print "$fileToUpdate does not exist\n";
    exit 1;
}