WebKitTools/Scripts/extract-localizable-strings
changeset 0 4f2f89ce4247
equal deleted inserted replaced
-1:000000000000 0:4f2f89ce4247
       
     1 #!/usr/bin/perl -w
       
     2 
       
     3 # Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved.
       
     4 #
       
     5 # Redistribution and use in source and binary forms, with or without
       
     6 # modification, are permitted provided that the following conditions
       
     7 # are met:
       
     8 #
       
     9 # 1.  Redistributions of source code must retain the above copyright
       
    10 #     notice, this list of conditions and the following disclaimer. 
       
    11 # 2.  Redistributions in binary form must reproduce the above copyright
       
    12 #     notice, this list of conditions and the following disclaimer in the
       
    13 #     documentation and/or other materials provided with the distribution. 
       
    14 # 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
       
    15 #     its contributors may be used to endorse or promote products derived
       
    16 #     from this software without specific prior written permission. 
       
    17 #
       
    18 # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
       
    19 # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
       
    20 # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
       
    21 # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
       
    22 # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
       
    23 # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
       
    24 # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
       
    25 # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
       
    26 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
       
    27 # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
       
    28 
       
    29 # This script is like the genstrings tool (minus most of the options) with these differences.
       
    30 #
       
    31 #    1) It uses the names UI_STRING and UI_STRING_KEY for the macros, rather than the macros
       
    32 #       from NSBundle.h, and doesn't support tables (although they would be easy to add).
       
    33 #    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
       
    34 #       @"" strings only reliably support ASCII since they are decoded based on the system encoding
       
    35 #       at runtime, so give different results on US and Japanese systems for example).
       
    36 #    3) It looks for strings that are not marked for localization, using both macro names that are
       
    37 #       known to be used for debugging in Intrigue source code and an exceptions file.
       
    38 #    4) It finds the files to work on rather than taking them as parameters, and also uses a
       
    39 #       hardcoded location for both the output file and the exceptions file.
       
    40 #       It would have been nice to use the project to find the source files, but it's too hard to
       
    41 #       locate source files after parsing a .pbxproj file.
       
    42 
       
    43 # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
       
    44 
       
    45 use strict;
       
    46 
       
    47 sub UnescapeHexSequence($);
       
    48 
       
    49 my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );
       
    50 
       
    51 @ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n";
       
    52 
       
    53 my $exceptionsFile = shift @ARGV;
       
    54 -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";
       
    55 
       
    56 my $fileToUpdate = shift @ARGV;
       
    57 -f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";
       
    58 
       
    59 my @directories = ();
       
    60 my @directoriesToSkip = ();
       
    61 if (@ARGV < 1) {
       
    62     push(@directories, ".");
       
    63 } else {
       
    64     for my $dir (@ARGV) {
       
    65         if ($dir =~ /^-(.*)$/) {
       
    66             push @directoriesToSkip, $1;
       
    67         } else {
       
    68             push @directories, $dir;
       
    69         }
       
    70     }
       
    71 }
       
    72 
       
    73 my $sawError = 0;
       
    74 
       
    75 my $localizedCount = 0;
       
    76 my $keyCollisionCount = 0;
       
    77 my $notLocalizedCount = 0;
       
    78 my $NSLocalizeCount = 0;
       
    79 
       
    80 my %exception;
       
    81 my %usedException;
       
    82 
       
    83 if (open EXCEPTIONS, $exceptionsFile) {
       
    84     while (<EXCEPTIONS>) {
       
    85         chomp;
       
    86         if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) {
       
    87             if ($exception{$_}) {
       
    88                 print "$exceptionsFile:$.:exception for $_ appears twice\n";
       
    89                 print "$exceptionsFile:$exception{$_}:first appearance\n";
       
    90             } else {
       
    91                 $exception{$_} = $.;
       
    92             }
       
    93         } else {
       
    94             print "$exceptionsFile:$.:syntax error\n";
       
    95         }
       
    96     }
       
    97     close EXCEPTIONS;
       
    98 }
       
    99 
       
   100 my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
       
   101 for my $dir (@directoriesToSkip) {
       
   102     $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
       
   103 }
       
   104 
       
   105 my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );
       
   106 
       
   107 for my $file (sort @files) {
       
   108     next if $file =~ /\/\w+LocalizableStrings\.h$/;
       
   109 
       
   110     $file =~ s-^./--;
       
   111 
       
   112     open SOURCE, $file or die "can't open $file\n";
       
   113     
       
   114     my $inComment = 0;
       
   115     
       
   116     my $expected = "";
       
   117     my $macroLine;
       
   118     my $macro;
       
   119     my $UIString;
       
   120     my $key;
       
   121     my $comment;
       
   122     
       
   123     my $string;
       
   124     my $stringLine;
       
   125     my $nestingLevel;
       
   126     
       
   127     my $previousToken = "";
       
   128 
       
   129     while (<SOURCE>) {
       
   130         chomp;
       
   131         
       
   132         # Handle continued multi-line comment.
       
   133         if ($inComment) {
       
   134             next unless s-.*\*/--;
       
   135             $inComment = 0;
       
   136         }
       
   137     
       
   138         # Handle all the tokens in the line.
       
   139         while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
       
   140             my $token = $1;
       
   141             
       
   142             if ($token eq "\"") {
       
   143                 if ($expected and $expected ne "a quoted string") {
       
   144                     print "$file:$.:ERROR:found a quoted string but expected $expected\n";
       
   145                     $sawError = 1;
       
   146                     $expected = "";
       
   147                 }
       
   148                 if (s-^(([^\\$token]|\\.)*?)$token--) {
       
   149                     if (!defined $string) {
       
   150                         $stringLine = $.;
       
   151                         $string = $1;
       
   152                     } else {
       
   153                         $string .= $1;
       
   154                     }
       
   155                 } else {
       
   156                     print "$file:$.:ERROR:mismatched quotes\n";
       
   157                     $sawError = 1;
       
   158                     $_ = "";
       
   159                 }
       
   160                 next;
       
   161             }
       
   162             
       
   163             if (defined $string) {
       
   164 handleString:
       
   165                 if ($expected) {
       
   166                     if (!defined $UIString) {
       
   167                         # FIXME: Validate UTF-8 here?
       
   168                         $UIString = $string;
       
   169                         $expected = ",";
       
   170                     } elsif (($macro =~ /UI_STRING_KEY$/) and !defined $key) {
       
   171                         # FIXME: Validate UTF-8 here?
       
   172                         $key = $string;
       
   173                         $expected = ",";
       
   174                     } elsif (!defined $comment) {
       
   175                         # FIXME: Validate UTF-8 here?
       
   176                         $comment = $string;
       
   177                         $expected = ")";
       
   178                     }
       
   179                 } else {
       
   180                     if (defined $nestingLevel) {
       
   181                         # In a debug macro, no need to localize.
       
   182                     } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
       
   183                         # File name, no need to localize.
       
   184                     } elsif ($previousToken eq "extern" and $string eq "C") {
       
   185                         # extern "C", no need to localize.
       
   186                     } elsif ($string eq "") {
       
   187                         # Empty string can sometimes be localized, but we need not complain if not.
       
   188                     } elsif ($exception{$file}) {
       
   189                         $usedException{$file} = 1;
       
   190                     } elsif ($exception{"\"$string\""}) {
       
   191                         $usedException{"\"$string\""} = 1;
       
   192                     } elsif ($exception{"$file:\"$string\""}) {
       
   193                         $usedException{"$file:\"$string\""} = 1;
       
   194                     } else {
       
   195                         print "$file:$stringLine:\"$string\" is not marked for localization\n";
       
   196                         $notLocalizedCount++;
       
   197                     }
       
   198                 }
       
   199                 $string = undef;
       
   200                 last if !defined $token;
       
   201             }
       
   202             
       
   203             $previousToken = $token;
       
   204 
       
   205             if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
       
   206                 print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
       
   207                 $nestingLevel = 0 if !defined $nestingLevel;
       
   208                 $sawError = 1;
       
   209                 $NSLocalizeCount++;
       
   210             } elsif ($token eq "/*") {
       
   211                 if (!s-^.*?\*/--) {
       
   212                     $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
       
   213                     $inComment = 1;
       
   214                 }
       
   215             } elsif ($token eq "//") {
       
   216                 $_ = ""; # Discard the rest of the line
       
   217             } elsif ($token eq "'") {
       
   218                 if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
       
   219                     print "$file:$.:ERROR:mismatched single quote\n";
       
   220                     $sawError = 1;
       
   221                     $_ = "";
       
   222                 }
       
   223             } else {
       
   224                 if ($expected and $expected ne $token) {
       
   225                     print "$file:$.:ERROR:found $token but expected $expected\n";
       
   226                     $sawError = 1;
       
   227                     $expected = "";
       
   228                 }
       
   229                 if ($token =~ /UI_STRING(_KEY)?$/) {
       
   230                     $expected = "(";
       
   231                     $macro = $token;
       
   232                     $UIString = undef;
       
   233                     $key = undef;
       
   234                     $comment = undef;
       
   235                     $macroLine = $.;
       
   236                 } elsif ($token eq "(" or $token eq "[") {
       
   237                     ++$nestingLevel if defined $nestingLevel;
       
   238                     $expected = "a quoted string" if $expected;
       
   239                 } elsif ($token eq ",") {
       
   240                     $expected = "a quoted string" if $expected;
       
   241                 } elsif ($token eq ")" or $token eq "]") {
       
   242                     $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
       
   243                     if ($expected) {
       
   244                         $key = $UIString if !defined $key;
       
   245                         HandleUIString($UIString, $key, $comment, $file, $macroLine);
       
   246                         $macro = "";
       
   247                         $expected = "";
       
   248                         $localizedCount++;
       
   249                     }
       
   250                 } elsif ($isDebugMacro{$token}) {
       
   251                     $nestingLevel = 0 if !defined $nestingLevel;
       
   252                 }
       
   253             }
       
   254         }
       
   255             
       
   256     }
       
   257     
       
   258     goto handleString if defined $string;
       
   259     
       
   260     if ($expected) {
       
   261         print "$file:ERROR:reached end of file but expected $expected\n";
       
   262         $sawError = 1;
       
   263     }
       
   264     
       
   265     close SOURCE;
       
   266 }
       
   267 
       
   268 # Unescapes C language hexadecimal escape sequences.
       
   269 sub UnescapeHexSequence($)
       
   270 {
       
   271     my ($originalStr) = @_;
       
   272 
       
   273     my $escapedStr = $originalStr;
       
   274     my $unescapedStr = "";
       
   275 
       
   276     for (;;) {
       
   277         if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
       
   278             if (256 <= hex($1)) {
       
   279                 print "Hexadecimal escape sequence out of range: \\x$1\n";
       
   280                 return undef;
       
   281             }
       
   282             $unescapedStr .= pack("H*", $1);
       
   283         } elsif ($escapedStr =~ s-^(.)--) {
       
   284             $unescapedStr .= $1;
       
   285         } else {
       
   286             return $unescapedStr;
       
   287         }
       
   288     }
       
   289 }
       
   290 
       
   291 my %stringByKey;
       
   292 my %commentByKey;
       
   293 my %fileByKey;
       
   294 my %lineByKey;
       
   295 
       
   296 sub HandleUIString
       
   297 {
       
   298     my ($string, $key, $comment, $file, $line) = @_;
       
   299 
       
   300     my $bad = 0;
       
   301     $string = UnescapeHexSequence($string);
       
   302     if (!defined($string)) {
       
   303         print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n";
       
   304         $bad = 1;
       
   305     }
       
   306     $key = UnescapeHexSequence($key);
       
   307     if (!defined($key)) {
       
   308         print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n";
       
   309         $bad = 1;
       
   310     }
       
   311     $comment = UnescapeHexSequence($comment);
       
   312     if (!defined($comment)) {
       
   313         print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n";
       
   314         $bad = 1;
       
   315     }
       
   316     if (grep { $_ == 0xFFFD } unpack "U*", $string) {
       
   317         print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
       
   318         $bad = 1;
       
   319     }
       
   320     if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
       
   321         print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
       
   322         $bad = 1;
       
   323     }
       
   324     if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
       
   325         print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
       
   326         $bad = 1;
       
   327     }
       
   328     if ($bad) {
       
   329         $sawError = 1;
       
   330         return;
       
   331     }
       
   332     
       
   333     if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
       
   334         print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
       
   335         print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
       
   336         $keyCollisionCount++;
       
   337         return;
       
   338     }
       
   339     if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
       
   340         print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
       
   341         print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
       
   342         $keyCollisionCount++;
       
   343         return;
       
   344     }
       
   345 
       
   346     $fileByKey{$key} = $file;
       
   347     $lineByKey{$key} = $line;
       
   348     $stringByKey{$key} = $string;
       
   349     $commentByKey{$key} = $comment;
       
   350 }
       
   351 
       
   352 print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;
       
   353 
       
   354 my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
       
   355 if (@unusedExceptions) {
       
   356     for my $unused (@unusedExceptions) {
       
   357         print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
       
   358     }
       
   359     print "\n";
       
   360 }
       
   361 
       
   362 print "$localizedCount localizable strings\n" if $localizedCount;
       
   363 print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
       
   364 print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
       
   365 print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
       
   366 print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;
       
   367 
       
   368 if ($sawError) {
       
   369     print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
       
   370     exit 1;
       
   371 }
       
   372 
       
   373 my $localizedStrings = "";
       
   374 
       
   375 for my $key (sort keys %commentByKey) {
       
   376     $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
       
   377 }
       
   378 
       
   379 # Write out the strings file in UTF-16 with a BOM.
       
   380 utf8::decode($localizedStrings) if $^V ge v5.8;
       
   381 my $output = pack "n*", (0xFEFF, unpack "U*", $localizedStrings);
       
   382 
       
   383 if (-e "$fileToUpdate") {
       
   384     open STRINGS, ">", "$fileToUpdate" or die;
       
   385     print STRINGS $output;
       
   386     close STRINGS;
       
   387 } else {
       
   388     print "$fileToUpdate does not exist\n";
       
   389     exit 1;
       
   390 }