#!/usr/bin/perl -w

# Copyright (C) 2006, 2007 Apple Inc.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
#     its contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# Provenance: this text was carried by a Mercurial diff (changeset dd21522fd290,
# Mon Mar 30 12:54:55 2009 +0300) that adds the file
# webengine/osswebengine/WebKitTools/Scripts/extract-localizable-strings.

# This script is like the genstrings tool (minus most of the options) with these differences.
#
#    1) It uses the names UI_STRING and UI_STRING_KEY (plus the LPCTSTR_ prefixed variants)
#       for the macros, rather than the macros from NSBundle.h, and doesn't support tables
#       (although they would be easy to add).
#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
#       at runtime, so give different results on US and Japanese systems for example).
#    3) It looks for strings that are not marked for localization, using both macro names that are
#       known to be used for debugging in Intrigue source code and an exceptions file.
#    4) It finds the files to work on by searching directories rather than taking file names as
#       parameters, and uses a hardcoded location (relative to each directory) for the output file.
#       The exceptions file is the first command-line argument.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.
#
# Command line:
#
#    extract-localizable-strings exceptions-file [ directory | -path-to-skip ]...
#
# Each plain argument is a directory to search; each argument starting with "-" names a path
# (in the syntax of find's -path test) that is pruned from the search. With no directory
# arguments the current directory is searched.

# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.

use strict;
use warnings;

# Output location, relative to every searched directory.
my $stringsFile = "English.lproj/Localizable.strings";

# Macros and functions whose string arguments are debugging output and need no localization.
my %isDebugMacro = map { $_ => 1 } qw(ASSERT_WITH_MESSAGE LOG_ERROR ERROR NSURL_ERROR FATAL LOG dprintf NSException NSLog printf);

# The first argument (the exceptions file) is mandatory, so the usage text has to mention it.
@ARGV >= 1 or die "Usage: extract-localizable-strings exceptions-file [ directory... ]\nDid you mean to run extract-webkit-localizable-strings instead?\n";

my $exceptionsFile = shift @ARGV;
-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";

my @directories = ();
my @directoriesToSkip = ();
for my $dir (@ARGV) {
    if ($dir =~ /^-(.*)$/) {
        push @directoriesToSkip, $1;
    } else {
        push @directories, $dir;
    }
}
# Also covers the case where only "-skip" arguments were given.
push @directories, "." unless @directories;

my $sawError = 0;

my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;

# Exception text => line number in the exceptions file where it was first seen.
my %exception;
# Exception text => 1 once the exception has suppressed at least one complaint.
my %usedException;

# State owned by HandleUIString, all keyed by localization key. Declared before the scan loop
# so the hashes exist before the first call that fills them.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

# Read the exceptions file. Each line must be a quoted string, a source file name, or
# file:"string"; anything else is reported as a syntax error.
if (open my $exceptions, "<", $exceptionsFile) {
    # Explicit readline into $_ so that chomp, the matches and $. all work on the current line.
    while (defined($_ = readline($exceptions))) {
        chomp;
        # The dot before the extension is escaped so that only real file names are accepted.
        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+\.(h|m|mm|cpp)$/ or /^[-_\/\w.]+\.(h|m|mm|cpp):"([^\\"]|\\.)*"$/) {
            if ($exception{$_}) {
                print "$exceptionsFile:$.:exception for $_ appears twice\n";
                print "$exceptionsFile:$exception{$_}:first appearance\n";
            } else {
                $exception{$_} = $.;
            }
        } else {
            print "$exceptionsFile:$.:syntax error\n";
        }
    }
    close $exceptions;
} else {
    # Scanning continues without exceptions, but the failure is no longer silent.
    warn "Couldn't open exceptions file $exceptionsFile: $!\n";
}

my @files = FindSourceFiles(\@directories, \@directoriesToSkip);

for my $file (sort @files) {
    next if $file =~ /\/WebLocalizableStrings\.h$/;
    next if $file =~ /\/icu\//;

    # Strip a leading "./" only; an unescaped dot here would also eat a one-letter directory.
    $file =~ s-^\./--;

    open my $source, "<", $file or die "can't open $file: $!\n";

    my $inComment = 0;

    # Parser state for the UI_STRING macro currently being read. $expected names the token
    # that must come next ("" when no macro is being parsed).
    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    # Accumulator for adjacent string literals, plus the line the first piece was found on.
    my $string;
    my $stringLine;
    # Defined while inside the argument list of a debug macro or an NSLocalized macro.
    my $nestingLevel;

    my $previousToken = "";

    # Consumes the accumulated string literal: either stores it as the next macro argument or
    # checks whether it should have been marked for localization. Called when a non-string
    # token ends a run of literals and once more at end of file, replacing a goto into the
    # token loop (jumping into a loop body with goto is deprecated in Perl).
    my $handlePendingString = sub {
        if ($expected) {
            if (!defined $UIString) {
                # FIXME: Validate UTF-8 here?
                $UIString = $string;
                $expected = ",";
            } elsif (($macro eq "UI_STRING_KEY" or $macro eq "LPCTSTR_UI_STRING_KEY") and !defined $key) {
                # FIXME: Validate UTF-8 here?
                $key = $string;
                $expected = ",";
            } elsif (!defined $comment) {
                # FIXME: Validate UTF-8 here?
                $comment = $string;
                $expected = ")";
            }
        } else {
            if (defined $nestingLevel) {
                # In a debug macro, no need to localize.
            } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                # File name, no need to localize.
            } elsif ($previousToken eq "extern" and $string eq "C") {
                # extern "C", no need to localize.
            } elsif ($string eq "") {
                # Empty string can sometimes be localized, but we need not complain if not.
            } elsif ($exception{$file}) {
                $usedException{$file} = 1;
            } elsif ($exception{"\"$string\""}) {
                $usedException{"\"$string\""} = 1;
            } elsif ($exception{"$file:\"$string\""}) {
                $usedException{"$file:\"$string\""} = 1;
            } else {
                print "$file:$stringLine:\"$string\" is not marked for localization\n";
                $notLocalizedCount++;
            }
        }
        $string = undef;
    };

    # The tokenizer below works destructively on $_, so each line is read into $_ explicitly.
    while (defined($_ = readline($source))) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            next unless s-.*\*/--;
            $inComment = 0;
        }

        # Handle all the tokens in the line.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.:ERROR:found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                # Take everything up to the closing quote; adjacent literals are concatenated.
                if (s-^(([^\\$token]|\\.)*?)$token--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.:ERROR:mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # Any other token ends the current run of string literals.
            $handlePendingString->() if defined $string;

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                if (!s-([^\\]|\\.)'--) { # ' <-- that single quote keeps editors' quote matching balanced
                    print "$file:$.:ERROR:mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.:ERROR:found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token eq "UI_STRING" or $token eq "UI_STRING_KEY" or $token eq "LPCTSTR_UI_STRING" or $token eq "LPCTSTR_UI_STRING_KEY") {
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    # Leaving the outermost bracket of a debug macro ends the "no need to localize" region.
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        # A macro without an explicit key uses the string itself as the key.
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A string literal that was the last token in the file still has to be checked.
    $handlePendingString->() if defined $string;

    if ($expected) {
        print "$file:ERROR:reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close $source;
}

print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

if ($sawError) {
    print "\nErrors encountered. Exiting without writing a $stringsFile file.\n";
    exit 1;
}

my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

# Write out the strings file in UTF-16 with a BOM.
# utf8::decode is only called on perl 5.8 or later. $] is a plain number, so this test does not
# depend on how the running perl represents $^V.
utf8::decode($localizedStrings) if $] >= 5.008;
my $output = pack "n*", 0xFEFF, EncodeUTF16(unpack("U*", $localizedStrings));
foreach my $directory (@directories) {
    my $path = "$directory/$stringsFile";
    open my $strings, ">", $path or die "Couldn't open $path for writing: $!\n";
    binmode $strings; # The payload is raw UTF-16 bytes; no newline translation wanted.
    print {$strings} $output;
    # Buffered write errors only show up when the handle is closed.
    close $strings or die "Couldn't write $path: $!\n";
}

# Returns the paths of all *.h, *.m, *.mm and *.cpp entries that find reports below the given
# directories, leaving out everything underneath the paths to skip.
#
#   $directories       - array reference: directories to search
#   $directoriesToSkip - array reference: paths handed to find's -path test and pruned
#
# find is started with an argument list instead of a shell command line, so directory names
# need no quoting. The tests are grouped explicitly: without the parentheses the implicit
# "and" binds tighter than -o, which made the prune tests swallow the *.h test and let only
# the first skip path take effect.
sub FindSourceFiles
{
    my ($directories, $directoriesToSkip) = @_;

    my @command = ("find", @$directories);

    if (@$directoriesToSkip) {
        my @pruneTests = map { ("-o", "-path", $_) } @$directoriesToSkip;
        shift @pruneTests; # No -o in front of the first test.
        push @command, "(", @pruneTests, ")", "-prune", "-o";
    }

    push @command, "(", "-name", "*.h", "-o", "-name", "*.m", "-o", "-name", "*.mm", "-o", "-name", "*.cpp", ")", "-print";

    open(my $find, "-|", @command) or die "Couldn't run find: $!\n";
    chomp(my @found = readline($find));
    # A failing find (for example an unreadable directory) is reported but not fatal.
    close $find or warn "find exited with status " . ($? >> 8) . "\n";

    return @found;
}

# Converts a list of Unicode code points into a list of UTF-16 code units. Code points above
# U+FFFF become a surrogate pair; packing them directly as 16-bit values would truncate them.
sub EncodeUTF16
{
    my @units;
    for my $codePoint (@_) {
        if ($codePoint > 0xFFFF) {
            my $offset = $codePoint - 0x10000;
            push @units, 0xD800 + ($offset >> 10), 0xDC00 + ($offset & 0x3FF);
        } else {
            push @units, $codePoint;
        }
    }
    return @units;
}

# Records one UI_STRING style macro use.
#
#   $string  - the text to translate
#   $key     - the localization key (equal to $string when the macro has no separate key)
#   $comment - the comment for translators
#   $file    - source file the macro was found in
#   $line    - line on which the macro name was found
#
# Reports U+FFFD (the replacement character) in any of the three texts as an encoding error and
# sets $sawError. Reports a key that was seen before with a different string or comment as a
# key collision. In both cases nothing is stored. Otherwise the string, comment, file and line
# are remembered under the key.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    if (grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}