webengine/osswebengine/WebKitTools/Scripts/extract-localizable-strings
changeset 0 dd21522fd290
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebKitTools/Scripts/extract-localizable-strings	Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,345 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2006, 2007 Apple Inc.  All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1.  Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer. 
+# 2.  Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution. 
+# 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+#     its contributors may be used to endorse or promote products derived
+#     from this software without specific prior written permission. 
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# This script is like the genstrings tool (minus most of the options) with these differences.
+#
+#    1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros
+#       from NSBundle.h, and doesn't support tables (although they would be easy to add).
+#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
+#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
+#       at runtime, so give different results on US and Japanese systems for example).
+#    3) It looks for strings that are not marked for localization, using both macro names that are
+#       known to be used for debugging in Intrigue source code and an exceptions file.
+#    4) It finds the files to work on rather than taking them as parameters, and also uses a
+#       hardcoded location for both the output file and the exceptions file.
+#       It would have been nice to use the project to find the source files, but it's too hard to
+#       locate source files after parsing a .pbxproj file.
+
+# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
+
+use strict;
+
+my $stringsFile = "English.lproj/Localizable.strings";
+my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 );
+
+@ARGV >= 1 or die "Usage: extract-localizable-strings <exceptions file> [ directory... ]\nDid you mean to run extract-webkit-localizable-strings instead?\n";
+
+my $exceptionsFile = shift @ARGV;
+-f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n";
+
+my @directories = ();
+my @directoriesToSkip = ();
+if (@ARGV < 1) {
+    push(@directories, ".");
+} else {
+    for my $dir (@ARGV) {
+        if ($dir =~ /^-(.*)$/) {
+            push @directoriesToSkip, $1;
+        } else {
+            push @directories, $dir;
+        }
+    }
+}
+
+my $sawError = 0;
+
+my $localizedCount = 0;
+my $keyCollisionCount = 0;
+my $notLocalizedCount = 0;
+my $NSLocalizeCount = 0;
+
+my %exception;
+my %usedException;
+
+if (open EXCEPTIONS, $exceptionsFile) {
+    while (<EXCEPTIONS>) {
+        chomp;
+        if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+.(h|m|mm|cpp)$/ or /^[-_\/\w.]+.(h|m|mm|cpp):"([^\\"]|\\.)*"$/) {
+            if ($exception{$_}) {
+                print "$exceptionsFile:$.:exception for $_ appears twice\n";
+                print "$exceptionsFile:$exception{$_}:first appearance\n";
+            } else {
+                $exception{$_} = $.;
+            }
+        } else {
+            print "$exceptionsFile:$.:syntax error\n";
+        }
+    }
+    close EXCEPTIONS;
+}
+
+my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
+for my $dir (@directoriesToSkip) {
+    $quotedDirectoriesString .= ' -path "' . $dir . '" -prune';
+}
+
+my @files = ( split "\n", `find $quotedDirectoriesString -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.cpp"` );
+
+for my $file (sort @files) {
+    next if $file =~ /\/WebLocalizableStrings\.h$/;
+    next if $file =~ /\/icu\//;
+
+    $file =~ s-^./--;
+
+    open SOURCE, $file or die "can't open $file\n";
+    
+    my $inComment = 0;
+    
+    my $expected = "";
+    my $macroLine;
+    my $macro;
+    my $UIString;
+    my $key;
+    my $comment;
+    
+    my $string;
+    my $stringLine;
+    my $nestingLevel;
+    
+    my $previousToken = "";
+
+    while (<SOURCE>) {
+        chomp;
+        
+        # Handle continued multi-line comment.
+        if ($inComment) {
+            next unless s-.*\*/--;
+            $inComment = 0;
+        }
+    
+        # Handle all the tokens in the line.
+        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
+            my $token = $1;
+            
+            if ($token eq "\"") {
+                if ($expected and $expected ne "a quoted string") {
+                    print "$file:$.:ERROR:found a quoted string but expected $expected\n";
+                    $sawError = 1;
+                    $expected = "";
+                }
+                if (s-^(([^\\$token]|\\.)*?)$token--) {
+                    if (!defined $string) {
+                        $stringLine = $.;
+                        $string = $1;
+                    } else {
+                        $string .= $1;
+                    }
+                } else {
+                    print "$file:$.:ERROR:mismatched quotes\n";
+                    $sawError = 1;
+                    $_ = "";
+                }
+                next;
+            }
+            
+            if (defined $string) {
+handleString:
+                if ($expected) {
+                    if (!defined $UIString) {
+                        # FIXME: Validate UTF-8 here?
+                        $UIString = $string;
+                        $expected = ",";
+                    } elsif (($macro eq "UI_STRING_KEY" or $macro eq "LPCTSTR_UI_STRING_KEY") and !defined $key) {
+                        # FIXME: Validate UTF-8 here?
+                        $key = $string;
+                        $expected = ",";
+                    } elsif (!defined $comment) {
+                        # FIXME: Validate UTF-8 here?
+                        $comment = $string;
+                        $expected = ")";
+                    }
+                } else {
+                    if (defined $nestingLevel) {
+                        # In a debug macro, no need to localize.
+                    } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
+                        # File name, no need to localize.
+                    } elsif ($previousToken eq "extern" and $string eq "C") {
+                        # extern "C", no need to localize.
+                    } elsif ($string eq "") {
+                        # Empty string can sometimes be localized, but we need not complain if not.
+                    } elsif ($exception{$file}) {
+                        $usedException{$file} = 1;
+                    } elsif ($exception{"\"$string\""}) {
+                        $usedException{"\"$string\""} = 1;
+                    } elsif ($exception{"$file:\"$string\""}) {
+                        $usedException{"$file:\"$string\""} = 1;
+                    } else {
+                        print "$file:$stringLine:\"$string\" is not marked for localization\n";
+                        $notLocalizedCount++;
+                    }
+                }
+                $string = undef;
+                last if !defined $token;
+            }
+            
+            $previousToken = $token;
+
+            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) {
+                print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n";
+                $nestingLevel = 0 if !defined $nestingLevel;
+                $sawError = 1;
+                $NSLocalizeCount++;
+            } elsif ($token eq "/*") {
+                if (!s-^.*?\*/--) {
+                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
+                    $inComment = 1;
+                }
+            } elsif ($token eq "//") {
+                $_ = ""; # Discard the rest of the line
+            } elsif ($token eq "'") {
+                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
+                    print "$file:$.:ERROR:mismatched single quote\n";
+                    $sawError = 1;
+                    $_ = "";
+                }
+            } else {
+                if ($expected and $expected ne $token) {
+                    print "$file:$.:ERROR:found $token but expected $expected\n";
+                    $sawError = 1;
+                    $expected = "";
+                }
+                if ($token eq "UI_STRING" or $token eq "UI_STRING_KEY" or $token eq "LPCTSTR_UI_STRING" or $token eq "LPCTSTR_UI_STRING_KEY") {
+                    $expected = "(";
+                    $macro = $token;
+                    $UIString = undef;
+                    $key = undef;
+                    $comment = undef;
+                    $macroLine = $.;
+                } elsif ($token eq "(" or $token eq "[") {
+                    ++$nestingLevel if defined $nestingLevel;
+                    $expected = "a quoted string" if $expected;
+                } elsif ($token eq ",") {
+                    $expected = "a quoted string" if $expected;
+                } elsif ($token eq ")" or $token eq "]") {
+                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
+                    if ($expected) {
+                        $key = $UIString if !defined $key;
+                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
+                        $macro = "";
+                        $expected = "";
+                        $localizedCount++;
+                    }
+                } elsif ($isDebugMacro{$token}) {
+                    $nestingLevel = 0 if !defined $nestingLevel;
+                }
+            }
+        }
+            
+    }
+    
+    goto handleString if defined $string;
+    
+    if ($expected) {
+        print "$file:ERROR:reached end of file but expected $expected\n";
+        $sawError = 1;
+    }
+    
+    close SOURCE;
+}
+
+my %stringByKey;
+my %commentByKey;
+my %fileByKey;
+my %lineByKey;
+
+sub HandleUIString
+{
+    my ($string, $key, $comment, $file, $line) = @_;
+
+    my $bad = 0;
+    if (grep { $_ == 0xFFFD } unpack "U*", $string) {
+        print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
+        $bad = 1;
+    }
+    if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
+        print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
+        $bad = 1;
+    }
+    if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
+        print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
+        $bad = 1;
+    }
+    if ($bad) {
+        $sawError = 1;
+        return;
+    }
+    
+    if ($stringByKey{$key} && $stringByKey{$key} ne $string) {
+        print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n";
+        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
+        $keyCollisionCount++;
+        return;
+    }
+    if ($commentByKey{$key} && $commentByKey{$key} ne $comment) {
+        print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n";
+        print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n";
+        $keyCollisionCount++;
+        return;
+    }
+
+    $fileByKey{$key} = $file;
+    $lineByKey{$key} = $line;
+    $stringByKey{$key} = $string;
+    $commentByKey{$key} = $comment;
+}
+
+print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;
+
+my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
+if (@unusedExceptions) {
+    for my $unused (@unusedExceptions) {
+        print "$exceptionsFile:$exception{$unused}:exception $unused not used\n";
+    }
+    print "\n";
+}
+
+print "$localizedCount localizable strings\n" if $localizedCount;
+print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
+print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
+print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
+print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;
+
+if ($sawError) {
+    print "\nErrors encountered. Exiting without writing a $stringsFile file.\n";
+    exit 1;
+}
+
+my $localizedStrings = "";
+
+for my $key (sort keys %commentByKey) {
+    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
+}
+
+# Write out the strings file in UTF-16 with a BOM.
+utf8::decode($localizedStrings) if $^V ge chr(5).chr(8);
+my $output = pack "n*", (0xFEFF, unpack "U*", $localizedStrings);
+foreach my $directory (@directories) {
+    open STRINGS, ">", "$directory/$stringsFile" or die;
+    print STRINGS $output;
+    close STRINGS;
+}