--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/webengine/osswebengine/WebCore/platform/make-charset-table.pl Mon Mar 30 12:54:55 2009 +0300
@@ -0,0 +1,225 @@
+#!/usr/bin/perl -w
+
+# Copyright (C) 2003, 2004, 2005, 2006 Apple Computer, Inc. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of
+# its contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
+# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+use strict;
+
+my %aliasesFromCharsetsFile;
+my %namesWritten;
+
+my $output = "";
+
+my $error = 0;
+
+sub error ($)
+{
+ print STDERR @_, "\n";
+ $error = 1;
+}
+
+sub emit_line
+{
+ my ($name, $prefix, $encoding, $flags) = @_;
+
+ error "$name shows up twice in output" if $namesWritten{$name};
+ $namesWritten{$name} = 1;
+
+ $output .= " { \"$name\", $prefix$encoding },\n";
+}
+
+sub process_platform_encodings
+{
+ my ($filename, $PlatformPrefix) = @_;
+ my $baseFilename = $filename;
+ $baseFilename =~ s|.*/||;
+
+ my %seenPlatformNames;
+ my %seenIANANames;
+
+ open PLATFORM_ENCODINGS, $filename or die;
+
+ while (<PLATFORM_ENCODINGS>) {
+ chomp;
+ s/\#.*$//;
+ s/\s+$//;
+ if (my ($PlatformName, undef, $flags, $IANANames) = /^(.+?)(, (.+))?: (.+)$/) {
+ my %aliases;
+
+ my $PlatformNameWithFlags = $PlatformName;
+ if ($flags) {
+ $PlatformNameWithFlags .= ", " . $flags;
+ } else {
+ $flags = "NoEncodingFlags";
+ }
+ error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformNameWithFlags};
+ $seenPlatformNames{$PlatformNameWithFlags} = 1;
+
+ # Build the aliases list.
+ # Also check that no two names are part of the same entry in the charsets file.
+ my @IANANames = split ", ", $IANANames;
+ my $firstName = "";
+ my $canonicalFirstName = "";
+ my $prevName = "";
+ for my $name (@IANANames) {
+ if ($firstName eq "") {
+ if ($name !~ /^[-A-Za-z0-9_]+$/) {
+ error "$name, in $baseFilename, has illegal characters in it";
+ next;
+ }
+ $firstName = $name;
+ } else {
+ if ($name !~ /^[a-z0-9]+$/) {
+ error "$name, in $baseFilename, has illegal characters in it (must be all lowercase alphanumeric)";
+ next;
+ }
+ if ($name le $prevName) {
+ error "$name comes after $prevName in $baseFilename, but everything must be in alphabetical order";
+ }
+ $prevName = $name;
+ }
+
+ my $canonicalName = lc $name;
+ $canonicalName =~ tr/-_//d;
+
+ $canonicalFirstName = $canonicalName if $canonicalFirstName eq "";
+
+ error "$name is mentioned twice in $baseFilename" if $seenIANANames{$canonicalName};
+ $seenIANANames{$canonicalName} = 1;
+
+ $aliases{$canonicalName} = 1;
+ next if !$aliasesFromCharsetsFile{$canonicalName};
+ for my $alias (@{$aliasesFromCharsetsFile{$canonicalName}}) {
+ $aliases{$alias} = 1;
+ }
+ for my $otherName (@IANANames) {
+ next if $canonicalName eq $otherName;
+ if ($aliasesFromCharsetsFile{$otherName}
+ && $aliasesFromCharsetsFile{$canonicalName} eq $aliasesFromCharsetsFile{$otherName}
+ && $canonicalName le $otherName) {
+ error "$baseFilename lists both $name and $otherName under $PlatformName, but that aliasing is already specified in character-sets.txt";
+ }
+ }
+ }
+
+ # write out
+ emit_line($firstName, $PlatformPrefix, $PlatformName, $flags);
+ for my $alias (sort keys %aliases) {
+ emit_line($alias, $PlatformPrefix, $PlatformName, $flags) if $alias ne $canonicalFirstName;
+ }
+ } elsif (/^([a-zA-Z0-9_]+)(, (.+))?$/) {
+ my $PlatformName = $1;
+
+ error "Platform encoding name $PlatformName is mentioned twice in $baseFilename" if $seenPlatformNames{$PlatformName};
+ $seenPlatformNames{$PlatformName} = 1;
+ } elsif (/./) {
+ error "syntax error in platform-encodings.txt, line $.";
+ }
+ }
+
+ close PLATFORM_ENCODINGS;
+}
+
+sub process_iana_charset
+{
+ my ($canonical_name, @aliases) = @_;
+
+ return if !$canonical_name;
+
+ my @names = sort $canonical_name, @aliases;
+
+ for my $name (@names) {
+ $aliasesFromCharsetsFile{$name} = \@names;
+ }
+}
+
+sub process_iana_charsets
+{
+ my ($filename) = @_;
+
+ open CHARSETS, $filename or die;
+
+ my %seen;
+
+ my $canonical_name;
+ my @aliases;
+
+ my %exceptions = ( isoir91 => 1, isoir92 => 1 );
+
+ while (<CHARSETS>) {
+ chomp;
+ if ((my $new_canonical_name) = /Name: ([^ \t]*).*/) {
+ $new_canonical_name = lc $new_canonical_name;
+ $new_canonical_name =~ tr/a-z0-9//cd;
+
+ error "saw $new_canonical_name twice in character-sets.txt", if $seen{$new_canonical_name};
+ $seen{$new_canonical_name} = $new_canonical_name;
+
+ process_iana_charset $canonical_name, @aliases;
+
+ $canonical_name = $new_canonical_name;
+ @aliases = ();
+ } elsif ((my $new_alias) = /Alias: ([^ \t]*).*/) {
+ $new_alias = lc $new_alias;
+ $new_alias =~ tr/a-z0-9//cd;
+
+ # do this after normalizing the alias, sometimes character-sets.txt
+ # has weird escape characters, e.g. \b after None
+ next if $new_alias eq "none";
+
+ error "saw $new_alias twice in character-sets.txt $seen{$new_alias}, $canonical_name", if $seen{$new_alias} && $seen{$new_alias} ne $canonical_name && !$exceptions{$new_alias};
+ push @aliases, $new_alias if !$seen{$new_alias};
+ $seen{$new_alias} = $canonical_name;
+ }
+ }
+
+ process_iana_charset $canonical_name, @aliases;
+
+ close CHARSETS;
+}
+
+# Program body
+
+process_iana_charsets($ARGV[0]);
+process_platform_encodings($ARGV[1], $ARGV[2]);
+
+exit 1 if $error;
+
+print <<EOF
+// File generated by make-charset-table.pl. Do not edit!
+
+#include "config.h"
+#include "CharsetData.h"
+
+namespace WebCore {
+
+ const CharsetEntry CharsetTable[] = {
+$output
+ { 0, 0 }
+ };
+
+}
+EOF