Switch convert_to_epl from Perl to Python, adding unicode file support and a "--check" option
--- a/williamr/convert_to_epl.pl Fri Feb 05 14:53:29 2010 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright (c) 2009 Symbian Foundation Ltd
-# This component and the accompanying materials are made available
-# under the terms of the License "Eclipse Public License v1.0"
-# which accompanies this distribution, and is available
-# at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Symbian Foundation Ltd - initial contribution.
-#
-# Contributors:
-#
-# Description:
-# Map the SFL license to the EPL license, keeping a copy of the original file
-# in a parallel tree
-
-use strict;
-use File::Copy;
-use File::Path;
-
-if (scalar @ARGV != 2)
- {
- print <<'EOF';
-Incorrect number of arguments
-
-Usage: perl convert_to_epl.pl workdir savedir
-
-Recursively processes workdir to examine all of the text files and convert
-all perfectly formed instances of the SFL copyright notice into EPL notices.
-
-If a file is modified, the original is first copied to the corresponding place
-under savedir.
-
-It is safe to rerun this script if it stopped for any reason, as no converted
-SFL notice will ever match on the second run through.
-EOF
- exit 1;
- }
-
-my $work_root = $ARGV[0];
-my $saved_root = $ARGV[1];
-
-$work_root =~ s/\\/\//g; # convert to Unix separators please
-$saved_root =~ s/\\/\//g;
-
-print "* Processing $work_root, leaving the original of any modified file in $saved_root\n";
-
-my $debug = 0;
-
-my @oldtext = (
- 'terms of the License "Symbian Foundation License v1.0"',
- 'the URL "http://www.symbianfoundation.org/legal/sfl-v10.html"'
-);
-my @newtext = (
- 'terms of the License "Eclipse Public License v1.0"',
- 'the URL "http://www.eclipse.org/legal/epl-v10.html"'
-);
-
-my @errorfiles = ();
-my @multinoticefiles = ();
-
-sub map_epl($$$)
- {
- my ($file,$shadowdir,$name) = @_;
-
- open FILE, "<$file" or print "ERROR: Cannot open $file: $!\n" and return "Cannot open";
- my @lines = <FILE>;
- close FILE;
-
- my $updated = 0;
- my @newlines = ();
- while (my $line = shift @lines)
- {
- # under the terms of the License "Symbian Foundation License v1.0"
- # which accompanies this distribution, and is available
- # at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
- my $pos1 = index $line, $oldtext[0];
- if ($pos1 >= 0)
- {
- # be careful - oldtext is a prefix of newtext!
- if (index($line, $newtext[0]) >= 0)
- {
- # line already converted - nothing to do
- push @newlines, $line;
- next;
- }
- my $midline = shift @lines;
- my $urlline = shift @lines;
- my $pos2 = index $urlline, $oldtext[1];
- if ($pos2 >= 0)
- {
- # Found it - assume that there's only one instance
- substr $line, $pos1, length($oldtext[0]), $newtext[0];
- substr $urlline, $pos2, length($oldtext[1]), $newtext[1];
- push @newlines, $line, $midline, $urlline;
- $updated += 1;
- next;
- }
- else
- {
- if(!$updated)
- {
- my $lineno = 1 + (scalar @newlines);
- print STDERR "Problem in $file at $lineno: incorrectly formatted >\n$line$midline$urlline\n";
- push @errorfiles, $file;
- }
- last;
- }
- }
- push @newlines, $line;
- }
-
- return if (!$updated);
-
- if ($updated > 1)
- {
- push @multinoticefiles, $file;
- print "! found $updated SFL notices in $file\n";
- }
-
- mkpath($shadowdir, {verbose=>0});
- move($file, "$shadowdir/$name") or die("Cannot move $file to $shadowdir/$name: $!\n");
- open NEWFILE, ">$file" or die("Cannot overwrite $file: $!\n");
- print NEWFILE @newlines, @lines;
- close NEWFILE or die("Failed to update $file: $!\n");
- print "* updated $file\n";
- }
-
-# Process tree
-
-sub scan_directory($$)
- {
- my ($path, $shadow) = @_;
-
- opendir DIR, $path;
- my @files = grep !/^\.\.?$/, readdir DIR;
- closedir DIR;
-
- foreach my $file (@files)
- {
- my $newpath = "$path/$file";
- my $newshadow = "$shadow/$file";
-
- if (-d $newpath)
- {
- scan_directory($newpath, $newshadow);
- next;
- }
- next if (-B $newpath); # ignore binary files
-
- map_epl($newpath, $shadow, $file);
- }
- }
-
-scan_directory($work_root, $saved_root);
-
-printf "%d problem files\n", scalar @errorfiles;
-print "\t", join("\n\t", @errorfiles), "\n";
-
-printf "%d files with multiple notices\n", scalar @multinoticefiles;
-print "\t", join("\n\t", @multinoticefiles), "\n";
-
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/williamr/convert_to_epl.py Tue Feb 09 12:50:02 2010 +0000
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+# Copyright (c) 2009 Symbian Foundation.
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation - Initial contribution
+#
+# Description:
+# Map the SFL license to the EPL license
+
+import os
+import os.path
+import re
+import codecs
+from optparse import OptionParser
+import sys
+
+# Results collected while the tree is processed and printed in the summary.
+errorfiles, multinoticefiles = [], []
+shadowroot = 'shadow_epoc32' # not referenced by any live code in this script
+
+# The two lines of an SFL notice that get rewritten, as search patterns...
+oldtext0 = re.compile('terms of the License "Symbian Foundation License v1.0"(to Symbian Foundation)?')
+oldtext1 = re.compile('the URL "http:..www.symbianfoundation.org/legal/sfl-v10.html"')
+
+# ...and the EPL text that replaces each match, in the same order.
+newtext = ['terms of the License "Eclipse Public License v1.0"',
+           'the URL "http://www.eclipse.org/legal/epl-v10.html"']
+
+def file_type(file) :
+    """Sniff 256 bytes: 'utf_16_le' for a UTF-16 LE BOM, None for a file of
+    under 2 bytes or one holding a zero byte (binary), otherwise 'text'."""
+    f = open(file, 'rb') # binary mode: Windows text mode would alter the bytes
+    data = f.read(256)
+    f.close()
+    if data.startswith(codecs.BOM_UTF16_LE):
+        return 'utf_16_le'
+    if len(data) < 2 or data.find(chr(0)) >= 0:
+        return None # too short to bother with, or a zero byte implies binary
+    return 'text'
+
+def map_eula(dir, name, encoded) :
+    """Convert every well-formed SFL notice in one file to the EPL notice.
+
+    A notice is a line matching oldtext0 whose second following non-blank
+    line matches oldtext1; both are rewritten from newtext and the lines
+    between them are kept unchanged.
+
+    dir, name -- directory and file name, joined to locate the file
+    encoded   -- 'text' to use plain open(), otherwise a codec name that
+                 is passed to codecs.open()
+
+    Returns 1 if at least one notice was converted, else 0.  The file is
+    rewritten in place unless options.dryrun is set.  The path is added
+    to errorfiles when the first notice found is malformed, and to
+    multinoticefiles when more than one notice was converted.
+    """
+    file = os.path.join(dir, name)
+    if encoded == 'text':
+        f = open(file, 'r')
+    else:
+        f = codecs.open(file, 'r', encoding=encoded)
+    lines = f.readlines()
+    f.close()
+
+    updated = 0
+    newlines = []
+    while len(lines) > 0:
+        line = lines.pop(0)
+        pos1 = oldtext0.search(line)
+        if pos1 == None or pos1.group(1) != None:
+            # not a notice line, or the optional suffix in oldtext0 matched,
+            # which this script treats as "already converted"
+            newlines.append(line)
+            continue
+
+        # Take the following lines up to and including the second
+        # non-blank one, which is where the URL line should be.
+        consumed = []
+        nonblank = 0
+        while len(lines) > 0 and nonblank < 2:
+            nextline = lines.pop(0)
+            if not re.match(r'^\s*$', nextline):
+                nonblank += 1
+            consumed.append(nextline)
+        pos2 = None
+        if nonblank == 2: # the file did not end before the URL line
+            pos2 = oldtext1.search(consumed[-1])
+        if pos2 != None:
+            # found it - rewrite the first and last lines of the notice
+            newlines.append(oldtext0.sub(newtext[0], line))
+            newlines.extend(consumed[:-1])
+            newlines.append(oldtext1.sub(newtext[1], consumed[-1]))
+            updated += 1
+            continue
+        if updated == 0:
+            # only a malformed *first* notice is reported as a problem
+            lineno = 1 + len(newlines)
+            print "Problem in %s at %d: incorrectly formatted >" % (file, lineno)
+            print line + ''.join(consumed)
+            errorfiles.append(file)
+        # Stop scanning, but keep this line and the rest of the file
+        # verbatim so nothing is lost if earlier notices get written out.
+        newlines.append(line)
+        newlines.extend(consumed)
+        newlines.extend(lines)
+        break
+
+    if updated == 0:
+        return 0
+
+    if updated > 1:
+        multinoticefiles.append(file)
+        print '! found %d SFL notices in %s' % (updated, file)
+    if not options.dryrun:
+        if encoded == 'text':
+            f = open(file, 'w')
+        else:
+            f = codecs.open(file, 'w', encoding=encoded)
+        f.writelines(newlines)
+        f.close()
+    # NOTE(review): printed in --check mode too, as its help text promises
+    print "* updated %s (encoding %s)" % (file, encoded)
+    return 1
+
+parser = OptionParser(version="%prog 0.2", usage="Usage: %prog [options]")
+parser.add_option("-n", "--check", action="store_true", dest="dryrun",
+    help="report the files which would be updated, but don't change anything")
+parser.set_defaults(dryrun=False)
+(options, args) = parser.parse_args()
+if len(args) != 0:
+    parser.error("Unexpected commandline arguments")
+
+# Process the tree below the current directory. A file that cannot be read
+# or decoded is reported and counted as a problem instead of ending the run.
+update_count = 0
+for root, dirs, files in os.walk('.', topdown=True):
+    if '.hg' in dirs:
+        dirs.remove('.hg') # don't recurse into the Mercurial repository storage
+    for name in files:
+        path = os.path.join(root, name)
+        try:
+            encoding = file_type(path)
+            if encoding: # None means binary or too short, so skip the file
+                update_count += map_eula(root, name, encoding)
+        except (EnvironmentError, UnicodeError):
+            print "ERROR: Cannot process %s: %s" % (path, sys.exc_info()[1])
+            errorfiles.append(path)
+print '%d problem files' % len(errorfiles)
+print errorfiles
+print '%d files with multiple notices' % len(multinoticefiles)
+print multinoticefiles
+if options.dryrun and update_count > 0:
+    print "%d files need updating" % update_count
+    sys.exit(1) # non-zero exit status signals that --check found pending work
+
+