Catchup Merge
author Chetan Kapoor <chetank@symbian.org>
date Wed, 10 Feb 2010 12:36:27 +0000
changeset 159 d50cda9d0682
parent 158 27cf0a88d449 (current diff)
parent 157 0df3a90af030 (diff)
child 160 20b456b3eebf
files downloadkit/downloadkit.py williamr/convert_to_epl.pl williamr/convert_to_epl.py
--- a/downloadkit/downloadkit.py	Mon Feb 08 16:26:02 2010 +0000
+++ b/downloadkit/downloadkit.py	Wed Feb 10 12:36:27 2010 +0000
@@ -22,6 +22,8 @@
 import time
 from BeautifulSoup import BeautifulSoup
 from optparse import OptionParser
+import hashlib
+import xml.etree.ElementTree as ET 
 
 user_agent = 'downloadkit.py script'
 headers = { 'User-Agent' : user_agent }
@@ -162,8 +164,11 @@
 
 def orderResults(x,y) :
 	def ranking(name) :
+		# 0th = release_metadata
+		if re.match(r"release_metadata", name):
+			return 0000;
-		# 1st = release_metadata, build_BOM.zip (both small things!)
+		# 1st = build_BOM.zip (small, and needed early)
-		if re.match(r"(build_BOM|release_metadata)", name):
+		if re.match(r"build_BOM", name):
 			return 1000;
 		# 2nd = tools, binaries (required for execution and compilation)
 		elif re.match(r"(binaries_|tools_)", name):
@@ -183,48 +188,97 @@
 	ytitle = y['title']
 	return cmp(ranking(xtitle)+cmp(xtitle,ytitle), ranking(ytitle))
 
+def md5_checksum(filename):
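+	# hash the file incrementally in 128Kb blocks rather than reading it into memory whole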
+	MD5_BLOCK_SIZE = 128 * 1024
+	md5 = hashlib.md5()
+	try:
+		file = open(filename,"rb")
+	except IOError:
+		print "Terminating script: Unable to open %s" % filename
+		sys.exit()
+	while True:
+		data = file.read(MD5_BLOCK_SIZE)
+		if not data:
+			break
+		md5.update(data)
+	file.close()
+	return md5.hexdigest().upper()
+
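+# filename -> expected MD5 digest (uppercase hex), parsed from release_metadata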
+checksums = {}
+def parse_release_metadata(filename):
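+	# record the md5checksum attribute of each <package> element in the metadata file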
+	if os.path.exists(filename):
+		tree = ET.parse(filename)
+		iter = tree.getiterator('package')
+		for element in iter:
+			if element.keys():
+				file = element.get("name")
+				md5 = element.get("md5checksum")
+				checksums[file] = md5.upper()
+
 def download_file(filename,url):
 	global options
-	if options.dryrun :
+	global checksums
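+	# skip the download if an existing local copy already matches its published checksum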
+	if os.path.exists(filename):
+		if filename in checksums:
+			print 'Checking existing ' + filename
+			file_checksum = md5_checksum(filename)
+			if file_checksum == checksums[filename]:
+				if options.progress:
+					print '- OK ' + filename
+				return True
+
+	if options.dryrun and not re.match(r"release_metadata", filename):
 		global download_list
 		download_info = "download %s %s" % (filename, url)
 		download_list.append(download_info)
 		return True
-	
+
 	print 'Downloading ' + filename
 	global headers
 	req = urllib2.Request(url, None, headers)
 	
+	CHUNK = 128 * 1024
+	size = 0
+	filesize = -1
+	start_time = time.time()
+	last_time = start_time
+	last_size = size
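+	# the first chunk is checked for a sign-in page, so a lapsed login can be retried once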
 	try:
 		response = urllib2.urlopen(req)
-		CHUNK = 128 * 1024
-		size = 0
-		filesize = -1
-		last_time = time.time()
-		last_size = size
-		fp = open(filename, 'wb')
-		while True:
+		chunk = response.read(CHUNK)
+		if chunk.find('<div id="sign_in_box">') != -1:
+			# our urllib2 cookies have gone awol - login again
+			login(False)
+			req = urllib2.Request(url, None, headers)
+			response = urllib2.urlopen(req)
 			chunk = response.read(CHUNK)
-			if not chunk: break
-			if size == 0 and chunk.find('<div id="sign_in_box">') != -1:
-				# our urllib2 cookies have gone awol - login again
-				login(False)
-				req = urllib2.Request(url, None, headers)
-				response = urllib2.urlopen(req)
-				chunk = response.read(CHUNK)
-				if chunk.find('<div id="sign_in_box">') != -1:
-					# still broken - give up on this one
-					print "*** ERROR trying to download %s" % (filename)
-					break;
-			if size == 0:
-				info = response.info()
-				if 'Content-Length' in info:
-					filesize = int(info['Content-Length'])
-				else:
-					print "*** HTTP response did not contain 'Content-Length' when expected"
-					print info
-					break
+			if chunk.find('<div id="sign_in_box">') != -1:
+				# still broken - give up on this one
+				print "*** ERROR trying to download %s" % (filename)
+				return False
+		info = response.info()
+		if 'Content-Length' in info:
+			filesize = int(info['Content-Length'])
+		else:
+			print "*** HTTP response did not contain 'Content-Length' when expected"
+			print info
+			return False
+
+	except urllib2.HTTPError, e:
+		print "HTTP Error:",e.code , url
+		return False
+	except urllib2.URLError, e:
+		print "URL Error:",e.reason , url
+		return False
+
+	# we are now up and running, and chunk contains the start of the download
+	
+	try:
+		fp = open(filename, 'wb')
+		md5 = hashlib.md5()
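+		# feed the digest as each chunk is written, avoiding a second pass over the file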
+		while True:
 			fp.write(chunk)
+			md5.update(chunk)
 			size += len(chunk)
 			now = time.time()
 			if options.progress and now-last_time > 20:
@@ -240,10 +294,13 @@
 				print "- %d Kb (%d Kb/s) %s" % (size/1024, (rate/1024)+0.5, estimate)
 				last_time = now
 				last_size = size
+			chunk = response.read(CHUNK)
+			if not chunk: break
+
 		fp.close()
 		if options.progress:
 			now = time.time()
-			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-last_time)
+			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-start_time)
 
 	#handle errors
 	except urllib2.HTTPError, e:
@@ -252,6 +309,12 @@
 	except urllib2.URLError, e:
 		print "URL Error:",e.reason , url
 		return False
+
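+	# warn if the downloaded data disagrees with the checksum in release_metadata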
+	if filename in checksums:
+		download_checksum = md5.hexdigest().upper()
+		if download_checksum != checksums[filename]:
+			print '- WARNING: %s checksum does not match' % filename
+
 	return True
 
 def downloadkit(version):	
@@ -298,7 +361,9 @@
 		if re.match(r"patch", filename):
 			complete_outstanding_unzips()	# ensure that the thing we are patching is completed first
 			
-		if re.match(r"(bin|tools).*\.zip", filename):
+		if re.match(r"release_metadata", filename):
+			parse_release_metadata(filename)	# read the md5 checksums etc
+		elif re.match(r"(bin|tools).*\.zip", filename):
 			schedule_unzip(filename, 1, 0)   # unzip once, don't delete
 		elif re.match(r"src_.*\.zip", filename):
 			schedule_unzip(filename, 1, 1)   # zip of zips, delete top level
@@ -310,7 +375,7 @@
 
 	return 1
 
-parser = OptionParser(version="%prog 0.6.1", usage="Usage: %prog [options] version")
+parser = OptionParser(version="%prog 0.7", usage="Usage: %prog [options] version")
 parser.add_option("-n", "--dryrun", action="store_true", dest="dryrun",
 	help="print the files to be downloaded, the 7z commands, and the recommended deletions")
 parser.add_option("--nosrc", action="store_true", dest="nosrc",
--- a/williamr/convert_to_epl.pl	Mon Feb 08 16:26:02 2010 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright (c) 2009 Symbian Foundation Ltd
-# This component and the accompanying materials are made available
-# under the terms of the License "Eclipse Public License v1.0"
-# which accompanies this distribution, and is available
-# at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Symbian Foundation Ltd - initial contribution.
-# 
-# Contributors:
-#
-# Description:
-# Map the SFL license to the EPL license, keeping a copy of the original file
-# in a parallel tree 
-
-use strict;
-use File::Copy;
-use File::Path;
-
-if (scalar @ARGV != 2)
-  {
-	print <<'EOF';
-Incorrect number of arguments
-
-Usage: perl convert_to_epl.pl workdir savedir
-
-Recursively processes workdir to examine all of the text files and convert
-all perfectly formed instances of the SFL copyright notice into EPL notices.
-
-If a file is modified, the original is first copied to the corresponding place
-under savedir. 
-
-It is safe to rerun this script if it stopped for any reason, as no converted 
-SFL notice will ever match on the second run through.
-EOF
-  exit 1;
-  }
-
-my $work_root = $ARGV[0];
-my $saved_root = $ARGV[1];
-
-$work_root =~ s/\\/\//g;    # convert to Unix separators please
-$saved_root =~ s/\\/\//g;
-
-print "* Processing $work_root, leaving the original of any modified file in $saved_root\n";
-
-my $debug = 0;
-
-my @oldtext = (
-  'terms of the License "Symbian Foundation License v1.0"',
-  'the URL "http://www.symbianfoundation.org/legal/sfl-v10.html"'
-);
-my @newtext = (
-  'terms of the License "Eclipse Public License v1.0"',
-  'the URL "http://www.eclipse.org/legal/epl-v10.html"'
-);
-
-my @errorfiles = ();
-my @multinoticefiles = ();
-
-sub map_epl($$$)
-  {
-  my ($file,$shadowdir,$name) = @_;
-  
-  open FILE, "<$file" or print "ERROR: Cannot open $file: $!\n" and return "Cannot open";
-  my @lines = <FILE>;
-  close FILE;
-  
-  my $updated = 0;
-  my @newlines = ();
-  while (my $line = shift @lines)
-    { 
-    # under the terms of the License "Symbian Foundation License v1.0"
-    # which accompanies this distribution, and is available
-    # at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
-    my $pos1 = index $line, $oldtext[0];
-    if ($pos1 >= 0)
-      {
-      # be careful - oldtext is a prefix of newtext!
-      if (index($line, $newtext[0]) >= 0)
-        {
-        # line already converted - nothing to do
-        push @newlines, $line;
-        next;
-        }
-      my $midline = shift @lines;
-      my $urlline = shift @lines;
-      my $pos2 = index $urlline, $oldtext[1];
-      if ($pos2 >= 0)
-        {
-        # Found it - assume that there's only one instance
-        substr $line, $pos1, length($oldtext[0]), $newtext[0];
-        substr $urlline, $pos2, length($oldtext[1]), $newtext[1];
-        push @newlines, $line, $midline, $urlline;
-        $updated += 1;
-        next;
-        }
-      else
-        {
-        if(!$updated)
-          {
-          my $lineno = 1 + (scalar @newlines);
-          print STDERR "Problem in $file at $lineno: incorrectly formatted >\n$line$midline$urlline\n";
-          push @errorfiles, $file;
-          }	
-        last;
-        }
-      }
-    push @newlines, $line;
-    }
-
-  return if (!$updated);
-  
-  if ($updated > 1)
-    {
-    push @multinoticefiles, $file;
-    print "! found $updated SFL notices in $file\n";
-    }
- 
-  mkpath($shadowdir, {verbose=>0});
-  move($file, "$shadowdir/$name") or die("Cannot move $file to $shadowdir/$name: $!\n");
-  open NEWFILE, ">$file" or die("Cannot overwrite $file: $!\n");
-  print NEWFILE @newlines, @lines;
-  close NEWFILE or die("Failed to update $file: $!\n");
-  print "* updated $file\n";
-  }
-
-# Process tree
-
-sub scan_directory($$)
-  {
-  my ($path, $shadow) = @_;
-  
-  opendir DIR, $path;
-  my @files = grep !/^\.\.?$/, readdir DIR;
-  closedir DIR;
-  
-  foreach my $file (@files)
-    {
-    my $newpath = "$path/$file";
-    my $newshadow = "$shadow/$file";
-    
-    if (-d $newpath)
-      {
-      scan_directory($newpath, $newshadow);
-      next;
-      }
-    next if (-B $newpath);  # ignore binary files
-    
-    map_epl($newpath, $shadow, $file);
-    }
-  }
-
-scan_directory($work_root, $saved_root);
-
-printf "%d problem files\n", scalar @errorfiles;
-print "\t", join("\n\t", @errorfiles), "\n";
-
-printf "%d files with multiple notices\n", scalar @multinoticefiles;
-print "\t", join("\n\t", @multinoticefiles), "\n";
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/williamr/convert_to_epl.py	Wed Feb 10 12:36:27 2010 +0000
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+# Copyright (c) 2009 Symbian Foundation.
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation - Initial contribution
+# 
+# Description:
+# Map the SFL license to the EPL license
+
+import os
+import os.path
+import re
+import codecs
+from optparse import OptionParser
+import sys
+
+oldtext0 = re.compile('terms of the License "Symbian Foundation License v1.0"(to Symbian Foundation)?')
+oldtext1 = re.compile('the URL "http:..www.symbianfoundation.org/legal/sfl-v10.html"')
+
+newtext = [
+  'terms of the License "Eclipse Public License v1.0"',
+  'the URL "http://www.eclipse.org/legal/epl-v10.html"'
+]
+
+errorfiles = []
+multinoticefiles = []
+shadowroot = 'shadow_epoc32'
+
+def file_type(file) :
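+	# sniff the first 256 bytes: an FF FE BOM means UTF-16 LE, a NUL byte means binary, otherwise text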
+	f = open(file, 'r')
+	data = f.read(256)
+	f.close()
+	if len(data) < 2:
+		return None # too short to be worth bothering about anyway
+	if data[0] == chr(255) and data[1] == chr(254) :
+		return 'utf_16_le'
+	if data.find(chr(0)) >= 0 : 
+		return None	# zero byte implies binary file
+	return 'text'
+	
+def map_eula(dir, name, encoded) :
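+	# convert the SFL notice(s) in one file to EPL; returns 1 if the file was (or would be) updated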
+	global oldtext0
+	global oldtext1
+	global newtext
+	file = os.path.join(dir, name)
+	if encoded == 'text':
+		f = open(file, 'r')
+	else:
+		f = codecs.open(file, 'r', encoding=encoded)
+	lines = f.readlines()
+	# print ">> %s encoded as %s" % (file, f.encoding)
+	f.close()
+	
+	updated = 0
+	newlines = []
+	while len(lines) > 0:
+		line = lines.pop(0)
+		pos1 = oldtext0.search(line)
+		if pos1 != None:
+			# be careful - oldtext is a prefix of newtext
+			if pos1.group(1) != None:
+				# line already converted - nothing to do
+				newlines.append(line)
+				continue
+			midlines = []
+			midlinecount = 1
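+			# gather the notice's middle line (and any blank lines) until the URL line is reached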
+			while len(lines) > 0:
+				nextline = lines.pop(0)
+				if not re.match(r'^\s*$', nextline):
+					# non-blank line
+					if midlinecount == 0:
+						break
+					midlinecount -= 1
+				midlines.append(nextline)
+			urlline = nextline
+			pos2 = oldtext1.search(urlline)
+			if pos2 != None:
+				# found it - assume that there's only one instance
+				newline = oldtext0.sub(newtext[0], line)
+				newurl  = oldtext1.sub(newtext[1], urlline)
+				newlines.append(newline)
+				newlines.extend(midlines)
+				newlines.append(newurl)
+				updated += 1
+				continue
+			else:
+				if updated == 0:
+					lineno = 1 + len(newlines)
+					print "Problem in %s at %d: incorrectly formatted >" % (file, lineno)
+					print line
+					print midlines
+					print urlline
+					global errorfiles
+					errorfiles.append(file)
+				break
+		newlines.append(line)
+	
+	if updated == 0:
+		# print " = no change to " + file
+		return 0
+	
+	if updated > 1:
+		global multinoticefiles
+		multinoticefiles.append(file)
+		print '! found %d SFL notices in %s' % (updated, file)
+	
+	# global shadowroot
+	# shadowdir = os.path.join(shadowroot, dir)
+	# if not os.path.exists(shadowdir) :
+	# 	os.makedirs(shadowdir)
+	# newfile = os.path.join(shadowroot,file)
+	# os.rename(file, newfile)
+	
+	global options
+	if not options.dryrun:
+		if encoded == 'text':
+			f = open(file, 'w')
+		else:
+			f = codecs.open(file, 'w', encoding=encoded)
+		f.writelines(newlines)
+		f.close()
+	print "* updated %s (encoding %s)" % (file, encoded)
+	return 1
+
+parser = OptionParser(version="%prog 0.2", usage="Usage: %prog [options]")
+parser.add_option("-n", "--check", action="store_true", dest="dryrun",
+	help="report the files which would be updated, but don't change anything")
+parser.set_defaults(dryrun=False)
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+	parser.error("Unexpected commandline arguments")
+
+# process tree
+
+update_count = 0
+for root, dirs, files in os.walk('.', topdown=True):
+	if '.hg' in dirs:
+		dirs.remove('.hg') # don't recurse into the Mercurial repository storage
+	for name in files:
+		encoding = file_type(os.path.join(root, name))
+		if encoding:
+			update_count += map_eula(root, name, encoding)
+	
+print '%d problem files' % len(errorfiles)
+print errorfiles
+
+print '%d files with multiple notices' % len(multinoticefiles)
+print multinoticefiles
+
+if options.dryrun and update_count > 0:
+	print "%d files need updating" % update_count
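+	# exit non-zero so callers can tell that the dry run found files still to convert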
+	sys.exit(1)