# HG changeset patch
# User Chetan Kapoor
# Date 1265805387 0
# Node ID d50cda9d0682aeed2cd373f5f22739388da58404
# Parent 27cf0a88d449338f0df3bab12026d51d27e7aa30
# Parent 0df3a90af03089c5c34bd4e1d2635a6bf297bd26
Catchup Merge

diff -r 27cf0a88d449 -r d50cda9d0682 downloadkit/downloadkit.py
--- a/downloadkit/downloadkit.py	Mon Feb 08 16:26:02 2010 +0000
+++ b/downloadkit/downloadkit.py	Wed Feb 10 12:36:27 2010 +0000
@@ -22,6 +22,8 @@
 import time
 from BeautifulSoup import BeautifulSoup
 from optparse import OptionParser
+import hashlib
+import xml.etree.ElementTree as ET
 
 user_agent = 'downloadkit.py script'
 headers = { 'User-Agent' : user_agent }
@@ -162,8 +164,11 @@
 
 def orderResults(x,y) :
 	def ranking(name) :
+		# 0th = release_metadata
+		if re.match(r"release_metadata", name):
+			return 0000;
 		# 1st = release_metadata, build_BOM.zip (both small things!)
-		if re.match(r"(build_BOM|release_metadata)", name):
+		if re.match(r"build_BOM", name):
 			return 1000;
 		# 2nd = tools, binaries (required for execution and compilation)
 		elif re.match(r"(binaries_|tools_)", name):
@@ -183,48 +188,97 @@
 	ytitle = y['title']
 	return cmp(ranking(xtitle)+cmp(xtitle,ytitle), ranking(ytitle))
 
+def md5_checksum(filename):
+	MD5_BLOCK_SIZE = 128 * 1024
+	md5 = hashlib.md5()
+	try:
+		file = open(filename,"rb")
+	except IOError:
+		print "Terminating script: Unable to open %s" % filename
+		sys.exit()
+	while True:
+		data = file.read(MD5_BLOCK_SIZE)
+		if not data:
+			break
+		md5.update(data)
+	file.close()
+	return md5.hexdigest().upper()
+
+checksums = {}
+def parse_release_metadata(filename):
+	if os.path.exists(filename):
+		tree = ET.parse(filename)
+		iter = tree.getiterator('package')
+		for element in iter:
+			if element.keys():
+				file = element.get("name")
+				md5 = element.get("md5checksum")
+				checksums[file] = md5.upper()
+
 def download_file(filename,url):
 	global options
-	if options.dryrun :
+	global checksums
+	if os.path.exists(filename):
+		if filename in checksums:
+			print 'Checking existing ' + filename
+			file_checksum = md5_checksum(filename)
+			if file_checksum == checksums[filename]:
+				if options.progress:
+					print '- OK ' + filename
+				return True
+
+	if options.dryrun and not re.match(r"release_metadata", filename):
 		global download_list
 		download_info = "download %s %s" % (filename, url)
 		download_list.append(download_info)
 		return True
-	
+
 	print 'Downloading ' + filename
 	global headers
	req = urllib2.Request(url, None, headers)
+	CHUNK = 128 * 1024
+	size = 0
+	filesize = -1
+	start_time = time.time()
+	last_time = start_time
+	last_size = size
 	try:
 		response = urllib2.urlopen(req)
-		CHUNK = 128 * 1024
-		size = 0
-		filesize = -1
-		last_time = time.time()
-		last_size = size
-		fp = open(filename, 'wb')
-		while True:
+		chunk = response.read(CHUNK)
+		if chunk.find('<div id="sign_in_box">') != -1:
+			# our urllib2 cookies have gone awol - login again
+			login(False)
+			req = urllib2.Request(url, None, headers)
+			response = urllib2.urlopen(req)
 			chunk = response.read(CHUNK)
-			if not chunk: break
-			if size == 0 and chunk.find('<div id="sign_in_box">') != -1:
-				# our urllib2 cookies have gone awol - login again
-				login(False)
-				req = urllib2.Request(url, None, headers)
-				response = urllib2.urlopen(req)
-				chunk = response.read(CHUNK)
-				if chunk.find('<div id="sign_in_box">') != -1:
-					# still broken - give up on this one
-					print "*** ERROR trying to download %s" % (filename)
-					break;
-			if size == 0:
-				info = response.info()
-				if 'Content-Length' in info:
-					filesize = int(info['Content-Length'])
-				else:
-					print "*** HTTP response did not contain 'Content-Length' when expected"
-					print info
-					break
+			if chunk.find('<div id="sign_in_box">') != -1:
+				# still broken - give up on this one
+				print "*** ERROR trying to download %s" % (filename)
+				return False
+		info = response.info()
+		if 'Content-Length' in info:
+			filesize = int(info['Content-Length'])
+		else:
+			print "*** HTTP response did not contain 'Content-Length' when expected"
+			print info
+			return False
+
+	except urllib2.HTTPError, e:
+		print "HTTP Error:",e.code , url
+		return False
+	except urllib2.URLError, e:
+		print "URL Error:",e.reason , url
+		return False
+
+	# we are now up and running, and chunk contains the start of the download
+
+	try:
+		fp = open(filename, 'wb')
+		md5 = hashlib.md5()
+		while True:
 			fp.write(chunk)
+			md5.update(chunk)
 			size += len(chunk)
 			now = time.time()
 			if options.progress and now-last_time > 20:
@@ -240,10 +294,13 @@
 				print "- %d Kb (%d Kb/s) %s" % (size/1024, (rate/1024)+0.5, estimate)
 				last_time = now
 				last_size = size
+			chunk = response.read(CHUNK)
+			if not chunk: break
+
 		fp.close()
 		if options.progress:
 			now = time.time()
-			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-last_time)
+			print "- Completed %s - %d Kb in %d seconds" % (filename, (filesize/1024)+0.5, now-start_time)
 
 	#handle errors
 	except urllib2.HTTPError, e:
@@ -252,6 +309,12 @@
 	except urllib2.URLError, e:
 		print "URL Error:",e.reason , url
 		return False
+
+	if filename in checksums:
+		download_checksum = md5.hexdigest().upper()
+		if download_checksum != checksums[filename]:
+			print '- WARNING: %s checksum does not match' % filename
+
 	return True
 
 def downloadkit(version):
@@ -298,7 +361,9 @@
 
 		if re.match(r"patch", filename):
 			complete_outstanding_unzips()	# ensure that the thing we are patching is completed first
-		if re.match(r"(bin|tools).*\.zip", filename):
+		if re.match(r"release_metadata", filename):
+			parse_release_metadata(filename)	# read the md5 checksums etc
+		elif re.match(r"(bin|tools).*\.zip", filename):
 			schedule_unzip(filename, 1, 0)	# unzip once, don't delete
 		elif re.match(r"src_.*\.zip", filename):
 			schedule_unzip(filename, 1, 1)	# zip of zips, delete top level
@@ -310,7 +375,7 @@
 
 	return 1
 
-parser = OptionParser(version="%prog 0.6.1", usage="Usage: %prog [options] version")
+parser = OptionParser(version="%prog 0.7", usage="Usage: %prog [options] version")
 parser.add_option("-n", "--dryrun", action="store_true", dest="dryrun",
 	help="print the files to be downloaded, the 7z commands, and the recommended deletions")
 parser.add_option("--nosrc", action="store_true", dest="nosrc",
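Editorial note (not part of the patch): the checksum machinery added to downloadkit.py above is self-contained enough to exercise on its own. The sketch below shows the intended flow - parse the release metadata into a name-to-MD5 map, hash a downloaded file in blocks, and compare. It assumes a metadata file whose package elements carry name and md5checksum attributes, which is what parse_release_metadata() reads; the metadata filename and package name used here are hypothetical examples.

import hashlib
import xml.etree.ElementTree as ET

def load_checksums(metadata_file):
    # build {package name: uppercased MD5}, as parse_release_metadata() does
    checksums = {}
    for element in ET.parse(metadata_file).getiterator('package'):
        name = element.get("name")
        md5 = element.get("md5checksum")
        if name and md5:
            checksums[name] = md5.upper()
    return checksums

def file_md5(filename, block_size=128 * 1024):
    # hash in 128 Kb blocks so a large zip is never held in memory whole
    md5 = hashlib.md5()
    f = open(filename, "rb")
    while True:
        data = f.read(block_size)
        if not data:
            break
        md5.update(data)
    f.close()
    return md5.hexdigest().upper()

# hypothetical names - substitute a real metadata file and package
checksums = load_checksums("release_metadata.xml")
name = "binaries_epoc.zip"
if name in checksums and file_md5(name) == checksums[name]:
    print '- OK ' + name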
diff -r 27cf0a88d449 -r d50cda9d0682 williamr/convert_to_epl.pl
--- a/williamr/convert_to_epl.pl	Mon Feb 08 16:26:02 2010 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,163 +0,0 @@
-#!/usr/bin/perl
-
-# Copyright (c) 2009 Symbian Foundation Ltd
-# This component and the accompanying materials are made available
-# under the terms of the License "Eclipse Public License v1.0"
-# which accompanies this distribution, and is available
-# at the URL "http://www.eclipse.org/legal/epl-v10.html".
-#
-# Initial Contributors:
-# Symbian Foundation Ltd - initial contribution.
-#
-# Contributors:
-#
-# Description:
-# Map the SFL license to the EPL license, keeping a copy of the original file
-# in a parallel tree
-
-use strict;
-use File::Copy;
-use File::Path;
-
-if (scalar @ARGV != 2)
-	{
-	print <<'EOF';
-Incorrect number of arguments
-
-Usage: perl convert_to_epl.pl workdir savedir
-
-Recursively processes workdir to examine all of the text files and convert
-all perfectly formed instances of the SFL copyright notice into EPL notices.
-
-If a file is modified, the original is first copied to the corresponding place
-under savedir.
-
-It is safe to rerun this script if it stopped for any reason, as no converted
-SFL notice will ever match on the second run through.
-EOF
-	exit 1;
-	}
-
-my $work_root = $ARGV[0];
-my $saved_root = $ARGV[1];
-
-$work_root =~ s/\\/\//g;	# convert to Unix separators please
-$saved_root =~ s/\\/\//g;
-
-print "* Processing $work_root, leaving the original of any modified file in $saved_root\n";
-
-my $debug = 0;
-
-my @oldtext = (
-	'terms of the License "Symbian Foundation License v1.0"',
-	'the URL "http://www.symbianfoundation.org/legal/sfl-v10.html"'
-);
-my @newtext = (
-	'terms of the License "Eclipse Public License v1.0"',
-	'the URL "http://www.eclipse.org/legal/epl-v10.html"'
-);
-
-my @errorfiles = ();
-my @multinoticefiles = ();
-
-sub map_epl($$$)
-	{
-	my ($file,$shadowdir,$name) = @_;
-
-	open FILE, "<$file" or print "ERROR: Cannot open $file: $!\n" and return "Cannot open";
-	my @lines = <FILE>;
-	close FILE;
-
-	my $updated = 0;
-	my @newlines = ();
-	while (my $line = shift @lines)
-		{
-		# under the terms of the License "Symbian Foundation License v1.0"
-		# which accompanies this distribution, and is available
-		# at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".
-		my $pos1 = index $line, $oldtext[0];
-		if ($pos1 >= 0)
-			{
-			# be careful - oldtext is a prefix of newtext!
-			if (index($line, $newtext[0]) >= 0)
-				{
-				# line already converted - nothing to do
-				push @newlines, $line;
-				next;
-				}
-			my $midline = shift @lines;
-			my $urlline = shift @lines;
-			my $pos2 = index $urlline, $oldtext[1];
-			if ($pos2 >= 0)
-				{
-				# Found it - assume that there's only one instance
-				substr $line, $pos1, length($oldtext[0]), $newtext[0];
-				substr $urlline, $pos2, length($oldtext[1]), $newtext[1];
-				push @newlines, $line, $midline, $urlline;
-				$updated += 1;
-				next;
-				}
-			else
-				{
-				if(!$updated)
-					{
-					my $lineno = 1 + (scalar @newlines);
-					print STDERR "Problem in $file at $lineno: incorrectly formatted >\n$line$midline$urlline\n";
-					push @errorfiles, $file;
-					}
-				last;
-				}
-			}
-		push @newlines, $line;
-		}
-
-	return if (!$updated);
-
-	if ($updated > 1)
-		{
-		push @multinoticefiles, $file;
-		print "! found $updated SFL notices in $file\n";
-		}
-
-	mkpath($shadowdir, {verbose=>0});
-	move($file, "$shadowdir/$name") or die("Cannot move $file to $shadowdir/$name: $!\n");
-	open NEWFILE, ">$file" or die("Cannot overwrite $file: $!\n");
-	print NEWFILE @newlines, @lines;
-	close NEWFILE or die("Failed to update $file: $!\n");
-	print "* updated $file\n";
-	}
-
-# Process tree
-
-sub scan_directory($$)
-	{
-	my ($path, $shadow) = @_;
-
-	opendir DIR, $path;
-	my @files = grep !/^\.\.?$/, readdir DIR;
-	closedir DIR;
-
-	foreach my $file (@files)
-		{
-		my $newpath = "$path/$file";
-		my $newshadow = "$shadow/$file";
-
-		if (-d $newpath)
-			{
-			scan_directory($newpath, $newshadow);
-			next;
-			}
-		next if (-B $newpath);	# ignore binary files
-
-		map_epl($newpath, $shadow, $file);
-		}
-	}
-
-scan_directory($work_root, $saved_root);
-
-printf "%d problem files\n", scalar @errorfiles;
-print "\t", join("\n\t", @errorfiles), "\n";
-
-printf "%d files with multiple notices\n", scalar @multinoticefiles;
-print "\t", join("\n\t", @multinoticefiles), "\n";
-
diff -r 27cf0a88d449 -r d50cda9d0682 williamr/convert_to_epl.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/williamr/convert_to_epl.py	Wed Feb 10 12:36:27 2010 +0000
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+# Copyright (c) 2009 Symbian Foundation.
+# All rights reserved.
+# This component and the accompanying materials are made available
+# under the terms of the License "Eclipse Public License v1.0"
+# which accompanies this distribution, and is available
+# at the URL "http://www.eclipse.org/legal/epl-v10.html".
+#
+# Initial Contributors:
+# Symbian Foundation - Initial contribution
+#
+# Description:
+# Map the SFL license to the EPL license
+
+import os
+import os.path
+import re
+import codecs
+from optparse import OptionParser
+import sys
+
+oldtext0 = re.compile('terms of the License "Symbian Foundation License v1.0"(to Symbian Foundation)?')
+oldtext1 = re.compile('the URL "http:..www.symbianfoundation.org/legal/sfl-v10.html"')
+
+newtext = [
+	'terms of the License "Eclipse Public License v1.0"',
+	'the URL "http://www.eclipse.org/legal/epl-v10.html"'
+]
+
+errorfiles = []
+multinoticefiles = []
+shadowroot = 'shadow_epoc32'
+
+def file_type(file) :
+	f = open(file, 'r')
+	data = f.read(256)
+	f.close()
+	if len(data) < 2:
+		return None	# too short to be worth bothering about anyway
+	if data[0] == chr(255) and data[1] == chr(254) :
+		return 'utf_16_le'
+	if data.find(chr(0)) >= 0 :
+		return None	# zero byte implies binary file
+	return 'text'
+
+def map_eula(dir, name, encoded) :
+	global oldtext0
+	global oldtext1
+	global newtext
+	file = os.path.join(dir, name)
+	if encoded == 'text':
+		f = open(file, 'r')
+	else:
+		f = codecs.open(file, 'r', encoding=encoded)
+	lines = f.readlines()
+	# print ">> %s encoded as %s" % (file, f.encoding)
+	f.close()
+
+	updated = 0
+	newlines = []
+	while len(lines) > 0:
+		line = lines.pop(0)
+		pos1 = oldtext0.search(line)
+		if pos1 != None:
+			# be careful - oldtext is a prefix of newtext
+			if pos1.group(1) != None:
+				# line already converted - nothing to do
+				newlines.append(line)
+				continue
+			midlines = []
+			midlinecount = 1
+			while len(lines) > 0:
+				nextline = lines.pop(0)
+				if not re.match('^\s$', nextline):
+					# non-blank line
+					if midlinecount == 0:
+						break
+					midlinecount -= 1
+				midlines.append(nextline)
+			urlline = nextline
+			pos2 = oldtext1.search(urlline)
+			if pos2 != None:
+				# found it - assume that there's only one instance
+				newline = oldtext0.sub(newtext[0], line)
+				newurl = oldtext1.sub(newtext[1], urlline)
+				newlines.append(newline)
+				newlines.extend(midlines)
+				newlines.append(newurl)
+				updated += 1
+				continue
+			else:
+				if updated != 0:
+					lineno = 1 + len(newlines)
+					print "Problem in %s at %d: incorrectly formatted >" % (file, lineno)
+					print line
+					print midlines
+					print urlline
+					global errorfiles
+					errorfiles.append(file)
+				break
+		newlines.append(line)
+
+	if updated == 0:
+		# print " = no change to " + file
+		return 0
+
+	if updated > 1:
+		global multinoticefiles
+		multinoticefiles.append(file)
+		print '! found %d SFL notices in %s' % (updated, file)
+
+	# global shadowroot
+	# shadowdir = os.path.join(shadowroot, dir)
+	# if not os.path.exists(shadowdir) :
+	#	os.makedirs(shadowdir)
+	# newfile = os.path.join(shadowroot,file)
+	# os.rename(file, newfile)
+
+	global options
+	if not options.dryrun:
+		if encoded == 'text':
+			f = open(file, 'w')
+		else:
+			f = codecs.open(file, 'w', encoding=encoded)
+		f.writelines(newlines + lines)	# keep any lines left after an early break
+		f.close()
+	print "* updated %s (encoding %s)" % (file, encoded)
+	return 1
+
+parser = OptionParser(version="%prog 0.2", usage="Usage: %prog [options]")
+parser.add_option("-n", "--check", action="store_true", dest="dryrun",
+	help="report the files which would be updated, but don't change anything")
+parser.set_defaults(dryrun=False)
+
+(options, args) = parser.parse_args()
+if len(args) != 0:
+	parser.error("Unexpected commandline arguments")
+
+# process tree
+
+update_count = 0
+for root, dirs, files in os.walk('.', topdown=True):
+	if '.hg' in dirs:
+		dirs.remove('.hg')	# don't recurse into the Mercurial repository storage
+	for name in files:
+		encoding = file_type(os.path.join(root, name))
+		if encoding:
+			update_count += map_eula(root, name, encoding)
+
+print '%d problem files' % len(errorfiles)
+print errorfiles
+
+print '%d files with multiple notices' % len(multinoticefiles)
+print multinoticefiles
+
+if options.dryrun and update_count > 0:
+	print "%d files need updating" % update_count
+	sys.exit(1)
+
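Editorial note (not part of the patch): the substitution that convert_to_epl.py applies can be checked on a fabricated three-line notice. The regexes and replacement strings below are the ones defined in the script; the sample notice lines are illustrative only.

import re

# regexes and replacements as defined in convert_to_epl.py
oldtext0 = re.compile('terms of the License "Symbian Foundation License v1.0"(to Symbian Foundation)?')
oldtext1 = re.compile('the URL "http:..www.symbianfoundation.org/legal/sfl-v10.html"')
newtext = [
    'terms of the License "Eclipse Public License v1.0"',
    'the URL "http://www.eclipse.org/legal/epl-v10.html"'
]

# a fabricated SFL notice of the three-line shape the script looks for
notice = [
    '# under the terms of the License "Symbian Foundation License v1.0"\n',
    '# which accompanies this distribution, and is available\n',
    '# at the URL "http://www.symbianfoundation.org/legal/sfl-v10.html".\n',
]

# the first and third lines are rewritten; the middle line passes through
converted = [oldtext0.sub(newtext[0], notice[0]),
             notice[1],
             oldtext1.sub(newtext[1], notice[2])]
for line in converted:
    print line,    # prints the EPL form of the notice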